or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

docs

dataset-conversions.md
index.md
java-api.md
python-integration.md
rdd-conversions.md
tile.json

docs/java-api.md

Java API

The JavaADAMContext class provides the primary Java interface for loading and working with genomic data files in ADAM. It wraps the core ADAMContext functionality with Java-friendly method signatures and handles automatic format detection based on file extensions.

Capabilities

JavaADAMContext Class

Main entry point for Java applications to access ADAM's genomic data loading capabilities.

/**
 * Java-friendly wrapper for ADAMContext providing genomic data loading functions.
 */
class JavaADAMContext {
    JavaADAMContext(ADAMContext ac);
    
    /**
     * Returns the Java Spark Context associated with this Java ADAM Context.
     */
    JavaSparkContext getSparkContext();
}

Usage Example:

import org.apache.spark.api.java.JavaSparkContext;
import org.bdgenomics.adam.api.java.JavaADAMContext;
import org.bdgenomics.adam.rdd.ADAMContext;

// Create from existing Spark context
JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext());
ADAMContext ac = new ADAMContext(jsc.sc());
JavaADAMContext jac = new JavaADAMContext(ac);

// Or using companion object methods
JavaADAMContext jac2 = JavaADAMContext.fromADAMContext(ac);
ADAMContext ac2 = JavaADAMContext.toADAMContext(jac);

Alignment Data Loading

Load sequence alignment data from BAM, SAM, CRAM, FASTA, FASTQ, or interleaved FASTQ files. Paths whose extension is not recognized are loaded as Parquet + Avro.

/**
 * Load alignment records from various formats.
 * Supports: .bam/.cram/.sam (BAM/CRAM/SAM), .fa/.fasta (FASTA), 
 * .fq/.fastq (FASTQ), .ifq (interleaved FASTQ)
 * Falls back to Parquet + Avro if no extension matches.
 * Compressed files (.gz, .bz2) supported via Hadoop codecs.
 */
AlignmentRecordRDD loadAlignments(String pathName);

/**
 * Load alignment records with validation stringency for format validation.
 */
AlignmentRecordRDD loadAlignments(String pathName, ValidationStringency stringency);

Usage Examples:

import htsjdk.samtools.ValidationStringency;
import org.bdgenomics.adam.rdd.read.AlignmentRecordRDD;

// Load BAM file with default validation
AlignmentRecordRDD alignments = jac.loadAlignments("sample.bam");

// Load with strict validation
AlignmentRecordRDD strictAlignments = jac.loadAlignments(
    "sample.bam", 
    ValidationStringency.STRICT
);

// Load compressed FASTQ
AlignmentRecordRDD fastqData = jac.loadAlignments("reads.fastq.gz");

// Load from multiple files using glob pattern
AlignmentRecordRDD multipleFiles = jac.loadAlignments("data/*.bam");

Fragment Data Loading

Load paired-end sequencing fragment data from BAM, SAM, CRAM, or interleaved FASTQ files.

/**
 * Load fragments from BAM/SAM/CRAM or interleaved FASTQ formats.
 */
FragmentRDD loadFragments(String pathName);

/**
 * Load fragments with validation stringency.
 */
FragmentRDD loadFragments(String pathName, ValidationStringency stringency);

Usage Examples:

import org.bdgenomics.adam.rdd.fragment.FragmentRDD;

// Load paired-end data as fragments
FragmentRDD fragments = jac.loadFragments("paired_reads.bam");

// Load interleaved FASTQ as fragments
FragmentRDD ifqFragments = jac.loadFragments("interleaved.ifq");

Variant Data Loading

Load genetic variant and genotype data from VCF files.

/**
 * Load variants from VCF format (.vcf, .vcf.gz, .vcf.bgzf, .vcf.bgz).
 */
VariantRDD loadVariants(String pathName);

/**
 * Load variants with validation stringency.
 */
VariantRDD loadVariants(String pathName, ValidationStringency stringency);

/**
 * Load genotypes from VCF format.
 */
GenotypeRDD loadGenotypes(String pathName);

/**
 * Load genotypes with validation stringency.
 */
GenotypeRDD loadGenotypes(String pathName, ValidationStringency stringency);

Usage Examples:

import org.bdgenomics.adam.rdd.variant.VariantRDD;
import org.bdgenomics.adam.rdd.variant.GenotypeRDD;

// Load variant calls
VariantRDD variants = jac.loadVariants("variants.vcf.gz");

// Load genotype information
GenotypeRDD genotypes = jac.loadGenotypes("genotypes.vcf");

// Load with lenient validation for problematic files
VariantRDD lenientVariants = jac.loadVariants(
    "noisy_variants.vcf", 
    ValidationStringency.LENIENT
);

Feature Data Loading

Load genomic feature annotations from BED, GFF, GTF, NarrowPeak, or IntervalList files.

/**
 * Load features from BED/GFF/GTF/NarrowPeak/IntervalList formats.
 */
FeatureRDD loadFeatures(String pathName);

/**
 * Load features with validation stringency.
 */
FeatureRDD loadFeatures(String pathName, ValidationStringency stringency);

/**
 * Load coverage data derived from features.
 */
CoverageRDD loadCoverage(String pathName);

/**
 * Load coverage data with validation stringency.
 */
CoverageRDD loadCoverage(String pathName, ValidationStringency stringency);

Usage Examples:

import org.bdgenomics.adam.rdd.feature.FeatureRDD;
import org.bdgenomics.adam.rdd.feature.CoverageRDD;

// Load gene annotations from GFF3
FeatureRDD genes = jac.loadFeatures("genes.gff3");

// Load genomic intervals from BED file
FeatureRDD intervals = jac.loadFeatures("regions.bed");

// Load coverage data
CoverageRDD coverage = jac.loadCoverage("coverage.bed");

// Load peak calls from ChIP-seq
FeatureRDD peaks = jac.loadFeatures("peaks.narrowPeak");

Reference Sequence Loading

Load reference genome sequences from FASTA or 2bit format files.

/**
 * Load reference sequences from 2bit or FASTA formats.
 */
ReferenceFile loadReferenceFile(String pathName);

/**
 * Load reference sequences with maximum fragment length limit.
 */
ReferenceFile loadReferenceFile(String pathName, Long maximumLength);

/**
 * Load nucleotide contig fragments from FASTA format.
 */
NucleotideContigFragmentRDD loadContigFragments(String pathName);

Usage Examples:

import org.bdgenomics.adam.util.ReferenceFile;
import org.bdgenomics.adam.rdd.contig.NucleotideContigFragmentRDD;

// Load reference genome
ReferenceFile reference = jac.loadReferenceFile("reference.fa");

// Load with fragment size limit (e.g., for memory management)
ReferenceFile limitedRef = jac.loadReferenceFile("reference.2bit", 1000000L);

// Load reference as contig fragments for distributed processing
NucleotideContigFragmentRDD contigs = jac.loadContigFragments("reference.fasta");

Type Conversions

In Scala, implicit conversions between JavaADAMContext and ADAMContext instances are available. Java has no implicit conversions, so Java code must convert explicitly, for example by calling the constructor directly:

/**
 * Constructor creates JavaADAMContext from ADAMContext.
 * The ac parameter becomes accessible as a public field.
 */
JavaADAMContext(ADAMContext ac);

/**
 * Access the underlying ADAMContext via the public ac field.
 */
public final ADAMContext ac;

Validation Stringency

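The alignment, fragment, variant, genotype, feature, and coverage loading methods each have an overload that accepts an htsjdk.samtools.ValidationStringency for controlling format validation (the reference-file and contig-fragment loaders listed above do not):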

// ValidationStringency options from htsjdk
enum ValidationStringency {
    STRICT,    // Strict validation - fail on any format violations
    LENIENT,   // Lenient validation - warn on format violations but continue
    SILENT     // Silent validation - ignore format violations
}

Error Handling

  • FileNotFoundException: Thrown when specified file paths don't exist
  • IllegalArgumentException: Thrown for invalid parameters or unsupported file formats
  • SparkException: Thrown for Spark-related errors during data loading
  • ValidationException: Thrown when validation stringency requirements aren't met