Java/Python API wrappers for the ADAM genomics analysis library, enabling scalable genomic data processing with Apache Spark
—
The JavaADAMContext provides comprehensive genomic data loading capabilities, automatically detecting file formats and providing validation controls for various genomic file types.
import org.bdgenomics.adam.api.java.JavaADAMContext;
import org.bdgenomics.adam.rdd.ADAMContext;
import htsjdk.samtools.ValidationStringency;

Main context class providing Java-friendly methods for genomic data operations.
/**
* Java-friendly wrapper for ADAM Context providing genomic data loading capabilities
*/
class JavaADAMContext {
/**
* Creates a JavaADAMContext wrapping the provided ADAMContext
* @param ac The ADAMContext to wrap
*/
JavaADAMContext(ADAMContext ac);
/**
* Returns the Java Spark Context associated with this context
* @return JavaSparkContext for Spark operations
*/
JavaSparkContext getSparkContext();
}

Load sequencing alignment data from various formats with automatic format detection.
/**
* Load alignment records with automatic format detection
* Supports: .bam/.cram/.sam (BAM/CRAM/SAM), .fa/.fasta (FASTA),
* .fq/.fastq (FASTQ), .ifq (interleaved FASTQ)
* Falls back to Parquet + Avro for unrecognized extensions
* @param pathName Path to alignment file(s). Supports globs and directories
* @return AlignmentRecordRDD containing reads, sequence dictionary, and record groups
*/
AlignmentRecordRDD loadAlignments(String pathName);
/**
* Load alignment records with validation stringency control
* @param pathName Path to alignment file(s)
* @param stringency Validation strictness (LENIENT, SILENT, STRICT)
* @return AlignmentRecordRDD containing reads, sequence dictionary, and record groups
*/
AlignmentRecordRDD loadAlignments(String pathName, ValidationStringency stringency);

Load reference genome sequences and create broadcastable reference files.
/**
* Load nucleotide contig fragments from reference sequences
* Supports: .fa/.fasta (FASTA format)
* Falls back to Parquet + Avro for other extensions
* @param pathName Path to reference file(s). Supports globs and directories for FASTA
* @return NucleotideContigFragmentRDD containing reference sequences
*/
NucleotideContigFragmentRDD loadContigFragments(String pathName);
/**
* Load reference sequences into broadcastable format
* Supports: .2bit files directly, other formats via loadContigFragments
* @param pathName Path to reference file (no globs/directories for 2bit)
* @return ReferenceFile for broadcast operations
*/
ReferenceFile loadReferenceFile(String pathName);
/**
* Load reference sequences with custom maximum fragment length
* @param pathName Path to reference file
* @param maximumLength Maximum fragment length (default 10000L, avoid >1e9)
* @return ReferenceFile for broadcast operations
*/
ReferenceFile loadReferenceFile(String pathName, Long maximumLength);

Load paired-end sequencing fragments from alignment files.
/**
* Load fragments from alignment data
* Supports: .bam/.cram/.sam (BAM/CRAM/SAM), .ifq (interleaved FASTQ)
* Falls back to Parquet + Avro for other extensions
* @param pathName Path to fragment file(s). Supports globs and directories
* @return FragmentRDD containing paired-end fragments
*/
FragmentRDD loadFragments(String pathName);
/**
* Load fragments with validation stringency control
* @param pathName Path to fragment file(s)
* @param stringency Validation strictness for BAM/CRAM/SAM and FASTQ formats
* @return FragmentRDD containing paired-end fragments
*/
FragmentRDD loadFragments(String pathName, ValidationStringency stringency);

Load genomic annotations and feature data from various annotation formats.
/**
* Load genomic features from annotation files
* Supports: .bed (BED6/12), .gff3 (GFF3), .gtf/.gff (GTF/GFF2),
* .narrow[pP]eak (NarrowPeak), .interval_list (IntervalList)
* Falls back to Parquet + Avro for other extensions
* @param pathName Path to feature file(s). Supports globs and directories
* @return FeatureRDD containing genomic annotations
*/
FeatureRDD loadFeatures(String pathName);
/**
* Load features with validation stringency control
* @param pathName Path to feature file(s)
* @param stringency Validation strictness for supported text formats
* @return FeatureRDD containing genomic annotations
*/
FeatureRDD loadFeatures(String pathName, ValidationStringency stringency);

Load genomic coverage data, converting features to coverage information.
/**
* Load features and convert to coverage data
* Coverage values are stored in the score field of Feature records
* Supports same formats as loadFeatures
* @param pathName Path to coverage file(s). Supports globs and directories
* @return CoverageRDD containing coverage depth information
*/
CoverageRDD loadCoverage(String pathName);
/**
* Load coverage data with validation stringency control
* @param pathName Path to coverage file(s)
* @param stringency Validation strictness for supported text formats
* @return CoverageRDD containing coverage depth information
*/
CoverageRDD loadCoverage(String pathName, ValidationStringency stringency);

Load genetic variation data from VCF files or Parquet format.
/**
* Load genotype calls from variant files
* Supports: .vcf/.vcf.gz/.vcf.bgzf/.vcf.bgz (VCF format)
* Falls back to Parquet + Avro for other extensions
* @param pathName Path to genotype file(s). Supports globs and directories for VCF
* @return GenotypeRDD containing sample genotype calls
*/
GenotypeRDD loadGenotypes(String pathName);
/**
* Load genotypes with validation stringency control
* @param pathName Path to genotype file(s)
* @param stringency Validation strictness for VCF format
* @return GenotypeRDD containing sample genotype calls
*/
GenotypeRDD loadGenotypes(String pathName, ValidationStringency stringency);
/**
* Load variant records from variant files
* Supports: .vcf/.vcf.gz/.vcf.bgzf/.vcf.bgz (VCF format)
* Falls back to Parquet + Avro for other extensions
* @param pathName Path to variant file(s). Supports globs and directories for VCF
* @return VariantRDD containing genetic variations
*/
VariantRDD loadVariants(String pathName);
/**
* Load variants with validation stringency control
* @param pathName Path to variant file(s)
* @param stringency Validation strictness for VCF format
* @return VariantRDD containing genetic variations
*/
VariantRDD loadVariants(String pathName, ValidationStringency stringency);

Basic alignment loading:
JavaADAMContext jac = new JavaADAMContext(adamContext);
// Load BAM file with default settings
AlignmentRecordRDD alignments = jac.loadAlignments("sample.bam");
// Load with strict validation
AlignmentRecordRDD strictAlignments = jac.loadAlignments("sample.bam",
ValidationStringency.STRICT);
// Load compressed FASTQ
AlignmentRecordRDD fastqReads = jac.loadAlignments("reads.fastq.gz");

Reference and feature loading:
// Load reference genome
NucleotideContigFragmentRDD reference = jac.loadContigFragments("hg38.fa");
// Create broadcastable reference
ReferenceFile refFile = jac.loadReferenceFile("hg38.2bit");
// Load genomic annotations
FeatureRDD genes = jac.loadFeatures("gencode.gtf");
CoverageRDD coverage = jac.loadCoverage("sample.bed");

Variant data loading:
// Load VCF file
VariantRDD variants = jac.loadVariants("variants.vcf.gz");
GenotypeRDD genotypes = jac.loadGenotypes("variants.vcf.gz");
// With lenient validation for problematic files
VariantRDD lenientVariants = jac.loadVariants("problematic.vcf",
ValidationStringency.LENIENT);

All text-based formats support Hadoop compression codecs:
- .gz (gzip)
- .bz2 (bzip2)

Path names may use glob patterns, for example: *.bam, sample*.vcf.gz

/**
* Validation stringency enumeration for controlling file format validation
*/
enum ValidationStringency {
STRICT, // Fail on any format violations
LENIENT, // Warn on format issues but continue processing
SILENT // Ignore format violations silently
}

Loading methods may throw exceptions for:
Install with Tessl CLI
npx tessl i tessl/maven-org-bdgenomics-adam--adam-apis-2-10