This document covers ADAM CLI's core genomic data processing capabilities, including alignment transformations, feature processing, variant analysis, k-mer counting, and coverage analysis.
Analyzes k-mer frequencies in read sequences for quality control and genomic analysis.
object CountReadKmers extends BDGCommandCompanion {
val commandName = "countKmers"
val commandDescription = "Counts the k-mers/q-mers from a read dataset."
def apply(cmdLine: Array[String]): CountReadKmers
}
class CountReadKmersArgs extends Args4jBase with ParquetArgs {
var inputPath: String
var outputPath: String
var kmerLength: Int
var printHistogram: Boolean
var repartition: Int
}

Usage Example:
adam-submit countKmers \
input.adam output_kmers.adam 21 \
--print_histogram

Analyzes k-mer frequencies in assembled contig sequences.
object CountContigKmers extends BDGCommandCompanion {
val commandName = "countContigKmers"
val commandDescription = "Counts the k-mers/q-mers from a read dataset."
def apply(cmdLine: Array[String]): CountContigKmers
}
class CountContigKmersArgs extends Args4jBase with ParquetArgs {
var inputPath: String // ADAM or FASTA file
var outputPath: String // Output location for k-mer counts
var kmerLength: Int // Length of k-mers
var printHistogram: Boolean // Print histogram of counts
}

Comprehensive alignment processing with format conversion, quality score recalibration, duplicate marking, and local realignment.
object TransformAlignments extends BDGCommandCompanion {
val commandName = "transformAlignments"
val commandDescription = "Convert SAM/BAM to ADAM format and optionally perform read pre-processing transformations"
def apply(cmdLine: Array[String]): TransformAlignments
}
class TransformAlignmentsArgs extends Args4jBase with ADAMSaveAnyArgs with ParquetArgs {
// Input/Output
var inputPath: String
var outputPath: String
// Filtering and projection
var limitProjection: Boolean
var useAlignedReadPredicate: Boolean
var regionPredicate: String
// Sorting options
var sortReads: Boolean
var sortLexicographically: Boolean
// Quality processing
var markDuplicates: Boolean
var recalibrateBaseQualities: Boolean
var locallyRealign: Boolean
var realignAroundIndels: Boolean
// Trimming and binning
var trim: Boolean
var qualityScoreBin: Int
// Performance tuning
var coalesce: Int
var forceShuffle: Boolean
var storageLevel: String
}

Key Processing Options:

Usage Examples:
# Basic format conversion
adam-submit transformAlignments input.bam output.adam
# Full preprocessing pipeline
adam-submit transformAlignments \
--markDuplicates \
--recalibrateBaseQualities \
--locallyRealign \
--sortReads \
input.bam output.adam
# With region filtering
adam-submit transformAlignments \
--regionPredicate "referenceName=chr1 AND start>=1000000 AND end<=2000000" \
input.bam output.adam

Process genomic features from BED, GFF3, GTF, and other annotation formats.
object TransformFeatures extends BDGCommandCompanion {
val commandName = "transformFeatures"
val commandDescription = "Convert a file with sequence features into corresponding ADAM format"
def apply(cmdLine: Array[String]): TransformFeatures
}
class TransformFeaturesArgs extends Args4jBase with ADAMSaveAnyArgs with ParquetArgs {
var inputPath: String
var outputPath: String
var sortFeatures: Boolean
var sortLexicographically: Boolean
var coalesce: Int
var forceShuffle: Boolean
}

Usage Example:
adam-submit transformFeatures \
--sortFeatures \
annotations.gtf features.adam

Process variant data from VCF files with sorting and validation options.
object TransformVariants extends BDGCommandCompanion {
val commandName = "transformVariants"
val commandDescription = "Convert a VCF file into corresponding ADAM format"
def apply(cmdLine: Array[String]): TransformVariants
}
class TransformVariantsArgs extends Args4jBase with ADAMSaveAnyArgs with ParquetArgs {
var inputPath: String
var outputPath: String
var coalesce: Int
var forceShuffle: Boolean
var sort: Boolean
var sortLexicographically: Boolean
var stringency: String
}

Usage Example:
adam-submit transformVariants \
--sort \
--stringency LENIENT \
variants.vcf variants.adam

Process genotype data with filtering and quality control options.
object TransformGenotypes extends BDGCommandCompanion {
val commandName = "transformGenotypes"
val commandDescription = "Convert a VCF file into corresponding ADAM format"
def apply(cmdLine: Array[String]): TransformGenotypes
}
class TransformGenotypesArgs extends Args4jBase with ADAMSaveAnyArgs with ParquetArgs {
var inputPath: String
var outputPath: String
var coalesce: Int
var forceShuffle: Boolean
var sort: Boolean
var sortLexicographically: Boolean
}

Process paired-end read fragments with insert size analysis and quality filtering.
object TransformFragments extends BDGCommandCompanion {
val commandName = "transformFragments"
val commandDescription = "Convert SAM/BAM/CRAM to ADAM fragments"
def apply(cmdLine: Array[String]): TransformFragments
}
class TransformFragmentsArgs extends Args4jBase with ADAMSaveAnyArgs with ParquetArgs {
var inputPath: String
var outputPath: String
var coalesce: Int
var forceShuffle: Boolean
var storageLevel: String
}

Generate coverage depth information from aligned reads.
object Reads2Coverage extends BDGCommandCompanion {
val commandName = "reads2coverage"
val commandDescription = "Calculate the coverage from a given ADAM file"
def apply(cmdLine: Array[String]): Reads2Coverage
}
class Reads2CoverageArgs extends Args4jBase with ParquetArgs {
var inputPath: String
var outputPath: String
var collapse: Boolean
var onlyCountUniqueReads: Boolean
var coalesce: Int
var forceShuffle: Boolean
}

Usage Example:
adam-submit reads2coverage \
--onlyCountUniqueReads \
--collapse \
alignments.adam coverage.adam

Combine multiple data shards into consolidated files for improved query performance.
object MergeShards extends BDGCommandCompanion {
val commandName = "mergeShards"
val commandDescription = "Merge multiple shards of genomic data"
def apply(cmdLine: Array[String]): MergeShards
}
class MergeShardsArgs extends Args4jBase with ParquetArgs {
var inputPath: String
var outputPath: String
var coalesce: Int
var sortOrder: String
}

Usage Example:
adam-submit mergeShards \
--sortOrder coordinate \
sharded_data/ merged_output.adam

Performance tuning options:

- `--storageLevel` to control Spark caching strategy
- `--coalesce` to optimize output file count
- `--forceShuffle` when data skew is detected