Type-safe conversion system for transforming between different genomic dataset types using Spark Datasets. The GenomicDatasetConverters module provides comprehensive conversion capabilities between all genomic data types while preserving type safety and metadata.
Base traits defining the conversion interface for each target genomic data type.
/**
 * Conversion interface whose target is a NucleotideContigFragmentRDD.
 *
 * @tparam T Record type of the source dataset.
 * @tparam U Source genomic dataset type, bounded by GenomicDataset[_, T, U].
 */
trait ToContigDatasetConversion[T <: Product, U <: GenomicDataset[_, T, U]]
  extends GenomicDatasetConversion[T, U, NucleotideContigFragment, NucleotideContigFragmentRDD] {

  /** Type tag for NucleotideContigFragment, the record type paired with the target dataset. */
  val xTag: TypeTag[NucleotideContigFragment]
}
/**
 * Conversion interface whose target is a CoverageRDD.
 *
 * @tparam T Record type of the source dataset.
 * @tparam U Source genomic dataset type, bounded by GenomicDataset[_, T, U].
 */
trait ToCoverageDatasetConversion[T <: Product, U <: GenomicDataset[_, T, U]]
  extends GenomicDatasetConversion[T, U, Coverage, CoverageRDD] {

  /** Type tag for Coverage, the record type paired with the target dataset. */
  val xTag: TypeTag[Coverage]
}
/**
 * Conversion interface whose target is a FeatureRDD.
 *
 * @tparam T Record type of the source dataset.
 * @tparam U Source genomic dataset type, bounded by GenomicDataset[_, T, U].
 */
trait ToFeatureDatasetConversion[T <: Product, U <: GenomicDataset[_, T, U]]
  extends GenomicDatasetConversion[T, U, Feature, FeatureRDD] {

  /** Type tag for Feature, the record type paired with the target dataset. */
  val xTag: TypeTag[Feature]
}
/**
 * Conversion interface whose target is a FragmentRDD.
 *
 * @tparam T Record type of the source dataset.
 * @tparam U Source genomic dataset type, bounded by GenomicDataset[_, T, U].
 */
trait ToFragmentDatasetConversion[T <: Product, U <: GenomicDataset[_, T, U]]
  extends GenomicDatasetConversion[T, U, Fragment, FragmentRDD] {

  /** Type tag for Fragment, the record type paired with the target dataset. */
  val xTag: TypeTag[Fragment]
}
/**
 * Conversion interface whose target is an AlignmentRecordRDD.
 *
 * @tparam T Record type of the source dataset.
 * @tparam U Source genomic dataset type, bounded by GenomicDataset[_, T, U].
 */
trait ToAlignmentRecordDatasetConversion[T <: Product, U <: GenomicDataset[_, T, U]]
  extends GenomicDatasetConversion[T, U, AlignmentRecord, AlignmentRecordRDD] {

  /** Type tag for AlignmentRecord, the record type paired with the target dataset. */
  val xTag: TypeTag[AlignmentRecord]
}
/**
 * Conversion interface whose target is a GenotypeRDD.
 *
 * @tparam T Record type of the source dataset.
 * @tparam U Source genomic dataset type, bounded by GenomicDataset[_, T, U].
 */
trait ToGenotypeDatasetConversion[T <: Product, U <: GenomicDataset[_, T, U]]
  extends GenomicDatasetConversion[T, U, Genotype, GenotypeRDD] {

  /** Type tag for Genotype, the record type paired with the target dataset. */
  val xTag: TypeTag[Genotype]
}
/**
 * Conversion interface whose target is a VariantRDD.
 *
 * @tparam T Record type of the source dataset.
 * @tparam U Source genomic dataset type, bounded by GenomicDataset[_, T, U].
 */
trait ToVariantDatasetConversion[T <: Product, U <: GenomicDataset[_, T, U]]
  extends GenomicDatasetConversion[T, U, Variant, VariantRDD] {

  /** Type tag for Variant, the record type paired with the target dataset. */
  val xTag: TypeTag[Variant]
}

Convert other genomic data types to nucleotide contig fragments (reference sequences).
/**
 * Converts a CoverageRDD into a NucleotideContigFragmentRDD via Dataset.
 *
 * The type arguments describe the source side: the source record type and
 * the source dataset type, as the `U <: GenomicDataset[_, T, U]` bound requires.
 */
class CoverageToContigsDatasetConverter
  extends ToContigDatasetConversion[Coverage, CoverageRDD]
/**
 * Converts a FeatureRDD into a NucleotideContigFragmentRDD via Dataset.
 *
 * The type arguments describe the source side: the source record type and
 * the source dataset type, as the `U <: GenomicDataset[_, T, U]` bound requires.
 */
class FeaturesToContigsDatasetConverter
  extends ToContigDatasetConversion[Feature, FeatureRDD]
/**
 * Converts a FragmentRDD into a NucleotideContigFragmentRDD via Dataset.
 *
 * The type arguments describe the source side: the source record type and
 * the source dataset type, as the `U <: GenomicDataset[_, T, U]` bound requires.
 */
class FragmentsToContigsDatasetConverter
  extends ToContigDatasetConversion[Fragment, FragmentRDD]
/**
 * Converts an AlignmentRecordRDD into a NucleotideContigFragmentRDD via Dataset.
 *
 * The type arguments describe the source side: the source record type and
 * the source dataset type, as the `U <: GenomicDataset[_, T, U]` bound requires.
 */
class AlignmentRecordsToContigsDatasetConverter
  extends ToContigDatasetConversion[AlignmentRecord, AlignmentRecordRDD]
/**
 * Converts a GenotypeRDD into a NucleotideContigFragmentRDD via Dataset.
 *
 * The type arguments describe the source side: the source record type and
 * the source dataset type, as the `U <: GenomicDataset[_, T, U]` bound requires.
 */
class GenotypesToContigsDatasetConverter
  extends ToContigDatasetConversion[Genotype, GenotypeRDD]
/**
 * Converts a VariantRDD into a NucleotideContigFragmentRDD via Dataset.
 *
 * The type arguments describe the source side: the source record type and
 * the source dataset type, as the `U <: GenomicDataset[_, T, U]` bound requires.
 */
class VariantsToContigsDatasetConverter
  extends ToContigDatasetConversion[Variant, VariantRDD]

Convert other genomic data types to coverage data representing sequencing depth or signal intensity.
/**
 * Converts a NucleotideContigFragmentRDD into a CoverageRDD via Dataset.
 *
 * The type arguments describe the source side: the source record type and
 * the source dataset type, as the `U <: GenomicDataset[_, T, U]` bound requires.
 */
class ContigsToCoverageDatasetConverter
  extends ToCoverageDatasetConversion[NucleotideContigFragment, NucleotideContigFragmentRDD]
/**
 * Converts a FeatureRDD into a CoverageRDD via Dataset.
 *
 * The type arguments describe the source side: the source record type and
 * the source dataset type, as the `U <: GenomicDataset[_, T, U]` bound requires.
 */
class FeaturesToCoverageDatasetConverter
  extends ToCoverageDatasetConversion[Feature, FeatureRDD]
/**
 * Converts a FragmentRDD into a CoverageRDD via Dataset.
 *
 * The type arguments describe the source side: the source record type and
 * the source dataset type, as the `U <: GenomicDataset[_, T, U]` bound requires.
 */
class FragmentsToCoverageDatasetConverter
  extends ToCoverageDatasetConversion[Fragment, FragmentRDD]
/**
 * Converts an AlignmentRecordRDD into a CoverageRDD via Dataset.
 *
 * The type arguments describe the source side: the source record type and
 * the source dataset type, as the `U <: GenomicDataset[_, T, U]` bound requires.
 */
class AlignmentRecordsToCoverageDatasetConverter
  extends ToCoverageDatasetConversion[AlignmentRecord, AlignmentRecordRDD]
/**
 * Converts a GenotypeRDD into a CoverageRDD via Dataset.
 *
 * The type arguments describe the source side: the source record type and
 * the source dataset type, as the `U <: GenomicDataset[_, T, U]` bound requires.
 */
class GenotypesToCoverageDatasetConverter
  extends ToCoverageDatasetConversion[Genotype, GenotypeRDD]
/**
 * Converts a VariantRDD into a CoverageRDD via Dataset.
 *
 * The type arguments describe the source side: the source record type and
 * the source dataset type, as the `U <: GenomicDataset[_, T, U]` bound requires.
 */
class VariantsToCoverageDatasetConverter
  extends ToCoverageDatasetConversion[Variant, VariantRDD]

Convert other genomic data types to genomic feature annotations (genes, intervals, etc.).
/**
 * Converts a NucleotideContigFragmentRDD into a FeatureRDD via Dataset.
 *
 * The type arguments describe the source side: the source record type and
 * the source dataset type, as the `U <: GenomicDataset[_, T, U]` bound requires.
 */
class ContigsToFeaturesDatasetConverter
  extends ToFeatureDatasetConversion[NucleotideContigFragment, NucleotideContigFragmentRDD]
/**
 * Converts a CoverageRDD into a FeatureRDD via Dataset.
 *
 * The type arguments describe the source side: the source record type and
 * the source dataset type, as the `U <: GenomicDataset[_, T, U]` bound requires.
 */
class CoverageToFeaturesDatasetConverter
  extends ToFeatureDatasetConversion[Coverage, CoverageRDD]
/**
 * Converts a FragmentRDD into a FeatureRDD via Dataset.
 *
 * The type arguments describe the source side: the source record type and
 * the source dataset type, as the `U <: GenomicDataset[_, T, U]` bound requires.
 */
class FragmentsToFeaturesDatasetConverter
  extends ToFeatureDatasetConversion[Fragment, FragmentRDD]
/**
 * Converts an AlignmentRecordRDD into a FeatureRDD via Dataset.
 *
 * The type arguments describe the source side: the source record type and
 * the source dataset type, as the `U <: GenomicDataset[_, T, U]` bound requires.
 */
class AlignmentRecordsToFeaturesDatasetConverter
  extends ToFeatureDatasetConversion[AlignmentRecord, AlignmentRecordRDD]
/**
 * Converts a GenotypeRDD into a FeatureRDD via Dataset.
 *
 * The type arguments describe the source side: the source record type and
 * the source dataset type, as the `U <: GenomicDataset[_, T, U]` bound requires.
 */
class GenotypesToFeaturesDatasetConverter
  extends ToFeatureDatasetConversion[Genotype, GenotypeRDD]
/**
 * Converts a VariantRDD into a FeatureRDD via Dataset.
 *
 * The type arguments describe the source side: the source record type and
 * the source dataset type, as the `U <: GenomicDataset[_, T, U]` bound requires.
 */
class VariantsToFeaturesDatasetConverter
  extends ToFeatureDatasetConversion[Variant, VariantRDD]

Convert other genomic data types to paired-end sequencing fragments.
/**
 * Converts a NucleotideContigFragmentRDD into a FragmentRDD via Dataset.
 *
 * The type arguments describe the source side: the source record type and
 * the source dataset type, as the `U <: GenomicDataset[_, T, U]` bound requires.
 */
class ContigsToFragmentsDatasetConverter
  extends ToFragmentDatasetConversion[NucleotideContigFragment, NucleotideContigFragmentRDD]
/**
 * Converts a CoverageRDD into a FragmentRDD via Dataset.
 *
 * The type arguments describe the source side: the source record type and
 * the source dataset type, as the `U <: GenomicDataset[_, T, U]` bound requires.
 */
class CoverageToFragmentsDatasetConverter
  extends ToFragmentDatasetConversion[Coverage, CoverageRDD]
/**
 * Converts a FeatureRDD into a FragmentRDD via Dataset.
 *
 * The type arguments describe the source side: the source record type and
 * the source dataset type, as the `U <: GenomicDataset[_, T, U]` bound requires.
 */
class FeaturesToFragmentsDatasetConverter
  extends ToFragmentDatasetConversion[Feature, FeatureRDD]
/**
 * Converts an AlignmentRecordRDD into a FragmentRDD via Dataset.
 *
 * The type arguments describe the source side: the source record type and
 * the source dataset type, as the `U <: GenomicDataset[_, T, U]` bound requires.
 */
class AlignmentRecordsToFragmentsDatasetConverter
  extends ToFragmentDatasetConversion[AlignmentRecord, AlignmentRecordRDD]
/**
 * Converts a GenotypeRDD into a FragmentRDD via Dataset.
 *
 * The type arguments describe the source side: the source record type and
 * the source dataset type, as the `U <: GenomicDataset[_, T, U]` bound requires.
 */
class GenotypesToFragmentsDatasetConverter
  extends ToFragmentDatasetConversion[Genotype, GenotypeRDD]
/**
 * Converts a VariantRDD into a FragmentRDD via Dataset.
 *
 * The type arguments describe the source side: the source record type and
 * the source dataset type, as the `U <: GenomicDataset[_, T, U]` bound requires.
 */
class VariantsToFragmentsDatasetConverter
  extends ToFragmentDatasetConversion[Variant, VariantRDD]

Convert other genomic data types to sequence alignment records.
/**
 * Converts a NucleotideContigFragmentRDD into an AlignmentRecordRDD via Dataset.
 *
 * The type arguments describe the source side: the source record type and
 * the source dataset type, as the `U <: GenomicDataset[_, T, U]` bound requires.
 */
class ContigsToAlignmentRecordsDatasetConverter
  extends ToAlignmentRecordDatasetConversion[NucleotideContigFragment, NucleotideContigFragmentRDD]
/**
 * Converts a CoverageRDD into an AlignmentRecordRDD via Dataset.
 *
 * The type arguments describe the source side: the source record type and
 * the source dataset type, as the `U <: GenomicDataset[_, T, U]` bound requires.
 */
class CoverageToAlignmentRecordsDatasetConverter
  extends ToAlignmentRecordDatasetConversion[Coverage, CoverageRDD]
/**
 * Converts a FeatureRDD into an AlignmentRecordRDD via Dataset.
 *
 * The type arguments describe the source side: the source record type and
 * the source dataset type, as the `U <: GenomicDataset[_, T, U]` bound requires.
 */
class FeaturesToAlignmentRecordsDatasetConverter
  extends ToAlignmentRecordDatasetConversion[Feature, FeatureRDD]
/**
 * Converts a FragmentRDD into an AlignmentRecordRDD via Dataset.
 *
 * The type arguments describe the source side: the source record type and
 * the source dataset type, as the `U <: GenomicDataset[_, T, U]` bound requires.
 */
class FragmentsToAlignmentRecordsDatasetConverter
  extends ToAlignmentRecordDatasetConversion[Fragment, FragmentRDD]
/**
 * Converts a GenotypeRDD into an AlignmentRecordRDD via Dataset.
 *
 * The type arguments describe the source side: the source record type and
 * the source dataset type, as the `U <: GenomicDataset[_, T, U]` bound requires.
 */
class GenotypesToAlignmentRecordsDatasetConverter
  extends ToAlignmentRecordDatasetConversion[Genotype, GenotypeRDD]
/**
 * Converts a VariantRDD into an AlignmentRecordRDD via Dataset.
 *
 * The type arguments describe the source side: the source record type and
 * the source dataset type, as the `U <: GenomicDataset[_, T, U]` bound requires.
 */
class VariantsToAlignmentRecordsDatasetConverter
  extends ToAlignmentRecordDatasetConversion[Variant, VariantRDD]

Convert other genomic data types to genotype information from variant calling.
/**
 * Converts a NucleotideContigFragmentRDD into a GenotypeRDD via Dataset.
 *
 * The type arguments describe the source side: the source record type and
 * the source dataset type, as the `U <: GenomicDataset[_, T, U]` bound requires.
 */
class ContigsToGenotypesDatasetConverter
  extends ToGenotypeDatasetConversion[NucleotideContigFragment, NucleotideContigFragmentRDD]
/**
 * Converts a CoverageRDD into a GenotypeRDD via Dataset.
 *
 * The type arguments describe the source side: the source record type and
 * the source dataset type, as the `U <: GenomicDataset[_, T, U]` bound requires.
 */
class CoverageToGenotypesDatasetConverter
  extends ToGenotypeDatasetConversion[Coverage, CoverageRDD]
/**
 * Converts a FeatureRDD into a GenotypeRDD via Dataset.
 *
 * The type arguments describe the source side: the source record type and
 * the source dataset type, as the `U <: GenomicDataset[_, T, U]` bound requires.
 */
class FeaturesToGenotypesDatasetConverter
  extends ToGenotypeDatasetConversion[Feature, FeatureRDD]
/**
 * Converts a FragmentRDD into a GenotypeRDD via Dataset.
 *
 * The type arguments describe the source side: the source record type and
 * the source dataset type, as the `U <: GenomicDataset[_, T, U]` bound requires.
 */
class FragmentsToGenotypesDatasetConverter
  extends ToGenotypeDatasetConversion[Fragment, FragmentRDD]
/**
 * Converts an AlignmentRecordRDD into a GenotypeRDD via Dataset.
 *
 * The type arguments describe the source side: the source record type and
 * the source dataset type, as the `U <: GenomicDataset[_, T, U]` bound requires.
 */
class AlignmentRecordsToGenotypesDatasetConverter
  extends ToGenotypeDatasetConversion[AlignmentRecord, AlignmentRecordRDD]
/**
 * Converts a VariantRDD into a GenotypeRDD via Dataset.
 *
 * The type arguments describe the source side: the source record type and
 * the source dataset type, as the `U <: GenomicDataset[_, T, U]` bound requires.
 */
class VariantsToGenotypesDatasetConverter
  extends ToGenotypeDatasetConversion[Variant, VariantRDD]

Convert other genomic data types to genetic variant information.
/**
 * Converts a NucleotideContigFragmentRDD into a VariantRDD via Dataset.
 *
 * The type arguments describe the source side: the source record type and
 * the source dataset type, as the `U <: GenomicDataset[_, T, U]` bound requires.
 */
class ContigsToVariantsDatasetConverter
  extends ToVariantDatasetConversion[NucleotideContigFragment, NucleotideContigFragmentRDD]
/**
 * Converts a CoverageRDD into a VariantRDD via Dataset.
 *
 * The type arguments describe the source side: the source record type and
 * the source dataset type, as the `U <: GenomicDataset[_, T, U]` bound requires.
 */
class CoverageToVariantsDatasetConverter
  extends ToVariantDatasetConversion[Coverage, CoverageRDD]
/**
 * Converts a FeatureRDD into a VariantRDD via Dataset.
 *
 * The type arguments describe the source side: the source record type and
 * the source dataset type, as the `U <: GenomicDataset[_, T, U]` bound requires.
 */
class FeaturesToVariantsDatasetConverter
  extends ToVariantDatasetConversion[Feature, FeatureRDD]
/**
 * Converts a FragmentRDD into a VariantRDD via Dataset.
 *
 * The type arguments describe the source side: the source record type and
 * the source dataset type, as the `U <: GenomicDataset[_, T, U]` bound requires.
 */
class FragmentsToVariantsDatasetConverter
  extends ToVariantDatasetConversion[Fragment, FragmentRDD]
/**
 * Converts an AlignmentRecordRDD into a VariantRDD via Dataset.
 *
 * The type arguments describe the source side: the source record type and
 * the source dataset type, as the `U <: GenomicDataset[_, T, U]` bound requires.
 */
class AlignmentRecordsToVariantsDatasetConverter
  extends ToVariantDatasetConversion[AlignmentRecord, AlignmentRecordRDD]
/**
 * Converts a GenotypeRDD into a VariantRDD via Dataset.
 *
 * The type arguments describe the source side: the source record type and
 * the source dataset type, as the `U <: GenomicDataset[_, T, U]` bound requires.
 */
class GenotypesToVariantsDatasetConverter
  extends ToVariantDatasetConversion[Genotype, GenotypeRDD]

import org.bdgenomics.adam.api.java.GenomicDatasetConverters._
import org.apache.spark.sql.Dataset
// NOTE(review): this example assumes `jac` (an ADAM context exposing
// loadAlignments / loadVariants) and `spark` (a SparkSession) are already
// in scope, along with whatever implicit encoders emptyDataset needs — confirm.

// Convert alignment records to features using Dataset.
val alignments: AlignmentRecordRDD = jac.loadAlignments("input.bam")
// The converter's second argument is a Dataset of the target record type.
val emptyFeatureDataset: Dataset[Feature] = spark.emptyDataset[Feature]
val converter = new AlignmentRecordsToFeaturesDatasetConverter()
val features: FeatureRDD = converter.call(alignments, emptyFeatureDataset)

// Convert variants to coverage using Dataset.
val variants: VariantRDD = jac.loadVariants("variants.vcf")
val emptyCoverageDataset: Dataset[Coverage] = spark.emptyDataset[Coverage]
val coverageConverter = new VariantsToCoverageDatasetConverter()
val coverage: CoverageRDD = coverageConverter.call(variants, emptyCoverageDataset)

All dataset converters maintain: