or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

dataset-conversions.md · index.md · java-api.md · python-integration.md · rdd-conversions.md

docs/java-api.md

0

# Java API

1

2

The JavaADAMContext class provides the primary Java interface for loading and working with genomic data files in ADAM. It wraps the core ADAMContext functionality with Java-friendly method signatures and handles automatic format detection based on file extensions.

3

4

## Capabilities

5

6

### JavaADAMContext Class

7

8

Main entry point for Java applications to access ADAM's genomic data loading capabilities.

9

10

```java { .api }

11

/**

12

* Java-friendly wrapper for ADAMContext providing genomic data loading functions.

13

*/

14

class JavaADAMContext {

15

JavaADAMContext(ADAMContext ac);

16

17

/**

18

* Returns the Java Spark Context associated with this Java ADAM Context.

19

*/

20

JavaSparkContext getSparkContext();

21

}

22

```

23

24

**Usage Example:**

25

26

```java

27

import org.apache.spark.api.java.JavaSparkContext;

28

import org.bdgenomics.adam.api.java.JavaADAMContext;

29

import org.bdgenomics.adam.rdd.ADAMContext;

30

31

// Create from existing Spark context

32

JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext());

33

ADAMContext ac = new ADAMContext(jsc.sc());

34

JavaADAMContext jac = new JavaADAMContext(ac);

35

36

// Or using companion object methods

37

JavaADAMContext jac2 = JavaADAMContext.fromADAMContext(ac);

38

ADAMContext ac2 = JavaADAMContext.toADAMContext(jac);

39

```

40

41

### Alignment Data Loading

42

43

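Load sequence alignment data from BAM, SAM, CRAM, FASTA, FASTQ, or interleaved FASTQ files. Paths whose extension matches none of these formats are loaded as Parquet + Avro.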

44

45

```java { .api }

46

/**

47

* Load alignment records from various formats.

48

* Supports: .bam/.cram/.sam (BAM/CRAM/SAM), .fa/.fasta (FASTA),

49

* .fq/.fastq (FASTQ), .ifq (interleaved FASTQ)

50

* Falls back to Parquet + Avro if no extension matches.

51

* Compressed files (.gz, .bz2) supported via Hadoop codecs.

52

*/

53

AlignmentRecordRDD loadAlignments(String pathName);

54

55

/**

56

* Load alignment records with validation stringency for format validation.

57

*/

58

AlignmentRecordRDD loadAlignments(String pathName, ValidationStringency stringency);

59

```

60

61

**Usage Examples:**

62

63

```java

64

import htsjdk.samtools.ValidationStringency;

65

import org.bdgenomics.adam.rdd.read.AlignmentRecordRDD;

66

67

// Load BAM file with default validation

68

AlignmentRecordRDD alignments = jac.loadAlignments("sample.bam");

69

70

// Load with strict validation

71

AlignmentRecordRDD strictAlignments = jac.loadAlignments(

72

"sample.bam",

73

ValidationStringency.STRICT

74

);

75

76

// Load compressed FASTQ

77

AlignmentRecordRDD fastqData = jac.loadAlignments("reads.fastq.gz");

78

79

// Load from multiple files using glob pattern

80

AlignmentRecordRDD multipleFiles = jac.loadAlignments("data/*.bam");

81

```

82

83

### Fragment Data Loading

84

85

Load paired-end sequencing fragment data from BAM, SAM, CRAM, or interleaved FASTQ files.

86

87

```java { .api }

88

/**

89

* Load fragments from BAM/SAM/CRAM or interleaved FASTQ formats.

90

*/

91

FragmentRDD loadFragments(String pathName);

92

93

/**

94

* Load fragments with validation stringency.

95

*/

96

FragmentRDD loadFragments(String pathName, ValidationStringency stringency);

97

```

98

99

**Usage Examples:**

100

101

```java

102

import org.bdgenomics.adam.rdd.fragment.FragmentRDD;

103

104

// Load paired-end data as fragments

105

FragmentRDD fragments = jac.loadFragments("paired_reads.bam");

106

107

// Load interleaved FASTQ as fragments

108

FragmentRDD ifqFragments = jac.loadFragments("interleaved.ifq");

109

```

110

111

### Variant Data Loading

112

113

Load genetic variant and genotype data from VCF files.

114

115

```java { .api }

116

/**

117

* Load variants from VCF format (.vcf, .vcf.gz, .vcf.bgzf, .vcf.bgz).

118

*/

119

VariantRDD loadVariants(String pathName);

120

121

/**

122

* Load variants with validation stringency.

123

*/

124

VariantRDD loadVariants(String pathName, ValidationStringency stringency);

125

126

/**

127

* Load genotypes from VCF format.

128

*/

129

GenotypeRDD loadGenotypes(String pathName);

130

131

/**

132

* Load genotypes with validation stringency.

133

*/

134

GenotypeRDD loadGenotypes(String pathName, ValidationStringency stringency);

135

```

136

137

**Usage Examples:**

138

139

```java

140

import org.bdgenomics.adam.rdd.variant.VariantRDD;

141

import org.bdgenomics.adam.rdd.variant.GenotypeRDD;

142

143

// Load variant calls

144

VariantRDD variants = jac.loadVariants("variants.vcf.gz");

145

146

// Load genotype information

147

GenotypeRDD genotypes = jac.loadGenotypes("genotypes.vcf");

148

149

// Load with lenient validation for problematic files

150

VariantRDD lenientVariants = jac.loadVariants(

151

"noisy_variants.vcf",

152

ValidationStringency.LENIENT

153

);

154

```

155

156

### Feature Data Loading

157

158

Load genomic feature annotations from BED, GFF, GTF, NarrowPeak, or IntervalList files.

159

160

```java { .api }

161

/**

162

* Load features from BED/GFF/GTF/NarrowPeak/IntervalList formats.

163

*/

164

FeatureRDD loadFeatures(String pathName);

165

166

/**

167

* Load features with validation stringency.

168

*/

169

FeatureRDD loadFeatures(String pathName, ValidationStringency stringency);

170

171

/**

172

* Load coverage data derived from features.

173

*/

174

CoverageRDD loadCoverage(String pathName);

175

176

/**

177

* Load coverage data with validation stringency.

178

*/

179

CoverageRDD loadCoverage(String pathName, ValidationStringency stringency);

180

```

181

182

**Usage Examples:**

183

184

```java

185

import org.bdgenomics.adam.rdd.feature.FeatureRDD;

186

import org.bdgenomics.adam.rdd.feature.CoverageRDD;

187

188

// Load gene annotations from GFF3

189

FeatureRDD genes = jac.loadFeatures("genes.gff3");

190

191

// Load genomic intervals from BED file

192

FeatureRDD intervals = jac.loadFeatures("regions.bed");

193

194

// Load coverage data

195

CoverageRDD coverage = jac.loadCoverage("coverage.bed");

196

197

// Load peak calls from ChIP-seq

198

FeatureRDD peaks = jac.loadFeatures("peaks.narrowPeak");

199

```

200

201

### Reference Sequence Loading

202

203

Load reference genome sequences from FASTA or 2bit format files.

204

205

```java { .api }

206

/**

207

* Load reference sequences from 2bit or FASTA formats.

208

*/

209

ReferenceFile loadReferenceFile(String pathName);

210

211

/**

212

* Load reference sequences with maximum fragment length limit.

213

*/

214

ReferenceFile loadReferenceFile(String pathName, Long maximumLength);

215

216

/**

217

* Load nucleotide contig fragments from FASTA format.

218

*/

219

NucleotideContigFragmentRDD loadContigFragments(String pathName);

220

```

221

222

**Usage Examples:**

223

224

```java

225

import org.bdgenomics.adam.util.ReferenceFile;

226

import org.bdgenomics.adam.rdd.contig.NucleotideContigFragmentRDD;

227

228

// Load reference genome

229

ReferenceFile reference = jac.loadReferenceFile("reference.fa");

230

231

// Load with fragment size limit (e.g., for memory management)

232

ReferenceFile limitedRef = jac.loadReferenceFile("reference.2bit", 1000000L);

233

234

// Load reference as contig fragments for distributed processing

235

NucleotideContigFragmentRDD contigs = jac.loadContigFragments("reference.fasta");

236

```

237

238

### Type Conversions

239

240

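In Scala, implicit conversions translate between JavaADAMContext and ADAMContext instances automatically. Java has no implicit conversions, so from Java code convert explicitly: wrap an ADAMContext with the constructor (or the companion methods shown in the usage example above) and read the wrapped ADAMContext back through `ac`: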

241

242

```java { .api }

243

/**

244

* Constructor creates JavaADAMContext from ADAMContext.

245

* The ac parameter becomes accessible as a public field.

246

*/

247

JavaADAMContext(ADAMContext ac);

248

249

/**

250

* Access the underlying ADAMContext via the public ac field.

251

*/

252

public final ADAMContext ac;

253

```

254

255

## Validation Stringency

256

257

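The alignment, fragment, variant, genotype, feature, and coverage loading methods each have an overload that accepts an htsjdk.samtools.ValidationStringency for controlling format validation (the reference sequence and contig fragment loaders do not take one):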

258

259

```java { .api }

260

// ValidationStringency options from htsjdk

261

enum ValidationStringency {

262

STRICT, // Strict validation - fail on any format violations

263

LENIENT, // Lenient validation - warn on format violations but continue

264

SILENT // Silent validation - ignore format violations

265

}

266

```

267

268

## Error Handling

269

270

- **FileNotFoundException**: Thrown when specified file paths don't exist

271

- **IllegalArgumentException**: Thrown for invalid parameters or unsupported file formats

272

- **SparkException**: Thrown for Spark-related errors during data loading

273

- **ValidationException**: Thrown when a file violates its format rules and the validation stringency in effect is `STRICT` (with `LENIENT` or `SILENT`, loading continues instead)