or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

code-generation.mdencoding-decoding.mdfile-operations.mdgeneric-data.mdindex.mdmessage-operations.mdreflection-operations.mdschema-evolution.mdschema-system.md

file-operations.mddocs/

0

# File Operations

1

2

Avro file operations provide comprehensive file-based data storage and retrieval with embedded schemas, metadata, and compression support. Avro data files are self-describing, splittable, and compressible, making them ideal for big data processing and long-term storage.

3

4

## Capabilities

5

6

### File Reading

7

8

Read Avro data files with random access and streaming capabilities.

9

10

```java { .api }

11

public class DataFileReader<D> extends DataFileStream<D> implements FileReader<D> {

12

public DataFileReader(File file, DatumReader<D> reader) throws IOException;

13

public DataFileReader(SeekableInput sin, DatumReader<D> reader) throws IOException;

14

15

// Random access operations

16

public void seek(long position) throws IOException;

17

public void sync(long position) throws IOException;

18

public long tell() throws IOException;

19

public long previousSync() throws IOException;

20

21

// Metadata access

22

public String getMetaString(String key);

23

public byte[] getMeta(String key);

24

public long getBlockCount();

25

public long getBlockSize();

26

}

27

28

public interface FileReader<D> extends Iterator<D>, Iterable<D>, Closeable {

29

public Schema getSchema();

30

public void sync(long position) throws IOException;

31

public boolean pastSync(long position) throws IOException;

32

public long tell() throws IOException;

33

}

34

```

35

36

**Usage Examples:**

37

38

```java

39

// Read from file with generic records

40

File avroFile = new File("users.avro");

41

DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();

42

DataFileReader<GenericRecord> fileReader = new DataFileReader<>(avroFile, datumReader);

43

44

// Read all records

45

while (fileReader.hasNext()) {

46

GenericRecord user = fileReader.next();

47

System.out.println("Name: " + user.get("name"));

48

System.out.println("Age: " + user.get("age"));

49

}

50

fileReader.close();

51

52

// Random access reading

53

DataFileReader<GenericRecord> randomReader = new DataFileReader<>(avroFile, datumReader);

54

Schema schema = randomReader.getSchema();

55

56

// Seek to specific position

57

long position = 1024;

58

randomReader.seek(position);

59

if (randomReader.hasNext()) {

60

GenericRecord record = randomReader.next();

61

processRecord(record);

62

}

63

64

// Access file metadata

65

String codec = randomReader.getMetaString("avro.codec");

66

byte[] schemaBytes = randomReader.getMeta("avro.schema");

67

System.out.println("Compression codec: " + codec);

68

randomReader.close();

69

```

70

71

### File Writing

72

73

Write Avro data files with compression and metadata support.

74

75

```java { .api }

76

public class DataFileWriter<D> implements Closeable, Flushable {

77

public DataFileWriter(DatumWriter<D> dout);

78

79

// File creation

80

public DataFileWriter<D> create(Schema schema, OutputStream outs) throws IOException;

81

public DataFileWriter<D> create(Schema schema, File file) throws IOException;

82

public DataFileWriter<D> create(Schema schema, OutputStream outs, byte[] sync) throws IOException;
public DataFileWriter<D> setCodec(CodecFactory c);

83

84

// Metadata operations

85

public DataFileWriter<D> setMeta(String key, byte[] value);

86

public DataFileWriter<D> setMeta(String key, String value);

87

public DataFileWriter<D> setSyncInterval(int syncInterval);

88

89

// Writing operations

90

public void append(D datum) throws IOException;

91

public DataFileWriter<D> appendTo(File file) throws IOException;

92

93

// Synchronization and flushing

94

public long sync() throws IOException;

95

public void flush() throws IOException;

96

public void fSync() throws IOException;

97

98

// Closing

99

public void close() throws IOException;

100

}

101

```

102

103

**Usage Examples:**

104

105

```java

106

// Write generic records to file

107

Schema schema = new Schema.Parser().parse(schemaJson);

108

File outputFile = new File("output.avro");

109

110

DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);

111

DataFileWriter<GenericRecord> fileWriter = new DataFileWriter<>(datumWriter);

112

113

// Create file with compression

114

fileWriter.setCodec(CodecFactory.deflateCodec(6));

115

fileWriter.setMeta("created.by", "MyApplication");

116

fileWriter.create(schema, outputFile);

117

118

// Write multiple records

119

List<GenericRecord> users = createUsers();

120

for (GenericRecord user : users) {

121

fileWriter.append(user);

122

}

123

124

// Force sync periodically for large files

125

fileWriter.sync();

126

127

fileWriter.close();

128

129

// Write to output stream

130

OutputStream outputStream = new BufferedOutputStream(new FileOutputStream("stream.avro"));

131

DataFileWriter<GenericRecord> streamWriter = new DataFileWriter<>(datumWriter);

132

streamWriter.create(schema, outputStream);

133

134

for (GenericRecord user : users) {

135

streamWriter.append(user);

136

}

137

streamWriter.close();

138

```

139

140

### Stream Reading

141

142

Read Avro data from input streams without random access.

143

144

```java { .api }

145

public class DataFileStream<D> implements Iterator<D>, Iterable<D>, Closeable {

146

public DataFileStream(InputStream in, DatumReader<D> reader) throws IOException;

147

148

// Schema and metadata access

149

public Schema getSchema();

150

public String getMetaString(String key);

151

public byte[] getMeta(String key);

152

public List<String> getMetaKeys();

153

154

// Iterator operations

155

public boolean hasNext();

156

public D next();

157

public D next(D reuse);

158

159

// Stream operations

160

public void close() throws IOException;

161

}

162

```

163

164

**Usage Examples:**

165

166

```java

167

// Read from input stream

168

InputStream inputStream = new FileInputStream("data.avro");

169

DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();

170

DataFileStream<GenericRecord> stream = new DataFileStream<>(inputStream, datumReader);

171

172

// Get schema from stream

173

Schema schema = stream.getSchema();

174

System.out.println("Schema: " + schema.toString(true));

175

176

// Read all records

177

GenericRecord reusedRecord = null;

178

while (stream.hasNext()) {

179

reusedRecord = stream.next(reusedRecord); // Reuse object for performance

180

processRecord(reusedRecord);

181

}

182

stream.close();

183

184

// Read from compressed stream

185

InputStream gzipStream = new GZIPInputStream(new FileInputStream("compressed.avro.gz"));

186

DataFileStream<GenericRecord> compressedStream = new DataFileStream<>(gzipStream, datumReader);

187

188

// Process compressed data

189

for (GenericRecord record : compressedStream) {

190

System.out.println("Record: " + record);

191

}

192

compressedStream.close();

193

```

194

195

### Seekable Input Interface

196

197

Interface for random access input operations.

198

199

```java { .api }

200

public interface SeekableInput extends Closeable {

201

void seek(long p) throws IOException;

202

long tell() throws IOException;

203

long length() throws IOException;

204

int read(byte[] b, int off, int len) throws IOException;

205

}

206

```

207

208

**Usage Examples:**

209

210

```java

211

// Implement custom seekable input

212

public class CustomSeekableInput implements SeekableInput {

213

private final RandomAccessFile file;

214

215

public CustomSeekableInput(File file) throws IOException {

216

this.file = new RandomAccessFile(file, "r");

217

}

218

219

@Override

220

public void seek(long p) throws IOException {

221

file.seek(p);

222

}

223

224

@Override

225

public long tell() throws IOException {

226

return file.getFilePointer();

227

}

228

229

@Override

230

public long length() throws IOException {

231

return file.length();

232

}

233

234

@Override

235

public int read(byte[] b, int off, int len) throws IOException {

236

return file.read(b, off, len);

237

}

238

239

@Override

240

public void close() throws IOException {

241

file.close();

242

}

243

}

244

245

// Use custom seekable input

246

SeekableInput seekableInput = new CustomSeekableInput(new File("data.avro"));

247

DatumReader<GenericRecord> reader = new GenericDatumReader<>();

248

DataFileReader<GenericRecord> fileReader = new DataFileReader<>(seekableInput, reader);

249

250

// Use random access capabilities

251

fileReader.seek(1000);

252

GenericRecord record = fileReader.next();

253

fileReader.close();

254

```

255

256

### Compression Codecs

257

258

Support for various compression algorithms to reduce file size.

259

260

```java { .api }

261

public abstract class Codec {

262

public abstract String getName();

263

public abstract ByteBuffer compress(ByteBuffer uncompressedData) throws IOException;

264

public abstract ByteBuffer decompress(ByteBuffer compressedData) throws IOException;

265

public abstract int hashCode();

266

public abstract boolean equals(Object obj);

267

}

268

269

public class CodecFactory {

public static CodecFactory nullCodec();

public static CodecFactory deflateCodec(int compressionLevel);

public static CodecFactory snappyCodec();

public static CodecFactory bzip2Codec();

public static CodecFactory xzCodec(int compressionLevel);

public static CodecFactory zstandardCodec(int level);

public static CodecFactory zstandardCodec(int level, boolean useChecksum);

public static CodecFactory fromString(String s);

}

280

```

281

282

**Usage Examples:**

283

284

```java

285

// Use different compression codecs

286

DataFileWriter<GenericRecord> writer = new DataFileWriter<>(datumWriter);

287

288

// Deflate compression (good balance of speed and compression)

289

writer.setCodec(CodecFactory.deflateCodec(6));

290

291

// Snappy compression (very fast, moderate compression)

292

writer.setCodec(CodecFactory.snappyCodec());

293

294

// BZip2 compression (slower, better compression)

295

writer.setCodec(CodecFactory.bzip2Codec());

296

297

// XZ compression (slowest, best compression)

298

writer.setCodec(CodecFactory.xzCodec(6));

299

300

// Zstandard compression (good speed and compression)

301

writer.setCodec(CodecFactory.zstandardCodec(3, true));

302

303

// No compression

304

writer.setCodec(CodecFactory.nullCodec());

305

306

writer.create(schema, outputFile);

307

308

// Check codec used in file

309

DataFileReader<GenericRecord> reader = new DataFileReader<>(file, datumReader);

310

String codecName = reader.getMetaString("avro.codec");

311

System.out.println("File uses codec: " + codecName);

312

reader.close();

313

```

314

315

### File Metadata

316

317

Access and manipulate file-level metadata stored in Avro data files.

318

319

```java { .api }

320

// Writer metadata operations

321

public DataFileWriter<D> setMeta(String key, byte[] value);

322

public DataFileWriter<D> setMeta(String key, String value);

323

public DataFileWriter<D> setMeta(String key, long value);

324

325

// Reader metadata operations

326

public String getMetaString(String key);

327

public byte[] getMeta(String key);

328

public long getMetaLong(String key);

329

public List<String> getMetaKeys();

330

```

331

332

**Usage Examples:**

333

334

```java

335

// Set metadata when writing

336

DataFileWriter<GenericRecord> writer = new DataFileWriter<>(datumWriter);

337

writer.setMeta("created.by", "MyApplication v1.2.3");

338

writer.setMeta("created.time", System.currentTimeMillis());

339

writer.setMeta("source.system", "production");

340

writer.setMeta("record.count.estimate", 1000000L);

341

342

byte[] customData = "custom metadata".getBytes();

343

writer.setMeta("custom.data", customData);

344

345

writer.create(schema, outputFile);

346

347

// Read metadata from file

348

DataFileReader<GenericRecord> reader = new DataFileReader<>(file, datumReader);

349

350

// Get all metadata keys

351

List<String> metaKeys = reader.getMetaKeys();

352

System.out.println("Metadata keys: " + metaKeys);

353

354

// Read specific metadata

355

String createdBy = reader.getMetaString("created.by");

356

long createdTime = reader.getMetaLong("created.time");

357

byte[] customData = reader.getMeta("custom.data");

358

359

System.out.println("Created by: " + createdBy);

360

System.out.println("Created time: " + new Date(createdTime));

361

System.out.println("Custom data: " + new String(customData));

362

363

// Standard Avro metadata

364

String codecName = reader.getMetaString("avro.codec");

365

String schemaJson = reader.getMetaString("avro.schema");

366

System.out.println("Codec: " + codecName);

367

368

reader.close();

369

```

370

371

## Types

372

373

```java { .api }

374

public class DataFileReader<D> extends DataFileStream<D> implements FileReader<D> {

375

// File-based reader with random access

376

}

377

378

public class DataFileWriter<D> implements Closeable, Flushable {

379

// File writer with compression and metadata support

380

}

381

382

public class DataFileStream<D> implements Iterator<D>, Iterable<D>, Closeable {

383

// Stream-based reader for sequential access

384

}

385

386

public interface FileReader<D> extends Iterator<D>, Iterable<D>, Closeable {

387

Schema getSchema();

388

void sync(long position) throws IOException;

389

boolean pastSync(long position) throws IOException;

390

long tell() throws IOException;

391

}

392

393

public interface SeekableInput extends Closeable {

394

void seek(long p) throws IOException;

395

long tell() throws IOException;

396

long length() throws IOException;

397

int read(byte[] b, int off, int len) throws IOException;

398

}

399

400

public interface Syncable {

401

void sync() throws IOException;

402

}

403

404

public abstract class Codec {

405

public abstract String getName();

406

public abstract ByteBuffer compress(ByteBuffer data) throws IOException;

407

public abstract ByteBuffer decompress(ByteBuffer data) throws IOException;

408

}

409

410

public class CodecFactory {

411

// Factory for creating compression codecs

412

}

413

414

// Specific codec implementations (each extends Codec)

public class DeflateCodec extends Codec { /* deflate compression */ }

public class SnappyCodec extends Codec { /* snappy compression */ }

public class BZip2Codec extends Codec { /* bzip2 compression */ }

public class XZCodec extends Codec { /* xz compression */ }

public class ZstandardCodec extends Codec { /* zstandard compression */ }

public class NullCodec extends Codec { /* no compression */ }

421

```