# I/O Operations

Factory pattern for creating format-specific file readers and writers with support for Avro, Parquet, and ORC formats in Hadoop environments. Provides comprehensive I/O capabilities for reading and writing structured data files.

## Capabilities

### HoodieHadoopIOFactory

Primary I/O factory for creating Hadoop-based file readers and writers with format-specific optimizations.

```java { .api }
/**
 * Factory for creating Hadoop-based file readers and writers
 * Supports multiple record types and file formats
 */
public class HoodieHadoopIOFactory implements HoodieIOFactory {

  /** Create I/O factory with storage backend */
  public HoodieHadoopIOFactory(HoodieStorage storage);

  /** Get reader factory for specific record type */
  public HoodieFileReaderFactory getReaderFactory(HoodieRecord.HoodieRecordType recordType);

  /** Get writer factory for specific record type */
  public HoodieFileWriterFactory getWriterFactory(HoodieRecord.HoodieRecordType recordType);

  /** Get format utilities for specific file format */
  public FileFormatUtils getFileFormatUtils(HoodieFileFormat fileFormat);

  /** Get storage instance for path */
  public HoodieStorage getStorage(StoragePath storagePath);

  /** Get storage instance with retry configuration */
  public HoodieStorage getStorage(StoragePath path, boolean enableRetry,
                                  long maxRetryIntervalMs, int maxRetryNumbers,
                                  long initialRetryIntervalMs, String retryExceptions,
                                  ConsistencyGuard consistencyGuard);
}
```

### Avro File Reader Factory

Factory for creating Avro-based file readers supporting Parquet format with Avro serialization.

```java { .api }
/**
 * Factory for creating Avro file readers
 * Specialized for Parquet files with Avro schema
 */
public class HoodieAvroFileReaderFactory implements HoodieFileReaderFactory {

  /** Create reader factory with storage backend */
  public HoodieAvroFileReaderFactory(HoodieStorage storage);

  /** Create Parquet file reader for Avro records */
  public HoodieAvroFileReader newParquetFileReader(HoodieStorage storage, StoragePath path);
}
```

### Avro File Writer Factory

Factory for creating Avro-based file writers supporting multiple output formats.

```java { .api }
/**
 * Factory for creating Avro file writers
 * Supports Parquet and ORC output formats
 */
public class HoodieAvroFileWriterFactory implements HoodieFileWriterFactory {

  /** Create writer factory with storage backend */
  public HoodieAvroFileWriterFactory(HoodieStorage storage);
}
```

### Avro Parquet Reader

Avro-based Parquet file reader providing schema evolution and efficient columnar access.

```java { .api }
/**
 * Avro-based Parquet file reader
 * Supports schema evolution and columnar data access
 */
public class HoodieAvroParquetReader implements HoodieFileReader {

  /** Create reader for Parquet file with Avro schema */
  public HoodieAvroParquetReader(HoodieStorage storage, StoragePath filePath);

  /** Create reader with explicit writer schema */
  public HoodieAvroParquetReader(HoodieStorage storage, StoragePath filePath,
                                 Option<Schema> writerSchemaOpt);

  /** Get the schema of the file */
  public Schema getSchema();

  /** Get iterator for records with custom reader schema */
  public ClosableIterator<IndexedRecord> getRecordIterator(Schema readerSchema);

  /** Get iterator for records with file schema */
  public ClosableIterator<IndexedRecord> getRecordIterator();

  /** Close the reader and release resources */
  public void close();
}
```

### Avro Parquet Writer

Avro-based Parquet file writer with bloom filter integration and metadata support.

```java { .api }
/**
 * Avro-based Parquet file writer
 * Supports bloom filters and custom metadata
 */
public class HoodieAvroParquetWriter implements HoodieFileWriter {

  /** Create writer with configuration and schema */
  public HoodieAvroParquetWriter(StoragePath file, HoodieConfig config, Schema schema,
                                 Task task, Option<BloomFilter> bloomFilterOpt,
                                 boolean populateMetaFields);

  /** Check if writer can accept more data */
  public boolean canWrite();

  /** Write Avro record with Hudi metadata */
  public void writeAvroWithMetadata(HoodieKey key, IndexedRecord avroRecord);

  /** Write Avro record with record key */
  public void writeAvro(String recordKey, IndexedRecord record);

  /** Close writer and return status */
  public WriteStatus close();

  /** Get current write status */
  public WriteStatus getWriteStatus();

  /** Get number of bytes written */
  public long getBytesWritten();
}
```

### Avro ORC Reader

Avro-based ORC file reader supporting schema evolution and efficient columnar access.

```java { .api }
/**
 * Avro-based ORC file reader
 * Supports schema evolution and columnar data access
 */
public class HoodieAvroOrcReader implements HoodieFileReader {

  /** Create reader with explicit writer schema */
  public HoodieAvroOrcReader(HoodieStorage storage, StoragePath filePath,
                             Option<Schema> writerSchemaOpt);

  /** Get iterator for records with custom reader schema */
  public ClosableIterator<IndexedRecord> getRecordIterator(Schema readerSchema);

  /** Get iterator for records with file schema */
  public ClosableIterator<IndexedRecord> getRecordIterator();

  /** Close the reader and release resources */
  public void close();

  /** Get the schema of the file */
  public Schema getSchema();
}
```

### Avro ORC Writer

Avro-based ORC file writer with bloom filter integration and metadata support.

```java { .api }
/**
 * Avro-based ORC file writer
 * Supports bloom filters and custom metadata
 */
public class HoodieAvroOrcWriter implements HoodieFileWriter {

  /** Create writer with configuration and schema */
  public HoodieAvroOrcWriter(StoragePath filePath, HoodieConfig config, Schema schema,
                             Task task, boolean populateMetaFields,
                             Option<BloomFilter> bloomFilterOpt);

  /** Check if writer can accept more data */
  public boolean canWrite();

  /** Write Avro record with Hudi metadata */
  public void writeAvroWithMetadata(HoodieKey key, IndexedRecord avroRecord);

  /** Write Avro record with record key */
  public void writeAvro(String recordKey, IndexedRecord record);

  /** Close writer and return status */
  public WriteStatus close();

  /** Get current write status */
  public WriteStatus getWriteStatus();

  /** Get number of bytes written */
  public long getBytesWritten();
}
```

### HFile Utilities

Utilities for working with HBase HFile format in Hadoop environments.

```java { .api }
/**
 * Utilities for working with HFile format
 * Provides HBase integration capabilities
 */
public class HoodieHFileUtils {

  /** Create HFile reader with configuration */
  public static HFile.Reader createHFileReader(FileSystem fs, Path path,
                                               CacheConfig cacheConf, Configuration conf);

  /** Get optimized configuration for HFile reading */
  public static Configuration getHFileReaderConfiguration(Configuration conf);

  /** Check if path points to an HFile */
  public static boolean isHFile(StoragePath path);
}
```

### Parquet Write Support

Parquet write support for Avro records with bloom filter integration.

```java { .api }
/**
 * Parquet write support for Avro with bloom filter integration
 * Extends standard Parquet writing with Hudi-specific features
 */
public class HoodieAvroWriteSupport extends AvroWriteSupport<IndexedRecord> {

  /** Create write support with schema and bloom filter */
  public HoodieAvroWriteSupport(MessageType schema, Schema avroSchema,
                                Option<BloomFilter> bloomFilterOpt, Properties properties);

  /** Finalize write context with metadata */
  public WriteSupport.FinalizedWriteContext finalizeWrite();

  /** Add record key to bloom filter */
  public void add(String recordKey);

  /** Add custom footer metadata */
  public void addFooterMetadata(String key, String value);
}
```

### Parquet Reader Builder

Builder pattern for creating Hoodie-specific Avro Parquet readers.

```java { .api }
/**
 * Builder for Hoodie Avro Parquet readers
 * Provides configuration and customization options
 */
public class HoodieAvroParquetReaderBuilder<T> extends ParquetReaderBuilder<T> {

  /** Create builder with file path */
  public HoodieAvroParquetReaderBuilder(Path path);

  /** Create builder with input file */
  public HoodieAvroParquetReaderBuilder(InputFile file);

  /** Set Hadoop configuration */
  public HoodieAvroParquetReaderBuilder<T> withConf(Configuration conf);

  /** Build configured ParquetReader */
  public ParquetReader<T> build();
}
```

### Parquet Read Support

Parquet read support for Avro records with Hudi-specific optimizations.

```java { .api }
/**
 * Parquet read support for Avro records
 * Extends standard AvroReadSupport with Hudi optimizations
 */
public class HoodieAvroReadSupport extends AvroReadSupport {
  // Extends AvroReadSupport with Hoodie-specific functionality
  // for reading Parquet files with Avro schema
}
```

**Usage Examples:**

```java
import org.apache.hudi.io.hadoop.*;
import org.apache.hudi.storage.hadoop.HoodieHadoopStorage;
import org.apache.avro.Schema;
import org.apache.avro.generic.IndexedRecord;

// Set up I/O factory
HoodieHadoopStorage storage = new HoodieHadoopStorage(storagePath, storageConf);
HoodieHadoopIOFactory ioFactory = new HoodieHadoopIOFactory(storage);

// Reading Parquet files
StoragePath parquetFile = new StoragePath("/data/table1/file.parquet");

// Create Avro Parquet reader
HoodieAvroParquetReader reader = new HoodieAvroParquetReader(storage, parquetFile);
Schema schema = reader.getSchema();

// Read records
try (ClosableIterator<IndexedRecord> iterator = reader.getRecordIterator()) {
  while (iterator.hasNext()) {
    IndexedRecord record = iterator.next();
    // Process record
  }
}
reader.close();

// Writing Parquet files
Schema writeSchema = new Schema.Parser().parse(schemaString);
StoragePath outputFile = new StoragePath("/data/table1/output.parquet");

HoodieAvroParquetWriter writer = new HoodieAvroParquetWriter(
    outputFile,
    hoodieConfig,
    writeSchema,
    task,
    Option.empty(), // No bloom filter
    true // Populate meta fields
);

// Write records
for (IndexedRecord record : records) {
  writer.writeAvro("key", record);
}

WriteStatus status = writer.close();
System.out.println("Bytes written: " + status.getBytesWritten());

// Working with ORC files
HoodieAvroOrcReader orcReader = new HoodieAvroOrcReader(storage, orcFilePath, Option.empty());
try (ClosableIterator<IndexedRecord> iterator = orcReader.getRecordIterator()) {
  while (iterator.hasNext()) {
    IndexedRecord record = iterator.next();
    // Process ORC record
  }
}
orcReader.close();

// HFile operations
boolean isHFile = HoodieHFileUtils.isHFile(somePath);
if (isHFile) {
  Configuration hfileConf = HoodieHFileUtils.getHFileReaderConfiguration(hadoopConf);
  // Work with HFile using optimized configuration
}
```