or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

configuration-management.md, filesystem-utilities.md, format-utilities.md, index.md, io-operations.md, storage-operations.md

docs/storage-operations.md

0

# Storage Operations

1

2

Core Hadoop FileSystem abstraction providing a unified interface for distributed storage operations, with consistency guarantees, retry mechanisms, and comprehensive file system capabilities.

3

4

## Capabilities

5

6

### HoodieHadoopStorage

7

8

Primary storage implementation providing Hadoop FileSystem-based operations with built-in retry logic and consistency guarantees.

9

10

```java { .api }

11

/**

12

* Hadoop FileSystem-based implementation of HoodieStorage interface

13

* Provides distributed storage operations with consistency and retry support

14

*/

15

public class HoodieHadoopStorage implements HoodieStorage {

16

17

/** Create storage instance with path and configuration */

18

public HoodieHadoopStorage(StoragePath path, StorageConfiguration<?> conf);

19

20

/** Create storage instance with Hadoop path and configuration */

21

public HoodieHadoopStorage(Path path, Configuration conf);

22

23

/** Create storage instance with string path and configuration */

24

public HoodieHadoopStorage(String path, Configuration conf);

25

26

/** Create storage instance with string path and storage configuration */

27

public HoodieHadoopStorage(String path, StorageConfiguration<?> conf);

28

29

/** Create storage instance with retry configuration */

30

public HoodieHadoopStorage(StoragePath path, StorageConfiguration<?> conf,

31

boolean enableRetry, long maxRetryIntervalMs,

32

int maxRetryNumbers, long initialRetryIntervalMs,

33

String retryExceptions, ConsistencyGuard consistencyGuard);

34

35

/** Create storage instance from existing FileSystem */

36

public HoodieHadoopStorage(FileSystem fs);

37

}

38

```

39

40

### File Operations

41

42

Core file system operations for reading, writing, and managing files in distributed storage.

43

44

```java { .api }

45

/**

46

* Opens an input stream for reading from the specified path

47

* @param path - Storage path to read from

48

* @return InputStream for reading file contents

49

*/

50

public InputStream open(StoragePath path);

51

52

/**

53

* Opens a seekable data input stream with buffering options

54

* @param path - Storage path to read from

55

* @param bufferSize - Buffer size for read operations

56

* @param wrapStream - Whether to wrap the stream

57

* @return SeekableDataInputStream for random access reading

58

*/

59

public SeekableDataInputStream openSeekable(StoragePath path, int bufferSize, boolean wrapStream);

60

61

/**

62

* Creates an output stream for writing to the specified path

63

* @param path - Storage path to write to

64

* @param overwrite - Whether to overwrite existing file

65

* @return OutputStream for writing file contents

66

*/

67

public OutputStream create(StoragePath path, boolean overwrite);

68

69

/**

70

* Creates an output stream with advanced options

71

* @param path - Storage path to write to

72

* @param overwrite - Whether to overwrite existing file

73

* @param bufferSize - Buffer size for write operations (nullable)

74

* @param replication - Replication factor (nullable)

75

* @param sizeThreshold - Size threshold for optimization (nullable)

76

* @return OutputStream for writing file contents

77

*/

78

public OutputStream create(StoragePath path, boolean overwrite,

79

Integer bufferSize, Short replication, Long sizeThreshold);

80

81

/**

82

* Appends to an existing file

83

* @param path - Storage path to append to

84

* @return OutputStream for appending to file

85

*/

86

public OutputStream append(StoragePath path);

87

88

/**

89

* Creates a new empty file if it doesn't exist

90

* @param path - Storage path for new file

91

* @return true if file was created, false if it already existed

92

*/

93

public boolean createNewFile(StoragePath path);

94

```

95

96

### Directory and Metadata Operations

97

98

Operations for managing directories and retrieving file metadata information.

99

100

```java { .api }

101

/**

102

* Checks if a path exists in storage

103

* @param path - Storage path to check

104

* @return true if path exists, false otherwise

105

*/

106

public boolean exists(StoragePath path);

107

108

/**

109

* Gets detailed information about a path

110

* @param path - Storage path to inspect

111

* @return StoragePathInfo with metadata

112

*/

113

public StoragePathInfo getPathInfo(StoragePath path);

114

115

/**

116

* Creates a directory and any necessary parent directories

117

* @param path - Storage path for directory to create

118

* @return true if directory was created or already exists

119

*/

120

public boolean createDirectory(StoragePath path);

121

122

/**

123

* Lists direct entries in a directory

124

* @param path - Directory path to list

125

* @return List of StoragePathInfo for direct children

126

*/

127

public List<StoragePathInfo> listDirectEntries(StoragePath path);

128

129

/**

130

* Lists all files recursively in a directory

131

* @param path - Directory path to list

132

* @return List of StoragePathInfo for all files

133

*/

134

public List<StoragePathInfo> listFiles(StoragePath path);

135

136

/**

137

* Lists direct entries with filtering

138

* @param path - Directory path to list

139

* @param filter - StoragePathFilter to apply

140

* @return Filtered list of StoragePathInfo

141

*/

142

public List<StoragePathInfo> listDirectEntries(StoragePath path, StoragePathFilter filter);

143

144

/**

145

* Lists direct entries for multiple paths

146

* @param pathList - List of directory paths to list

147

* @return Combined list of StoragePathInfo

148

*/

149

public List<StoragePathInfo> listDirectEntries(List<StoragePath> pathList);

150

151

/**

152

* Finds entries matching a glob pattern

153

* @param pathPattern - Glob pattern to match

154

* @return List of StoragePathInfo matching pattern

155

*/

156

public List<StoragePathInfo> globEntries(StoragePath pathPattern);

157

```

158

159

### File Management Operations

160

161

Operations for moving, deleting, and modifying files and directories.

162

163

```java { .api }

164

/**

165

* Renames or moves a file/directory

166

* @param oldPath - Current path

167

* @param newPath - New path

168

* @return true if rename succeeded

169

*/

170

public boolean rename(StoragePath oldPath, StoragePath newPath);

171

172

/**

173

* Deletes a directory and all its contents

174

* @param path - Directory path to delete

175

* @return true if deletion succeeded

176

*/

177

public boolean deleteDirectory(StoragePath path);

178

179

/**

180

* Deletes a single file

181

* @param path - File path to delete

182

* @return true if deletion succeeded

183

*/

184

public boolean deleteFile(StoragePath path);

185

186

/**

187

* Sets modification time for a path

188

* @param path - Storage path to modify

189

* @param modificationTimeInMillisEpoch - New modification time in milliseconds since epoch

190

*/

191

public void setModificationTime(StoragePath path, long modificationTimeInMillisEpoch);

192

```

193

194

### Storage Configuration and Properties

195

196

Storage configuration methods and property access.

197

198

```java { .api }

199

/**

200

* Gets the URI scheme for this storage

201

* @return Scheme string (e.g., "hdfs", "file")

202

*/

203

public String getScheme();

204

205

/**

206

* Gets the URI for this storage

207

* @return URI representing the storage location

208

*/

209

public URI getUri();

210

211

/**

212

* Gets default block size for a path

213

* @param path - Storage path to check

214

* @return Default block size in bytes

215

*/

216

public int getDefaultBlockSize(StoragePath path);

217

218

/**

219

* Gets default buffer size for I/O operations

220

* @return Default buffer size in bytes

221

*/

222

public int getDefaultBufferSize();

223

224

/**

225

* Gets default replication factor for a path

226

* @param path - Storage path to check

227

* @return Default replication factor

228

*/

229

public short getDefaultReplication(StoragePath path);

230

231

/**

232

* Gets the underlying FileSystem object

233

* @return Hadoop FileSystem instance

234

*/

235

public Object getFileSystem();

236

237

/**

238

* Gets the raw storage implementation

239

* @return Raw HoodieStorage instance

240

*/

241

public HoodieStorage getRawStorage();

242

243

/**

244

* Creates a new storage instance with different path

245

* @param path - New storage path

246

* @param storageConf - Storage configuration

247

* @return New HoodieStorage instance

248

*/

249

public HoodieStorage newInstance(StoragePath path, StorageConfiguration<?> storageConf);

250

251

/**

252

* Closes the storage and releases resources

253

*/

254

public void close();

255

```

256

257

### HadoopStorageConfiguration

258

259

Hadoop Configuration wrapper providing a storage configuration abstraction.

260

261

```java { .api }

262

/**

263

* Hadoop Configuration wrapper for storage configuration

264

*/

265

public class HadoopStorageConfiguration extends StorageConfiguration<Configuration> {

266

267

/** Create configuration wrapper with defaults flag */

268

public HadoopStorageConfiguration(Boolean loadDefaults);

269

270

/** Create configuration wrapper */

271

public HadoopStorageConfiguration(Configuration configuration);

272

273

/** Create configuration wrapper with copy option */

274

public HadoopStorageConfiguration(Configuration configuration, boolean copy);

275

276

/** Create configuration wrapper from existing instance */

277

public HadoopStorageConfiguration(HadoopStorageConfiguration configuration);

278

279

/** Create new instance of this configuration */

280

public StorageConfiguration<Configuration> newInstance();

281

282

/** Get underlying Hadoop Configuration */

283

public Configuration unwrap();

284

285

/** Get copy of underlying configuration */

286

public Configuration unwrapCopy();

287

288

/** Set configuration property */

289

public void set(String key, String value);

290

291

/** Get configuration property as Option */

292

public Option<String> getString(String key);

293

294

/** Get inline configuration for InLineFileSystem */

295

public StorageConfiguration<Configuration> getInline();

296

}

297

```

298

299

**Usage Examples:**

300

301

```java

302

import org.apache.hudi.storage.hadoop.HoodieHadoopStorage;

303

import org.apache.hudi.storage.hadoop.HadoopStorageConfiguration;

304

import org.apache.hadoop.conf.Configuration;

305

306

// Basic storage setup

307

Configuration hadoopConf = new Configuration();

308

hadoopConf.set("fs.defaultFS", "hdfs://namenode:8020");

309

310

HadoopStorageConfiguration storageConf = new HadoopStorageConfiguration(hadoopConf);

311

StoragePath basePath = new StoragePath("hdfs://namenode:8020/data/hudi");

312

313

HoodieHadoopStorage storage = new HoodieHadoopStorage(basePath, storageConf);

314

315

// File operations

316

StoragePath filePath = new StoragePath(basePath, "table1/partition1/file.parquet");

317

318

// Check existence and read

319

if (storage.exists(filePath)) {

320

try (InputStream input = storage.open(filePath)) {

321

// Process file content

322

}

323

}

324

325

// Write new file

326

StoragePath outputPath = new StoragePath(basePath, "table1/partition2/output.parquet");

327

try (OutputStream output = storage.create(outputPath, true)) {

328

// Write data

329

}

330

331

// List directory contents

332

List<StoragePathInfo> entries = storage.listDirectEntries(basePath);

333

for (StoragePathInfo entry : entries) {

334

System.out.println("Path: " + entry.getPath() +

335

", Size: " + entry.getLength() +

336

", IsDir: " + entry.isDirectory());

337

}

338

```