or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

configuration-management.md, filesystem-utilities.md, format-utilities.md, index.md, io-operations.md, storage-operations.md

docs/configuration-management.md

0

# Configuration Management

1

2

Configuration management backed by a distributed file system (DFS), with support for global properties, environment-specific settings, and Hadoop configuration integration. It enables centralized configuration management across distributed Hudi deployments.

3

4

## Capabilities

5

6

### DFSPropertiesConfiguration

7

8

Main configuration class providing DFS-based properties management with support for global configuration files and environment-specific overrides.

9

10

```java { .api }

11

/**

12

* DFS-based properties configuration extending PropertiesConfig

13

* Provides centralized configuration management for Hudi operations

14

*/

15

public class DFSPropertiesConfiguration extends PropertiesConfig {

16

17

/** Default properties file name */

18

public static final String DEFAULT_PROPERTIES_FILE = "hudi-defaults.conf";

19

20

/** Environment variable for configuration directory */

21

public static final String CONF_FILE_DIR_ENV_NAME = "HUDI_CONF_DIR";

22

23

/** Default configuration file directory */

24

public static final String DEFAULT_CONF_FILE_DIR = "file:/etc/hudi/conf";

25

26

/** Default path for configuration file */

27

public static final StoragePath DEFAULT_PATH;

28

29

/** Create configuration with Hadoop configuration and file path */

30

public DFSPropertiesConfiguration(Configuration hadoopConf, StoragePath filePath);

31

32

/** Create configuration with default settings */

33

public DFSPropertiesConfiguration();

34

35

/** Add properties from file path */

36

public void addPropsFromFile(StoragePath filePath);

37

38

/** Add properties from BufferedReader stream */

39

public void addPropsFromStream(BufferedReader reader, StoragePath cfgFilePath);

40

41

/** Get global properties instance */

42

public TypedProperties getGlobalProperties();

43

44

/** Get instance properties */

45

public TypedProperties getProps();

46

47

/** Get instance properties with global properties option */

48

public TypedProperties getProps(boolean includeGlobalProps);

49

}

50

```

51

52

### Global Configuration Management

53

54

Static methods for managing global configuration properties across the application.

55

56

```java { .api }

57

/**

58

* Load global properties from default configuration location

59

* Loads properties from HUDI_CONF_DIR or default location

60

* @return TypedProperties containing global configuration

61

*/

62

public static TypedProperties loadGlobalProps();

63

64

/**

65

* Get global properties (cached version)

66

* Returns cached global properties or loads them if not cached

67

* @return TypedProperties containing global configuration

68

*/

69

public static TypedProperties getGlobalProps();

70

71

/**

72

* Refresh global properties by reloading from file system

73

* Clears cache and reloads properties from configuration files

74

*/

75

public static void refreshGlobalProps();

76

77

/**

78

* Clear global properties cache

79

* Forces next access to reload properties from files

80

*/

81

public static void clearGlobalProps();

82

83

/**

84

* Add property to global properties

85

* @param key - Property key to add

86

* @param value - Property value to set

87

* @return Updated TypedProperties with new property

88

*/

89

public static TypedProperties addToGlobalProps(String key, String value);

90

```

91

92

### Configuration File Locations

93

94

The configuration system supports multiple approaches for locating configuration files:

95

96

1. **Environment Variable**: Set `HUDI_CONF_DIR` to specify a custom configuration directory

97

2. **Default Location**: Falls back to `file:/etc/hudi/conf` (the value of `DEFAULT_CONF_FILE_DIR`) if the environment variable is not set

98

3. **Explicit Path**: Pass a specific `StoragePath` to the constructor to use a custom location

99

100

### Hadoop Configuration Integration

101

102

Seamless integration with Hadoop Configuration system for unified configuration management.

103

104

```java { .api }

105

/**

106

* Configuration integration patterns with Hadoop

107

*/

108

109

// Create with existing Hadoop configuration

110

Configuration hadoopConf = new Configuration();

111

hadoopConf.addResource("core-site.xml");

112

hadoopConf.addResource("hdfs-site.xml");

113

114

// Custom configuration file location

115

StoragePath configPath = new StoragePath("hdfs://namenode:8020/config/hudi-custom.conf");

116

DFSPropertiesConfiguration hudiConf = new DFSPropertiesConfiguration(hadoopConf, configPath);

117

118

// Access properties

119

String tableType = hudiConf.getString("hoodie.table.type", "COPY_ON_WRITE");

120

int parquetBlockSize = hudiConf.getInt("hoodie.parquet.block.size", 134217728);

121

boolean inlineCompaction = hudiConf.getBoolean("hoodie.compact.inline", false);

122

```

123

124

### Configuration Properties Access

125

126

Inherited methods from PropertiesConfig for accessing configuration values with type safety and defaults.

127

128

```java { .api }

129

/**

130

* Property access methods (inherited from PropertiesConfig)

131

*/

132

133

// String properties

134

public String getString(String key);

135

public String getString(String key, String defaultValue);

136

137

// Integer properties

138

public int getInt(String key);

139

public int getInt(String key, int defaultValue);

140

141

// Long properties

142

public long getLong(String key);

143

public long getLong(String key, long defaultValue);

144

145

// Boolean properties

146

public boolean getBoolean(String key);

147

public boolean getBoolean(String key, boolean defaultValue);

148

149

// Double properties

150

public double getDouble(String key);

151

public double getDouble(String key, double defaultValue);

152

153

// Get all properties as TypedProperties

154

public TypedProperties getProps();

155

```

156

157

### Common Hudi Configuration Properties

158

159

Standard configuration properties commonly used in Hudi operations:

160

161

#### Table Configuration

162

- `hoodie.table.name` - Name of the Hudi table

163

- `hoodie.table.type` - Table type (COPY_ON_WRITE or MERGE_ON_READ)

164

- `hoodie.table.base.file.format` - Base file format (PARQUET, ORC, etc.)

165

166

#### Write Configuration

167

- `hoodie.write.markers.type` - Marker type for write operations

168

- `hoodie.write.concurrency.mode` - Concurrency mode for writes

169

- `hoodie.datasource.write.operation` - Write operation type (INSERT, UPSERT, etc.)

170

171

#### Compaction Configuration

172

- `hoodie.compact.inline` - Enable inline compaction

173

- `hoodie.compact.inline.max.delta.commits` - Max delta commits before compaction

174

- `hoodie.compact.strategy` - Compaction strategy

175

176

#### File System Configuration

177

- `hoodie.filesystem.consistency.check.enabled` - Enable consistency checks

178

- `hoodie.filesystem.operation.retry.enable` - Enable operation retries

179

- `hoodie.filesystem.operation.retry.initial.interval` - Initial retry interval

180

181

### Configuration File Format

182

183

Hudi configuration files use the standard Java properties format:

184

185

```properties

186

# Hudi configuration file (hudi-defaults.conf)

187

188

# Table settings

189

hoodie.table.type=COPY_ON_WRITE

190

hoodie.table.base.file.format=PARQUET

191

192

# Write settings

193

hoodie.write.markers.type=TIMELINE_SERVER_BASED

194

hoodie.write.concurrency.mode=SINGLE_WRITER

195

hoodie.datasource.write.operation=UPSERT

196

197

# Compaction settings

198

hoodie.compact.inline=false

199

hoodie.compact.inline.max.delta.commits=10

200

hoodie.compact.strategy=org.apache.hudi.table.action.compact.strategy.LogFileSizeBasedCompactionStrategy

201

202

# File system settings

203

hoodie.filesystem.consistency.check.enabled=true

204

hoodie.filesystem.operation.retry.enable=true

205

hoodie.filesystem.operation.retry.initial.interval=100

206

207

# Parquet settings

208

hoodie.parquet.block.size=134217728

209

hoodie.parquet.page.size=1048576

210

hoodie.parquet.compression.codec=snappy

211

```

212

213

**Usage Examples:**

214

215

```java

216

import org.apache.hudi.common.config.DFSPropertiesConfiguration;

217

import org.apache.hadoop.conf.Configuration;

218

import org.apache.hudi.storage.StoragePath;

219

import org.apache.hudi.common.config.TypedProperties;

220

221

// Using global configuration

222

TypedProperties globalProps = DFSPropertiesConfiguration.getGlobalProps();

223

String defaultTableType = globalProps.getString("hoodie.table.type", "COPY_ON_WRITE");

224

225

// Using global DFS configuration instance

226

DFSPropertiesConfiguration globalConfig = DFSPropertiesConfiguration.getGlobalDFSPropsConfiguration();

227

boolean inlineCompaction = globalConfig.getBoolean("hoodie.compact.inline", false);

228

229

// Creating custom configuration with Hadoop integration

230

Configuration hadoopConf = new Configuration();

231

hadoopConf.set("fs.defaultFS", "hdfs://namenode:8020");

232

233

// Custom configuration file location

234

StoragePath customConfigPath = new StoragePath("hdfs://namenode:8020/apps/hudi/conf/production.conf");

235

DFSPropertiesConfiguration customConfig = new DFSPropertiesConfiguration(hadoopConf, customConfigPath);

236

237

// Access configuration properties with defaults

238

String tableName = customConfig.getString("hoodie.table.name", "default_table");

239

int parquetBlockSize = customConfig.getInt("hoodie.parquet.block.size", 134217728);

240

boolean consistencyCheck = customConfig.getBoolean("hoodie.filesystem.consistency.check.enabled", true);

241

242

// Environment-based configuration directory

243

// Set environment variable: export HUDI_CONF_DIR=hdfs://namenode:8020/config/hudi

244

// Configuration will automatically load from: hdfs://namenode:8020/config/hudi/hudi-defaults.conf

245

DFSPropertiesConfiguration envConfig = new DFSPropertiesConfiguration();

246

247

// Working with TypedProperties for bulk operations

248

TypedProperties allProps = customConfig.getProps();

249

for (String key : allProps.stringPropertyNames()) {

250

String value = allProps.getString(key);

251

System.out.println(key + " = " + value);

252

}

253

254

// Combining with Hadoop configuration for unified setup

255

Configuration unifiedConf = new Configuration();

256

unifiedConf.addResource("core-site.xml");

257

unifiedConf.addResource("hdfs-site.xml");

258

259

DFSPropertiesConfiguration hudiConfig = new DFSPropertiesConfiguration(

260

unifiedConf,

261

new StoragePath("hdfs://namenode:8020/config/hudi-defaults.conf")

262

);

263

264

// Use in Hudi operations

265

String recordKeyField = hudiConfig.getString("hoodie.datasource.write.recordkey.field", "_row_key");

266

String partitionPathField = hudiConfig.getString("hoodie.datasource.write.partitionpath.field", "partition");

267

String precombineField = hudiConfig.getString("hoodie.datasource.write.precombine.field", "ts");

268

```

269

270

### Configuration Best Practices

271

272

1. **Environment Separation**: Use different configuration files for different environments (dev, staging, prod)

273

2. **Centralized Storage**: Store configuration files in HDFS or other distributed storage for consistency

274

3. **Property Precedence**: Understand that programmatically set properties override configuration file values

275

4. **Default Values**: Always provide sensible defaults when accessing configuration properties

276

5. **Documentation**: Document custom configuration properties and their expected values