or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

batch-processing.md, configuration.md, index.md, schema-conversion.md, serialization.md, stream-processing.md

docs/configuration.md

0

# Configuration Options

1

2

The `CsvFormatOptions` class provides a comprehensive configuration system for handling various CSV dialects and processing requirements. It defines standardized configuration keys for both Table API/SQL integration and programmatic usage.

3

4

## Capabilities

5

6

### CsvFormatOptions Class

7

8

Configuration options class providing standardized configuration keys for CSV format processing.

9

10

```java { .api }

11

/**

12

* Configuration options for CSV format processing

13

* Contains standard configuration keys used across CSV components

14

*/

15

public class CsvFormatOptions {

16

17

/**

18

* Field delimiter character for separating values

19

* Default: "," (comma)

20

* Accepts: Any single character as string

21

*/

22

public static final ConfigOption<String> FIELD_DELIMITER;

23

24

/**

25

* Quote character for enclosing field values containing special characters

26

* Default: "\"" (double quote)

27

* Accepts: Any single character as string

28

*/

29

public static final ConfigOption<String> QUOTE_CHARACTER;

30

31

/**

32

* Flag to disable quote character usage entirely

33

* Default: false

34

* When true, fields are never quoted regardless of content

35

*/

36

public static final ConfigOption<Boolean> DISABLE_QUOTE_CHARACTER;

37

38

/**

39

* Allow comment lines starting with '#' to be ignored during parsing

40

* Default: false

41

* When true, lines beginning with '#' are skipped

42

*/

43

public static final ConfigOption<Boolean> ALLOW_COMMENTS;

44

45

/**

46

* Skip fields and rows with parse errors instead of failing

47

* Default: false

48

* When true, malformed records are ignored and processing continues

49

*/

50

public static final ConfigOption<Boolean> IGNORE_PARSE_ERRORS;

51

52

/**

53

* Array element delimiter for separating elements within array fields

54

* Default: ";" (semicolon)

55

* Used when serializing/deserializing array and collection types

56

*/

57

public static final ConfigOption<String> ARRAY_ELEMENT_DELIMITER;

58

59

/**

60

* Escape character for escaping special characters within quoted fields

61

* Default: null (no escape character)

62

* When set, this character can escape quotes and other special characters

63

*/

64

public static final ConfigOption<String> ESCAPE_CHARACTER;

65

66

/**

67

* Null literal string that represents null values in CSV data

68

* Default: null (empty string represents null)

69

* When set, this exact string is treated as null during parsing

70

*/

71

public static final ConfigOption<String> NULL_LITERAL;

72

73

/**

74

* Control BigDecimal output format for numeric serialization

75

* Default: true (use scientific notation)

76

* When false, uses standard decimal notation for large numbers

77

*/

78

public static final ConfigOption<Boolean> WRITE_BIGDECIMAL_IN_SCIENTIFIC_NOTATION;

79

}

80

```

81

82

## Usage Examples

83

84

### Table API/SQL Configuration

85

86

```java

87

import org.apache.flink.table.api.TableEnvironment;

88

import org.apache.flink.formats.csv.CsvFormatOptions;

89

90

// Configure CSV format in Table API

91

TableEnvironment tEnv = TableEnvironment.create(settings);

92

93

// Create table with custom CSV configuration

94

tEnv.executeSql(

95

"CREATE TABLE csv_source (" +

96

" name STRING," +

97

" age INT," +

98

" salary DECIMAL(10,2)" +

99

") WITH (" +

100

" 'connector' = 'filesystem'," +

101

" 'path' = '/path/to/file.csv'," +

102

" 'format' = 'csv'," +

103

" 'csv.field-delimiter' = '|'," +

104

" 'csv.quote-character' = '''," +

105

" 'csv.ignore-parse-errors' = 'true'," +

106

" 'csv.null-literal' = 'NULL'" +

107

")"

108

);

109

```

110

111

### Programmatic Configuration

112

113

```java

114

import org.apache.flink.configuration.Configuration;

115

import org.apache.flink.formats.csv.CsvFormatOptions;

116

117

// Create configuration object

118

Configuration config = new Configuration();

119

120

// Set CSV format options

121

config.setString(CsvFormatOptions.FIELD_DELIMITER, "|");

122

config.setString(CsvFormatOptions.QUOTE_CHARACTER, "'");

123

config.setBoolean(CsvFormatOptions.IGNORE_PARSE_ERRORS, true);

124

config.setString(CsvFormatOptions.NULL_LITERAL, "NULL");

125

config.setBoolean(CsvFormatOptions.ALLOW_COMMENTS, true);

126

127

// Use configuration with format factories

128

// Configuration is automatically applied to CSV components

129

```

130

131

### Builder Pattern Integration

132

133

The configuration options integrate seamlessly with builder patterns in CSV components:

134

135

```java

136

import org.apache.flink.formats.csv.CsvRowDataSerializationSchema;

137

import org.apache.flink.formats.csv.RowCsvInputFormat;

138

139

// Extract values from configuration

140

Configuration config = new Configuration();

141

String delimiter = config.getString(CsvFormatOptions.FIELD_DELIMITER, ",");

142

String quoteChar = config.getString(CsvFormatOptions.QUOTE_CHARACTER, "\"");

143

Boolean ignoreErrors = config.getBoolean(CsvFormatOptions.IGNORE_PARSE_ERRORS, false);

144

145

// Apply to serialization schema

146

CsvRowDataSerializationSchema serializer = new CsvRowDataSerializationSchema.Builder(rowType)

147

.setFieldDelimiter(delimiter.charAt(0))

148

.setQuoteCharacter(quoteChar.charAt(0))

149

.build();

150

151

// Apply to input format

152

RowCsvInputFormat inputFormat = RowCsvInputFormat

153

.builder(typeInfo, paths)

154

.setFieldDelimiter(delimiter.charAt(0))

155

.setQuoteCharacter(quoteChar.charAt(0))

156

.setIgnoreParseErrors(ignoreErrors)

157

.build();

158

```

159

160

## Configuration Scenarios

161

162

### Standard CSV (RFC 4180)

163

164

```java

165

// Default configuration matches RFC 4180 standard

166

Configuration standardCsv = new Configuration();

167

standardCsv.setString(CsvFormatOptions.FIELD_DELIMITER, ",");

168

standardCsv.setString(CsvFormatOptions.QUOTE_CHARACTER, "\"");

169

standardCsv.setBoolean(CsvFormatOptions.DISABLE_QUOTE_CHARACTER, false);

170

standardCsv.setBoolean(CsvFormatOptions.ALLOW_COMMENTS, false);

171

standardCsv.setBoolean(CsvFormatOptions.IGNORE_PARSE_ERRORS, false);

172

173

// Handles: "name","age","active"

174

// "John Doe",25,true

175

// "Jane Smith",30,false

176

```

177

178

### Tab-Separated Values (TSV)

179

180

```java

181

// Configure for tab-delimited files

182

Configuration tsvConfig = new Configuration();

183

tsvConfig.setString(CsvFormatOptions.FIELD_DELIMITER, "\t");

184

tsvConfig.setString(CsvFormatOptions.QUOTE_CHARACTER, "\"");

185

186

// Handles: name age active

187

// John Doe 25 true

188

// Jane Smith 30 false

189

```

190

191

### Pipe-Delimited with Custom Quoting

192

193

```java

194

// Configure for pipe-delimited data warehouse format

195

Configuration pipeConfig = new Configuration();

196

pipeConfig.setString(CsvFormatOptions.FIELD_DELIMITER, "|");

197

pipeConfig.setString(CsvFormatOptions.QUOTE_CHARACTER, "'");

198

pipeConfig.setString(CsvFormatOptions.ESCAPE_CHARACTER, "\\");

199

pipeConfig.setString(CsvFormatOptions.NULL_LITERAL, "NULL");

200

201

// Handles: 'John Doe'|25|NULL

202

// 'Jane O\'Brien'|30|true

203

```

204

205

### Unquoted Format

206

207

```java

208

// Configure for unquoted, simple delimiter format

209

Configuration unquotedConfig = new Configuration();

210

unquotedConfig.setString(CsvFormatOptions.FIELD_DELIMITER, ",");

211

unquotedConfig.setBoolean(CsvFormatOptions.DISABLE_QUOTE_CHARACTER, true);

212

unquotedConfig.setString(CsvFormatOptions.NULL_LITERAL, "NULL");

213

214

// Handles: John Doe,25,NULL

215

// Jane Smith,30,true

216

```

217

218

### Fault-Tolerant Configuration

219

220

```java

221

// Configure for robust parsing of messy data

222

Configuration robustConfig = new Configuration();

223

robustConfig.setBoolean(CsvFormatOptions.IGNORE_PARSE_ERRORS, true);

224

robustConfig.setBoolean(CsvFormatOptions.ALLOW_COMMENTS, true);

225

robustConfig.setString(CsvFormatOptions.NULL_LITERAL, "NULL");

226

227

// Handles files with:

228

// # This is a comment line

229

// name,age,active

230

// John Doe,25,true

231

// Jane Smith,invalid_age,false   <-- this record will be skipped (invalid age); this note is not part of the file

232

// Bob Johnson,35,true

233

```

234

235

### Scientific Notation Control

236

237

```java

238

// Configure numeric formatting for financial data

239

Configuration financialConfig = new Configuration();

240

financialConfig.setBoolean(CsvFormatOptions.WRITE_BIGDECIMAL_IN_SCIENTIFIC_NOTATION, false);

241

financialConfig.setString(CsvFormatOptions.FIELD_DELIMITER, ",");

242

243

// Large numbers output as: 1234567890.12 instead of 1.23456789012E9

244

```

245

246

### Complex Array Handling

247

248

```java

249

// Configure for complex data types with arrays

250

Configuration arrayConfig = new Configuration();

251

arrayConfig.setString(CsvFormatOptions.ARRAY_ELEMENT_DELIMITER, "::");

252

arrayConfig.setString(CsvFormatOptions.FIELD_DELIMITER, "|");

253

arrayConfig.setString(CsvFormatOptions.QUOTE_CHARACTER, "\"");

254

255

// Handles: "John Doe"|"java::scala::python"|"senior"

256

// "Jane Smith"|"javascript::typescript"|"junior"

257

```

258

259

## Integration Patterns

260

261

### Configuration Inheritance

262

263

```java

264

// Base configuration for organization standard

265

Configuration baseConfig = new Configuration();

266

baseConfig.setString(CsvFormatOptions.FIELD_DELIMITER, "|");

267

baseConfig.setString(CsvFormatOptions.QUOTE_CHARACTER, "'");

268

baseConfig.setBoolean(CsvFormatOptions.ALLOW_COMMENTS, true);

269

270

// Project-specific overrides

271

Configuration projectConfig = new Configuration(baseConfig);

272

projectConfig.setBoolean(CsvFormatOptions.IGNORE_PARSE_ERRORS, true);

273

projectConfig.setString(CsvFormatOptions.NULL_LITERAL, "N/A");

274

```

275

276

### Environment-Based Configuration

277

278

```java

279

// Load configuration from environment or properties

280

Configuration envConfig = new Configuration();

281

282

// Check environment variables

283

String delimiter = System.getenv("CSV_DELIMITER");

284

if (delimiter != null) {

285

envConfig.setString(CsvFormatOptions.FIELD_DELIMITER, delimiter);

286

}

287

288

String ignoreErrors = System.getenv("CSV_IGNORE_ERRORS");

289

if ("true".equalsIgnoreCase(ignoreErrors)) {

290

envConfig.setBoolean(CsvFormatOptions.IGNORE_PARSE_ERRORS, true);

291

}

292

```

293

294

### Dynamic Configuration

295

296

```java

297

// Configure based on data source characteristics

298

public Configuration createConfigForSource(String sourceType) {

299

Configuration config = new Configuration();

300

301

switch (sourceType) {

302

case "financial":

303

config.setString(CsvFormatOptions.FIELD_DELIMITER, ",");

304

config.setBoolean(CsvFormatOptions.WRITE_BIGDECIMAL_IN_SCIENTIFIC_NOTATION, false);

305

config.setString(CsvFormatOptions.NULL_LITERAL, "NULL");

306

break;

307

308

case "log_files":

309

config.setString(CsvFormatOptions.FIELD_DELIMITER, "\t");

310

config.setBoolean(CsvFormatOptions.ALLOW_COMMENTS, true);

311

config.setBoolean(CsvFormatOptions.IGNORE_PARSE_ERRORS, true);

312

break;

313

314

case "data_warehouse":

315

config.setString(CsvFormatOptions.FIELD_DELIMITER, "|");

316

config.setString(CsvFormatOptions.QUOTE_CHARACTER, "'");

317

config.setString(CsvFormatOptions.ESCAPE_CHARACTER, "\\");

318

break;

319

}

320

321

return config;

322

}

323

```

324

325

## Validation and Error Handling

326

327

### Configuration Validation

328

329

```java

330

// Validate configuration consistency

331

public void validateCsvConfig(Configuration config) {

332

String delimiter = config.getString(CsvFormatOptions.FIELD_DELIMITER, ",");

333

String quote = config.getString(CsvFormatOptions.QUOTE_CHARACTER, "\"");

334

Boolean disableQuote = config.getBoolean(CsvFormatOptions.DISABLE_QUOTE_CHARACTER, false);

335

336

// Validate delimiter is single character

337

if (delimiter.length() != 1) {

338

throw new IllegalArgumentException("Field delimiter must be a single character");

339

}

340

341

// Validate quote character consistency

342

if (!disableQuote && quote.length() != 1) {

343

throw new IllegalArgumentException("Quote character must be a single character");

344

}

345

346

// Validate delimiter and quote are different

347

if (!disableQuote && delimiter.equals(quote)) {

348

throw new IllegalArgumentException("Field delimiter and quote character must be different");

349

}

350

}

351

```

352

353

### Default Value Handling

354

355

All configuration options have sensible defaults that work for standard CSV files:

356

357

- **Field delimiter**: Comma (`,`) - most common CSV delimiter

358

- **Quote character**: Double quote (`"`) - RFC 4180 standard

359

- **Disable quoting**: `false` - preserves data integrity

360

- **Allow comments**: `false` - strict parsing by default

361

- **Ignore errors**: `false` - fail-fast approach for data quality

362

- **Array delimiter**: Semicolon (`;`) - doesn't conflict with common CSV content

363

- **Escape character**: None - relies on proper quoting

364

- **Null literal**: None - empty strings represent null values

365

- **Scientific notation**: `true` - preserves precision for large numbers

366

367

These defaults ensure that CSV components work correctly with minimal configuration while providing flexibility for customization when needed.