or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

api-completeness.md  checkpointing.md  data-generation.md  execution.md  fault-tolerance.md  index.md  streaming.md

docs/data-generation.md

0

# Data Generation

1

2

Comprehensive test data generation utilities providing consistent, reusable datasets for Apache Flink API testing. These utilities generate standard datasets that are widely used across Flink's test suite.

3

4

## Core Data Generation Classes

5

6

### CollectionDataSets

7

8

Primary utility for generating Java API test datasets with various data types and sizes.

9

10

```java { .api }

11

public class CollectionDataSets {

12

// Basic tuple datasets

13

public static DataSet<Tuple3<Integer, Long, String>> get3TupleDataSet(ExecutionEnvironment env);

14

public static DataSet<Tuple3<Integer, Long, String>> getSmall3TupleDataSet(ExecutionEnvironment env);

15

public static DataSet<Tuple5<Integer, Long, Integer, String, Long>> get5TupleDataSet(ExecutionEnvironment env);

16

public static DataSet<Tuple5<Integer, Long, Integer, String, Long>> getSmall5TupleDataSet(ExecutionEnvironment env);

17

18

// Nested tuple datasets

19

public static DataSet<Tuple2<Tuple2<Integer, Integer>, String>> getSmallNestedTupleDataSet(ExecutionEnvironment env);

20

public static DataSet<Tuple2<Tuple2<Integer, Integer>, String>> getGroupSortedNestedTupleDataSet(ExecutionEnvironment env);

21

public static DataSet<Tuple3<Tuple2<Integer, Integer>, String, Integer>> getGroupSortedNestedTupleDataSet2(ExecutionEnvironment env);

22

public static DataSet<Tuple2<byte[], Integer>> getTuple2WithByteArrayDataSet(ExecutionEnvironment env);

23

24

// Complex tuple datasets

25

public static DataSet<Tuple7<Integer, String, Integer, Integer, Long, String, Long>> getSmallTuplebasedDataSet(ExecutionEnvironment env);

26

public static DataSet<Tuple7<Long, Integer, Integer, Long, String, Integer, String>> getSmallTuplebasedDataSetMatchingPojo(ExecutionEnvironment env);

27

28

// Primitive type datasets

29

public static DataSet<String> getStringDataSet(ExecutionEnvironment env);

30

public static DataSet<Integer> getIntegerDataSet(ExecutionEnvironment env);

31

32

// Basic custom type datasets

33

public static DataSet<CustomType> getCustomTypeDataSet(ExecutionEnvironment env);

34

public static DataSet<CustomType> getSmallCustomTypeDataSet(ExecutionEnvironment env);

35

36

// POJO datasets

37

public static DataSet<POJO> getSmallPojoDataSet(ExecutionEnvironment env);

38

public static DataSet<POJO> getDuplicatePojoDataSet(ExecutionEnvironment env);

39

public static DataSet<POJO> getMixedPojoDataSet(ExecutionEnvironment env);

40

41

// Complex nested datasets

42

public static DataSet<CrazyNested> getCrazyNestedDataSet(ExecutionEnvironment env);

43

public static DataSet<FromTupleWithCTor> getPojoExtendingFromTuple(ExecutionEnvironment env);

44

public static DataSet<PojoContainingTupleAndWritable> getPojoContainingTupleAndWritable(ExecutionEnvironment env);

45

public static DataSet<PojoContainingTupleAndWritable> getGroupSortedPojoContainingTupleAndWritable(ExecutionEnvironment env);

46

public static DataSet<Tuple3<Integer, CrazyNested, POJO>> getTupleContainingPojos(ExecutionEnvironment env);

47

48

// Advanced POJO datasets

49

public static DataSet<PojoWithMultiplePojos> getPojoWithMultiplePojos(ExecutionEnvironment env);

50

public static DataSet<PojoWithDateAndEnum> getPojoWithDateAndEnum(ExecutionEnvironment env);

51

public static DataSet<PojoWithCollection> getPojoWithCollection(ExecutionEnvironment env);

52

}

53

```

54

55

### ValueCollectionDataSets

56

57

Utility for generating datasets using Flink Value types for serialization and performance testing.

58

59

```java { .api }

60

public class ValueCollectionDataSets {

61

public static DataSet<Tuple3<IntValue, LongValue, StringValue>> get3TupleDataSet(ExecutionEnvironment env);

62

public static DataSet<Tuple3<IntValue, LongValue, StringValue>> getSmall3TupleDataSet(ExecutionEnvironment env);

63

public static DataSet<Tuple5<IntValue, LongValue, IntValue, StringValue, LongValue>> get5TupleDataSet(ExecutionEnvironment env);

64

public static DataSet<StringValue> getStringDataSet(ExecutionEnvironment env);

65

public static DataSet<IntValue> getIntDataSet(ExecutionEnvironment env);

66

public static DataSet<CustomType> getCustomTypeDataSet(ExecutionEnvironment env);

67

public static DataSet<POJO> getSmallPojoDataSet(ExecutionEnvironment env);

68

}

69

```

70

71

## Custom Test Types

72

73

### CustomType

74

75

Serializable class with integer, long, and string fields for general testing.

76

77

```java { .api }

78

public static class CustomType implements Serializable {

79

public int myInt;

80

public long myLong;

81

public String myString;

82

83

public CustomType();

84

public CustomType(int i, long l, String s);

85

86

@Override

87

public boolean equals(Object obj);

88

@Override

89

public int hashCode();

90

@Override

91

public String toString();

92

}

93

```

94

95

### POJO

96

97

Plain Old Java Object for testing POJO serialization and type extraction.

98

99

```java { .api }

100

public static class POJO implements Serializable {

101

public int number;

102

public String str;

103

104

public POJO();

105

public POJO(int i, String s);

106

107

@Override

108

public boolean equals(Object obj);

109

@Override

110

public int hashCode();

111

@Override

112

public String toString();

113

}

114

```

115

116

### CrazyNested

117

118

Complex nested structure for advanced testing scenarios.

119

120

```java { .api }

121

public static class CrazyNested implements Serializable {

122

public POJO nestLvl1;

123

public CustomType nestLvl2;

124

public int simpleField;

125

126

public CrazyNested();

127

public CrazyNested(POJO p, CustomType ct, int i);

128

129

@Override

130

public boolean equals(Object obj);

131

@Override

132

public int hashCode();

133

@Override

134

public String toString();

135

}

136

```

137

138

### FromTupleWithCTor

139

140

POJO class that extends Tuple3 (through the intermediate `FromTuple` base class) for testing inheritance and serialization scenarios.

141

142

```java { .api }

143

public static class FromTupleWithCTor extends FromTuple {

144

public FromTupleWithCTor();

145

public FromTupleWithCTor(String f0, String f1, Long f2);

146

}

147

148

public static class FromTuple extends Tuple3<String, String, Long> {

149

public FromTuple();

150

}

151

```

152

153

### PojoContainingTupleAndWritable

154

155

Complex POJO containing both tuple and Hadoop Writable types for compatibility testing.

156

157

```java { .api }

158

public static class PojoContainingTupleAndWritable {

159

public int someInt;

160

public String someString;

161

public IntWritable hadoopFan;

162

public Tuple2<Long, Long> theTuple;

163

164

public PojoContainingTupleAndWritable();

165

public PojoContainingTupleAndWritable(int i, String s, IntWritable iw, Tuple2<Long, Long> t);

166

}

167

```

168

169

### PojoWithMultiplePojos

170

171

POJO containing multiple nested POJO instances for complex object graph testing.

172

173

```java { .api }

174

public static class PojoWithMultiplePojos {

175

public Pojo1 pojo1;

176

public Pojo2 pojo2;

177

public Integer key;

178

179

public PojoWithMultiplePojos();

180

}

181

182

public static class Pojo1 {

183

public String a;

184

public String b;

185

186

public Pojo1();

187

}

188

189

public static class Pojo2 {

190

public int a2;

191

192

public Pojo2();

193

}

194

```

195

196

### PojoWithDateAndEnum

197

198

POJO containing Date and Enum fields for specialized serialization testing.

199

200

```java { .api }

201

public static class PojoWithDateAndEnum {

202

public String group;

203

public Date date;

204

public Color color;

205

206

public PojoWithDateAndEnum();

207

}

208

```

209

210

### PojoWithCollection

211

212

POJO containing collection fields for testing complex collection serialization.

213

214

```java { .api }

215

public static class PojoWithCollection {

216

public List<Pojo1> pojos;

217

public int key;

218

219

public PojoWithCollection();

220

}

221

```

222

223

## Input Format Utilities

224

225

### InfiniteIntegerInputFormat

226

227

Input format that generates infinite sequences of integers for stress testing.

228

229

```java { .api }

230

public class InfiniteIntegerInputFormat extends GenericInputFormat<Integer> {

231

public InfiniteIntegerInputFormat(boolean delay);

232

233

@Override

234

public boolean reachedEnd();

235

@Override

236

public Integer nextRecord(Integer reuse);

237

}

238

```

239

240

### InfiniteIntegerTupleInputFormat

241

242

Input format generating infinite sequences of integer tuples.

243

244

```java { .api }

245

public class InfiniteIntegerTupleInputFormat extends GenericInputFormat<Tuple2<Integer, Integer>> {

246

public InfiniteIntegerTupleInputFormat(boolean delay);

247

248

@Override

249

public boolean reachedEnd();

250

@Override

251

public Tuple2<Integer, Integer> nextRecord(Tuple2<Integer, Integer> reuse);

252

}

253

```

254

255

### UniformIntTupleGeneratorInputFormat

256

257

Input format for generating uniformly distributed integer tuple data.

258

259

```java { .api }

260

public class UniformIntTupleGeneratorInputFormat extends GenericInputFormat<Tuple2<Integer, Integer>> {

261

public UniformIntTupleGeneratorInputFormat(int numKeys, int numVals);

262

263

@Override

264

public boolean reachedEnd();

265

@Override

266

public Tuple2<Integer, Integer> nextRecord(Tuple2<Integer, Integer> reuse);

267

}

268

```

269

270

### PointInFormat

271

272

Input format for reading Point objects from text files.

273

274

```java { .api }

275

public class PointInFormat extends DelimitedInputFormat<Point> {

276

public PointInFormat();

277

278

@Override

279

public Point readRecord(Point reusable, byte[] bytes, int offset, int numBytes);

280

}

281

```

282

283

## Coordinate and Geometry Types

284

285

### CoordVector

286

287

Vector coordinate representation for geometric testing.

288

289

```java { .api }

290

public class CoordVector implements Serializable {

291

public float x;

292

public float y;

293

public float z;

294

295

public CoordVector();

296

public CoordVector(float x, float y, float z);

297

298

@Override

299

public boolean equals(Object obj);

300

@Override

301

public int hashCode();

302

@Override

303

public String toString();

304

}

305

```

306

307

## Usage Examples

308

309

### Basic Dataset Usage

310

311

```java

312

// Get standard test data

313

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

314

DataSet<Tuple3<Integer, Long, String>> dataSet = CollectionDataSets.get3TupleDataSet(env);

315

316

// Use small dataset for quick tests

317

DataSet<CustomType> smallDataSet = CollectionDataSets.getSmallCustomTypeDataSet(env);

318

319

// Use Value types for serialization testing

320

DataSet<Tuple3<IntValue, LongValue, StringValue>> valueDataSet =

321

ValueCollectionDataSets.get3TupleDataSet(env);

322

```

323

324

### Custom Input Format Usage

325

326

```java

327

// Infinite data source for stress testing

328

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

329

DataSet<Integer> infiniteInts = env.createInput(new InfiniteIntegerInputFormat(false));

330

331

// Uniform distribution generator

332

DataSet<Tuple2<Integer, Integer>> uniformData =

333

env.createInput(new UniformIntTupleGeneratorInputFormat(100, 1000));

334

```

335

336

### Complex Type Testing

337

338

```java

339

// Test with nested objects

340

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

341

DataSet<CrazyNested> nestedDataSet = CollectionDataSets.getCrazyNestedDataSet(env);

342

343

// Verify serialization

344

nestedDataSet

345

.map(x -> x) // Identity map to trigger serialization

346

.collect(); // Force execution

347

```