Comprehensive test data generation utilities providing consistent, reusable datasets for Apache Flink API testing. These utilities generate standard datasets that are widely used across Flink's test suite.
Primary utility for generating Java API test datasets with various data types and sizes.
public class CollectionDataSets {
// Basic tuple datasets
public static DataSet<Tuple3<Integer, Long, String>> get3TupleDataSet(ExecutionEnvironment env);
public static DataSet<Tuple3<Integer, Long, String>> getSmall3TupleDataSet(ExecutionEnvironment env);
public static DataSet<Tuple5<Integer, Long, Integer, String, Long>> get5TupleDataSet(ExecutionEnvironment env);
public static DataSet<Tuple5<Integer, Long, Integer, String, Long>> getSmall5TupleDataSet(ExecutionEnvironment env);
// Nested tuple datasets
public static DataSet<Tuple2<Tuple2<Integer, Integer>, String>> getSmallNestedTupleDataSet(ExecutionEnvironment env);
public static DataSet<Tuple2<Tuple2<Integer, Integer>, String>> getGroupSortedNestedTupleDataSet(ExecutionEnvironment env);
public static DataSet<Tuple3<Tuple2<Integer, Integer>, String, Integer>> getGroupSortedNestedTupleDataSet2(ExecutionEnvironment env);
public static DataSet<Tuple2<byte[], Integer>> getTuple2WithByteArrayDataSet(ExecutionEnvironment env);
// Complex tuple datasets
public static DataSet<Tuple7<Integer, String, Integer, Integer, Long, String, Long>> getSmallTuplebasedDataSet(ExecutionEnvironment env);
public static DataSet<Tuple7<Long, Integer, Integer, Long, String, Integer, String>> getSmallTuplebasedDataSetMatchingPojo(ExecutionEnvironment env);
// Primitive type datasets
public static DataSet<String> getStringDataSet(ExecutionEnvironment env);
public static DataSet<Integer> getIntegerDataSet(ExecutionEnvironment env);
// Basic custom type datasets
public static DataSet<CustomType> getCustomTypeDataSet(ExecutionEnvironment env);
public static DataSet<CustomType> getSmallCustomTypeDataSet(ExecutionEnvironment env);
// POJO datasets
public static DataSet<POJO> getSmallPojoDataSet(ExecutionEnvironment env);
public static DataSet<POJO> getDuplicatePojoDataSet(ExecutionEnvironment env);
public static DataSet<POJO> getMixedPojoDataSet(ExecutionEnvironment env);
// Complex nested datasets
public static DataSet<CrazyNested> getCrazyNestedDataSet(ExecutionEnvironment env);
public static DataSet<FromTupleWithCTor> getPojoExtendingFromTuple(ExecutionEnvironment env);
public static DataSet<PojoContainingTupleAndWritable> getPojoContainingTupleAndWritable(ExecutionEnvironment env);
public static DataSet<PojoContainingTupleAndWritable> getGroupSortedPojoContainingTupleAndWritable(ExecutionEnvironment env);
public static DataSet<Tuple3<Integer, CrazyNested, POJO>> getTupleContainingPojos(ExecutionEnvironment env);
// Advanced POJO datasets
public static DataSet<PojoWithMultiplePojos> getPojoWithMultiplePojos(ExecutionEnvironment env);
public static DataSet<PojoWithDateAndEnum> getPojoWithDateAndEnum(ExecutionEnvironment env);
public static DataSet<PojoWithCollection> getPojoWithCollection(ExecutionEnvironment env);
}
Utility for generating datasets using Flink Value types for serialization and performance testing.
public class ValueCollectionDataSets {
public static DataSet<Tuple3<IntValue, LongValue, StringValue>> get3TupleDataSet(ExecutionEnvironment env);
public static DataSet<Tuple3<IntValue, LongValue, StringValue>> getSmall3TupleDataSet(ExecutionEnvironment env);
public static DataSet<Tuple5<IntValue, LongValue, IntValue, StringValue, LongValue>> get5TupleDataSet(ExecutionEnvironment env);
public static DataSet<StringValue> getStringDataSet(ExecutionEnvironment env);
public static DataSet<IntValue> getIntDataSet(ExecutionEnvironment env);
public static DataSet<CustomType> getCustomTypeDataSet(ExecutionEnvironment env);
public static DataSet<POJO> getSmallPojoDataSet(ExecutionEnvironment env);
}
Serializable class with integer, long, and string fields for general testing.
public static class CustomType implements Serializable {
public int myInt;
public long myLong;
public String myString;
public CustomType();
public CustomType(int i, long l, String s);
@Override
public boolean equals(Object obj);
@Override
public int hashCode();
@Override
public String toString();
}
Plain Old Java Object for testing POJO serialization and type extraction.
public static class POJO implements Serializable {
public int number;
public String str;
public POJO();
public POJO(int i, String s);
@Override
public boolean equals(Object obj);
@Override
public int hashCode();
@Override
public String toString();
}
Complex nested structure for advanced testing scenarios.
public static class CrazyNested implements Serializable {
public POJO nestLvl1;
public CustomType nestLvl2;
public int simpleField;
public CrazyNested();
public CrazyNested(POJO p, CustomType ct, int i);
@Override
public boolean equals(Object obj);
@Override
public int hashCode();
@Override
public String toString();
}
POJO class that extends Tuple3 (indirectly, through FromTuple) for testing inheritance and serialization scenarios.
public static class FromTupleWithCTor extends FromTuple {
public FromTupleWithCTor();
public FromTupleWithCTor(String f0, String f1, Long f2);
}
public static class FromTuple extends Tuple3<String, String, Long> {
public FromTuple();
}
Complex POJO containing both tuple and Hadoop Writable types for compatibility testing.
public static class PojoContainingTupleAndWritable {
public int someInt;
public String someString;
public IntWritable hadoopFan;
public Tuple2<Long, Long> theTuple;
public PojoContainingTupleAndWritable();
public PojoContainingTupleAndWritable(int i, String s, IntWritable iw, Tuple2<Long, Long> t);
}
POJO containing multiple nested POJO instances for complex object graph testing.
public static class PojoWithMultiplePojos {
public Pojo1 pojo1;
public Pojo2 pojo2;
public Integer key;
public PojoWithMultiplePojos();
}
public static class Pojo1 {
public String a;
public String b;
public Pojo1();
}
public static class Pojo2 {
public int a2;
public Pojo2();
}
POJO containing Date and Enum fields for specialized serialization testing.
public static class PojoWithDateAndEnum {
public String group;
public Date date;
public Color color;
public PojoWithDateAndEnum();
}
POJO containing collection fields for testing complex collection serialization.
public static class PojoWithCollection {
public List<Pojo1> pojos;
public int key;
public PojoWithCollection();
}
Input format that generates infinite sequences of integers for stress testing.
public class InfiniteIntegerInputFormat extends GenericInputFormat<Integer> {
public InfiniteIntegerInputFormat(boolean delay);
@Override
public boolean reachedEnd();
@Override
public Integer nextRecord(Integer reuse);
}
Input format generating infinite sequences of integer tuples.
public class InfiniteIntegerTupleInputFormat extends GenericInputFormat<Tuple2<Integer, Integer>> {
public InfiniteIntegerTupleInputFormat(boolean delay);
@Override
public boolean reachedEnd();
@Override
public Tuple2<Integer, Integer> nextRecord(Tuple2<Integer, Integer> reuse);
}
Input format for generating uniformly distributed integer tuple data.
public class UniformIntTupleGeneratorInputFormat extends GenericInputFormat<Tuple2<Integer, Integer>> {
public UniformIntTupleGeneratorInputFormat(int numKeys, int numVals);
@Override
public boolean reachedEnd();
@Override
public Tuple2<Integer, Integer> nextRecord(Tuple2<Integer, Integer> reuse);
}
Input format for reading Point objects from text files.
public class PointInFormat extends DelimitedInputFormat<Point> {
public PointInFormat();
@Override
public Point readRecord(Point reusable, byte[] bytes, int offset, int numBytes);
}
Vector coordinate representation for geometric testing.
public class CoordVector implements Serializable {
public float x;
public float y;
public float z;
public CoordVector();
public CoordVector(float x, float y, float z);
@Override
public boolean equals(Object obj);
@Override
public int hashCode();
@Override
public String toString();
}
// Get standard test data
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Tuple3<Integer, Long, String>> dataSet = CollectionDataSets.get3TupleDataSet(env);
// Use small dataset for quick tests
DataSet<CustomType> smallDataSet = CollectionDataSets.getSmallCustomTypeDataSet(env);
// Use Value types for serialization testing
DataSet<Tuple3<IntValue, LongValue, StringValue>> valueDataSet =
ValueCollectionDataSets.get3TupleDataSet(env);
// Infinite data source for stress testing
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Integer> infiniteInts = env.createInput(new InfiniteIntegerInputFormat(false)); // constructor takes the boolean 'delay' flag
// Uniform distribution generator
DataSet<Tuple2<Integer, Integer>> uniformData =
env.createInput(new UniformIntTupleGeneratorInputFormat(100, 1000));
// Test with nested objects
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<CrazyNested> nestedDataSet = CollectionDataSets.getCrazyNestedDataSet(env);
// Verify serialization
nestedDataSet
.map(x -> x) // Identity map to trigger serialization
.collect(); // Force execution