Apache Flink is an open source stream processing framework with powerful stream- and batch-processing capabilities.
—
Fundamental function interfaces and type system that form the building blocks for all Flink applications. This includes user-defined functions, tuple system, type descriptors, and core abstractions used across all Flink APIs.
Base interfaces for all user-defined functions in Flink applications.
/**
 * Base interface for all user-defined functions.
 * Marker interface with no methods of its own; concrete operation
 * contracts (MapFunction, FilterFunction, ...) extend it.
 */
interface Function {}
/**
 * Function for 1:1 transformations: every input element produces
 * exactly one output element.
 * @param <T> Input type
 * @param <O> Output type
 */
interface MapFunction<T, O> extends Function {
/**
 * Transform a single input element
 * @param value Input element
 * @return Transformed element
 * @throws Exception if the transformation fails; the runtime treats this as a task failure
 */
O map(T value) throws Exception;
}
/**
 * Function for 1:N transformations: one input element may produce
 * any number of output elements.
 * @param <T> Input type
 * @param <O> Output type
 */
interface FlatMapFunction<T, O> extends Function {
/**
 * Transform one element into zero, one, or more elements,
 * emitting each result through the collector.
 * @param value Input element
 * @param out Collector for output elements
 * @throws Exception if the transformation fails; the runtime treats this as a task failure
 */
void flatMap(T value, Collector<O> out) throws Exception;
}
/**
 * Function for filtering elements: elements for which the predicate
 * returns false are dropped from the stream/dataset.
 * @param <T> Element type
 */
interface FilterFunction<T> extends Function {
/**
 * Test whether element should be kept
 * @param value Element to test
 * @return true if element should be kept, false to discard it
 * @throws Exception if the predicate evaluation fails; the runtime treats this as a task failure
 */
boolean filter(T value) throws Exception;
}
/**
 * Function for reduce operations on streams/datasets: repeatedly
 * combines pairs of elements into a single element of the same type.
 * @param <T> Element type
 */
interface ReduceFunction<T> extends Function {
/**
 * Combine two elements into one
 * @param value1 First element
 * @param value2 Second element
 * @return Combined element (same type as the inputs)
 * @throws Exception if the combination fails; the runtime treats this as a task failure
 */
T reduce(T value1, T value2) throws Exception;
}
/**
 * Function for incremental aggregation operations: inputs are folded
 * into an intermediate accumulator, from which the final result is
 * extracted once the aggregation is complete.
 * @param <IN> Input type
 * @param <ACC> Accumulator type (intermediate aggregation state)
 * @param <OUT> Output type
 */
interface AggregateFunction<IN, ACC, OUT> extends Function {
/**
 * Create new accumulator
 * @return New accumulator representing the empty aggregation
 */
ACC createAccumulator();
/**
 * Add input to accumulator.
 * Note: parameter order is (value, accumulator), matching the signature below.
 * @param value Input value
 * @param accumulator Current accumulator
 * @return Updated accumulator
 */
ACC add(IN value, ACC accumulator);
/**
 * Get result from accumulator
 * @param accumulator Final accumulator
 * @return Result
 */
OUT getResult(ACC accumulator);
/**
 * Merge two accumulators.
 * NOTE(review): in Flink this is typically only invoked when partial
 * aggregations must be combined (e.g. merging session windows) — confirm.
 * @param a First accumulator
 * @param b Second accumulator
 * @return Merged accumulator
 */
ACC merge(ACC a, ACC b);
}
/**
 * Function for extracting keys from elements, used for partitioning
 * (keyBy) and grouping.
 * NOTE(review): keys should be deterministic for a given element, since
 * the same element must always route to the same partition — confirm.
 * @param <IN> Input type
 * @param <KEY> Key type
 */
interface KeySelector<IN, KEY> extends Function {
/**
 * Extract key from element
 * @param value Input element
 * @return Key
 * @throws Exception if key extraction fails; the runtime treats this as a task failure
 */
KEY getKey(IN value) throws Exception;
}
Rich variants of user-defined functions that provide access to runtime context including state, metrics, and configuration.
/**
 * Base class for rich user-defined functions: adds a lifecycle
 * (open/close) and access to the RuntimeContext on top of the plain
 * Function contract.
 */
abstract class AbstractRichFunction implements Function {
/**
 * Get runtime context giving access to task info and keyed state.
 * (Declared without a body here — this file is an API stub.)
 * @return Runtime context
 */
public RuntimeContext getRuntimeContext();
/**
 * Initialization method called once per parallel instance,
 * before any records are processed. Default implementation is a no-op.
 * @param parameters Configuration parameters
 * @throws Exception if initialization fails; the runtime treats this as a task failure
 */
public void open(Configuration parameters) throws Exception {}
/**
 * Cleanup method called once per parallel instance, after the last
 * record has been processed. Default implementation is a no-op.
 * @throws Exception if cleanup fails
 */
public void close() throws Exception {}
}
/**
 * Rich map function: MapFunction plus the lifecycle and runtime-context
 * access inherited from AbstractRichFunction.
 * @param <T> Input type
 * @param <O> Output type
 */
abstract class RichMapFunction<T, O> extends AbstractRichFunction implements MapFunction<T, O> {}
/**
 * Rich flat map function: FlatMapFunction plus the lifecycle and
 * runtime-context access inherited from AbstractRichFunction.
 * @param <T> Input type
 * @param <O> Output type
 */
abstract class RichFlatMapFunction<T, O> extends AbstractRichFunction implements FlatMapFunction<T, O> {}
/**
 * Rich filter function: FilterFunction plus the lifecycle and
 * runtime-context access inherited from AbstractRichFunction.
 * @param <T> Element type
 */
abstract class RichFilterFunction<T> extends AbstractRichFunction implements FilterFunction<T> {}
/**
 * Rich reduce function: ReduceFunction plus the lifecycle and
 * runtime-context access inherited from AbstractRichFunction.
 * @param <T> Element type
 */
abstract class RichReduceFunction<T> extends AbstractRichFunction implements ReduceFunction<T> {}
/**
 * Rich aggregate function: AggregateFunction plus the lifecycle and
 * runtime-context access inherited from AbstractRichFunction.
 * @param <IN> Input type
 * @param <ACC> Accumulator type
 * @param <OUT> Output type
 */
abstract class RichAggregateFunction<IN, ACC, OUT> extends AbstractRichFunction implements AggregateFunction<IN, ACC, OUT> {}
/**
 * Runtime context providing access to state, metrics, and configuration
 * for one parallel instance of a rich function.
 * NOTE(review): the keyed-state accessors below are presumably only
 * usable on keyed streams, as in the real Flink API — confirm.
 */
interface RuntimeContext {
/**
 * Get task name
 * @return Task name
 */
String getTaskName();
/**
 * Get parallelism of current operator
 * @return Parallelism (number of parallel subtasks)
 */
int getParallelism();
/**
 * Get index of current parallel subtask
 * @return Subtask index (0-based, less than getParallelism())
 */
int getIndexOfThisSubtask();
/**
 * Get single-value keyed state for the given descriptor
 * @param stateDescriptor State descriptor
 * @param <T> State type
 * @return State instance
 */
<T> ValueState<T> getState(ValueStateDescriptor<T> stateDescriptor);
/**
 * Get list-valued keyed state for the given descriptor
 * @param stateDescriptor State descriptor
 * @param <T> Element type
 * @return List state instance
 */
<T> ListState<T> getListState(ListStateDescriptor<T> stateDescriptor);
/**
 * Get map-valued keyed state for the given descriptor
 * @param stateDescriptor State descriptor
 * @param <UK> User key type
 * @param <UV> User value type
 * @return Map state instance
 */
<UK, UV> MapState<UK, UV> getMapState(MapStateDescriptor<UK, UV> stateDescriptor);
}
Functions for joining and co-grouping datasets and streams.
/**
 * Function for joining two datasets/streams: invoked once per pair of
 * matching elements, producing exactly one result per pair.
 * @param <IN1> First input type
 * @param <IN2> Second input type
 * @param <OUT> Output type
 */
interface JoinFunction<IN1, IN2, OUT> extends Function {
/**
 * Join two elements
 * @param first Element from first input
 * @param second Element from second input
 * @return Joined result
 * @throws Exception if the join fails; the runtime treats this as a task failure
 */
OUT join(IN1 first, IN2 second) throws Exception;
}
/**
 * Function for flat joining two datasets/streams: like JoinFunction,
 * but each pair may emit zero, one, or more results via a collector.
 * @param <IN1> First input type
 * @param <IN2> Second input type
 * @param <OUT> Output type
 */
interface FlatJoinFunction<IN1, IN2, OUT> extends Function {
/**
 * Join two elements producing zero, one, or more results
 * @param first Element from first input
 * @param second Element from second input
 * @param out Collector for output elements
 * @throws Exception if the join fails; the runtime treats this as a task failure
 */
void join(IN1 first, IN2 second, Collector<OUT> out) throws Exception;
}
/**
 * Function for co-grouping two datasets/streams: invoked once per key
 * with ALL elements sharing that key from each side, unlike the
 * pairwise JoinFunction.
 * @param <IN1> First input type
 * @param <IN2> Second input type
 * @param <OUT> Output type
 */
interface CoGroupFunction<IN1, IN2, OUT> extends Function {
/**
 * Co-group iterables from both inputs; either iterable may be empty
 * when a key appears on only one side.
 * @param first Iterable from first input
 * @param second Iterable from second input
 * @param out Collector for output elements
 * @throws Exception if the co-group operation fails; the runtime treats this as a task failure
 */
void coGroup(Iterable<IN1> first, Iterable<IN2> second, Collector<OUT> out) throws Exception;
}
/**
 * Function for cross product operations: invoked for every pairing of
 * an element from the first input with an element from the second.
 * @param <IN1> First input type
 * @param <IN2> Second input type
 * @param <OUT> Output type
 */
interface CrossFunction<IN1, IN2, OUT> extends Function {
/**
 * Cross two elements
 * @param first Element from first input
 * @param second Element from second input
 * @return Cross result
 * @throws Exception if the operation fails; the runtime treats this as a task failure
 */
OUT cross(IN1 first, IN2 second) throws Exception;
}
/**
 * Rich join function: JoinFunction plus the lifecycle and
 * runtime-context access inherited from AbstractRichFunction.
 * @param <IN1> First input type
 * @param <IN2> Second input type
 * @param <OUT> Output type
 */
abstract class RichJoinFunction<IN1, IN2, OUT> extends AbstractRichFunction implements JoinFunction<IN1, IN2, OUT> {}
/**
 * Rich co-group function: CoGroupFunction plus the lifecycle and
 * runtime-context access inherited from AbstractRichFunction.
 * @param <IN1> First input type
 * @param <IN2> Second input type
 * @param <OUT> Output type
 */
abstract class RichCoGroupFunction<IN1, IN2, OUT> extends AbstractRichFunction implements CoGroupFunction<IN1, IN2, OUT> {}
Strongly-typed tuple classes for handling multiple values.
/**
 * Tuple with 0 fields (the empty tuple).
 */
class Tuple0 {
public Tuple0();
// Shared reusable instance — there is no per-instance state.
// NOTE(review): declared non-final in this stub; the real Flink field
// is presumably final — confirm against the upstream API.
public static Tuple0 INSTANCE;
}
/**
 * Tuple with 2 fields, accessible both as public fields (f0, f1) and
 * positionally via getField/setField.
 * @param <T0> Type of field 0
 * @param <T1> Type of field 1
 */
class Tuple2<T0, T1> {
// Field 0 of the tuple.
public T0 f0;
// Field 1 of the tuple.
public T1 f1;
// Creates a tuple with all fields null.
public Tuple2();
// Creates a tuple initialized with the given field values.
public Tuple2(T0 f0, T1 f1);
// Static factory equivalent to the two-argument constructor.
public static <T0, T1> Tuple2<T0, T1> of(T0 f0, T1 f1);
// Positional accessor: returns field at position pos (0 or 1).
public <T> T getField(int pos);
// Positional mutator: sets field at position pos (0 or 1).
public void setField(Object value, int pos);
}
/**
 * Tuple with 3 fields, accessible as public fields f0..f2.
 * @param <T0> Type of field 0
 * @param <T1> Type of field 1
 * @param <T2> Type of field 2
 */
class Tuple3<T0, T1, T2> {
// Field 0 of the tuple.
public T0 f0;
// Field 1 of the tuple.
public T1 f1;
// Field 2 of the tuple.
public T2 f2;
// Creates a tuple with all fields null.
public Tuple3();
// Creates a tuple initialized with the given field values.
public Tuple3(T0 f0, T1 f1, T2 f2);
// Static factory equivalent to the three-argument constructor.
public static <T0, T1, T2> Tuple3<T0, T1, T2> of(T0 f0, T1 f1, T2 f2);
}
// Additional tuple classes available: Tuple4 through Tuple25
Runtime type information and descriptors for Flink's type system.
/**
 * Type descriptor for runtime type information: a lightweight handle
 * that resolves to a full TypeInformation instance.
 * @param <T> The type being described
 */
interface TypeDescriptor<T> {
/**
 * Get type information
 * @return TypeInformation instance for the described type
 */
TypeInformation<T> getTypeInformation();
}
/**
 * Factory methods and constants for common type descriptors.
 */
class TypeDescriptors {
// Predefined descriptors for common basic types.
public static TypeDescriptor<String> STRING;
public static TypeDescriptor<Integer> INT;
public static TypeDescriptor<Long> LONG;
public static TypeDescriptor<Double> DOUBLE;
public static TypeDescriptor<Boolean> BOOLEAN;
// Descriptor for an array whose elements have the given descriptor.
public static <T> TypeDescriptor<T[]> array(TypeDescriptor<T> elementType);
// Descriptor for a List whose elements have the given descriptor.
public static <T> TypeDescriptor<List<T>> list(TypeDescriptor<T> elementType);
// Descriptor for a Map with the given key and value descriptors.
public static <K, V> TypeDescriptor<Map<K, V>> map(TypeDescriptor<K> keyType, TypeDescriptor<V> valueType);
}
/**
 * Runtime type information: describes a type to the runtime so it can
 * create serializers for it.
 * @param <T> The type
 */
abstract class TypeInformation<T> {
/**
 * Get type class
 * @return Class object for T
 */
public abstract Class<T> getTypeClass();
/**
 * Check if type is basic type
 * @return true if basic type (e.g. primitives, String)
 */
public abstract boolean isBasicType();
/**
 * Create serializer for this type
 * @param config Execution configuration that may influence serializer construction
 * @return TypeSerializer instance
 */
public abstract TypeSerializer<T> createSerializer(ExecutionConfig config);
}
/**
 * Serializer for data types: deep-copies elements and converts them
 * to/from their binary representation.
 * @param <T> The type to serialize
 */
abstract class TypeSerializer<T> {
/**
 * Create copy of element
 * @param from Source element
 * @return Copied element
 */
public abstract T copy(T from);
/**
 * Serialize element to DataOutputView
 * @param record Element to serialize
 * @param target Output target
 * @throws IOException if writing to the target fails
 */
public abstract void serialize(T record, DataOutputView target) throws IOException;
/**
 * Deserialize element from DataInputView
 * @param source Input source
 * @return Deserialized element
 * @throws IOException if reading from the source fails
 */
public abstract T deserialize(DataInputView source) throws IOException;
}
Configuration and execution mode settings.
/**
 * Execution mode for Flink applications: controls whether a program
 * runs as an unbounded streaming job, a bounded batch job, or lets
 * the runtime decide.
 */
enum RuntimeExecutionMode {
/** Streaming execution mode (unbounded processing) */
STREAMING,
/** Batch execution mode (bounded processing) */
BATCH,
/** Automatic mode selection based on data source characteristics */
AUTOMATIC
}
/**
 * Configuration for resource sharing between operators: operators in
 * the same named group may share task slots.
 */
class SlotSharingGroup {
/**
 * Create slot sharing group with name
 * @param name Group name
 */
public SlotSharingGroup(String name);
/**
 * Get group name
 * @return Group name
 */
public String getName();
}
Core utility interfaces used throughout Flink APIs.
/**
 * Interface for collecting output elements emitted by user functions.
 * @param <T> Element type
 */
interface Collector<T> {
/**
 * Collect/emit an element downstream
 * @param record Element to collect
 */
void collect(T record);
/**
 * Close the collector; no elements may be emitted afterwards.
 */
void close();
}
/**
 * Iterator that can be closed to release underlying resources.
 * @param <E> Element type
 */
interface CloseableIterator<E> extends Iterator<E>, AutoCloseable {
// Narrows AutoCloseable.close(): no checked exception is thrown,
// so callers need no try/catch around close().
@Override
void close();
}
/**
 * Tag for side outputs in DataStream API: identifies a named secondary
 * output stream and carries its element type.
 * NOTE(review): in the real Flink API the tag is typically created as an
 * anonymous subclass so the generic type can be captured — confirm
 * whether the same applies to the single-argument constructor here.
 * @param <T> Type of side output
 */
class OutputTag<T> {
/**
 * Create output tag with ID and explicit type information
 * @param id Unique identifier
 * @param typeInfo Type information
 */
public OutputTag(String id, TypeInformation<T> typeInfo);
/**
 * Create output tag with ID (type inferred)
 * @param id Unique identifier
 */
public OutputTag(String id);
/**
 * Get tag ID
 * @return Tag identifier
 */
public String getId();
/**
 * Get type information
 * @return Type information for side-output elements
 */
public TypeInformation<T> getTypeInfo();
}
Common exception types used in Flink applications.
/**
 * Base exception for Flink-specific errors (checked).
 */
class FlinkException extends Exception {
// Creates an exception with a descriptive message.
public FlinkException(String message);
// Creates an exception with a message and the underlying cause.
public FlinkException(String message, Throwable cause);
}
/**
 * Exception thrown when accessing null fields (unchecked).
 */
class NullFieldException extends RuntimeException {
// Identifies the offending field by position.
public NullFieldException(int fieldPos);
// Identifies the offending field by name.
public NullFieldException(String fieldName);
// Returns the field position.
// NOTE(review): behavior when constructed with a field name (no position)
// is not specified here — confirm against the upstream API.
public int getFieldPos();
}
Supporting types used by the core API.
/**
 * Execution configuration for Flink programs.
 */
class ExecutionConfig {
/**
 * Set parallelism
 * @param parallelism Parallelism level
 */
public void setParallelism(int parallelism);
/**
 * Get parallelism
 * @return Parallelism level
 */
public int getParallelism();
/**
 * Enable/disable object reuse.
 * NOTE(review): the upstream Flink API exposes no-arg enableObjectReuse()
 * and disableObjectReuse() instead of a boolean parameter — confirm this
 * stub's signature against the real class.
 * @param objectReuse Whether to reuse objects
 */
public void enableObjectReuse(boolean objectReuse);
/**
 * Check if object reuse is enabled
 * @return true if object reuse enabled
 */
public boolean isObjectReuseEnabled();
}
/**
 * Interface for writing binary data during serialization.
 */
interface DataOutputView {
/**
 * Write byte value
 * @param b Byte value (low 8 bits of the int are written)
 * @throws IOException if writing fails
 */
void writeByte(int b) throws IOException;
/**
 * Write int value
 * @param v Int value
 * @throws IOException if writing fails
 */
void writeInt(int v) throws IOException;
/**
 * Write long value
 * @param v Long value
 * @throws IOException if writing fails
 */
void writeLong(long v) throws IOException;
/**
 * Write byte array in full
 * @param b Byte array
 * @throws IOException if writing fails
 */
void write(byte[] b) throws IOException;
}
/**
 * Interface for reading binary data during deserialization.
 */
interface DataInputView {
/**
 * Read byte value
 * @return Byte value
 * @throws IOException if reading fails
 */
int readByte() throws IOException;
/**
 * Read int value
 * @return Int value
 * @throws IOException if reading fails
 */
int readInt() throws IOException;
/**
 * Read long value
 * @return Long value
 * @throws IOException if reading fails
 */
long readLong() throws IOException;
/**
 * Read bytes into array.
 * NOTE(review): presumably follows InputStream.read semantics (may read
 * fewer bytes than requested; -1 at end of input) — confirm.
 * @param b Byte array
 * @return Number of bytes read
 * @throws IOException if reading fails
 */
int read(byte[] b) throws IOException;
}
Install with Tessl CLI
npx tessl i tessl/maven-org-apache-flink--flink-parent