Apache Flink streaming core library providing fundamental building blocks for scalable stream data processing.
—
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Pending
The risk profile of this skill
Apache Flink Streaming Core provides the fundamental building blocks for scalable stream data processing. It includes streaming APIs, operators, windowing, checkpointing, and runtime execution components for building real-time data processing pipelines with features like event-time processing, watermarks, state management, and exactly-once semantics.
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-core</artifactId>
<version>0.9.1</version>
</dependency>

// Core environment and data stream classes
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.GroupedDataStream;
import org.apache.flink.streaming.api.datastream.ConnectedDataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
// Source and sink functions
import org.apache.flink.streaming.api.functions.source.SourceFunction;
import org.apache.flink.streaming.api.functions.sink.SinkFunction;
// Transformation functions
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
// Type information and key selection
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.functions.KeySelector;
// Stream output selection and splitting
import org.apache.flink.streaming.api.collector.selector.OutputSelector;

import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.datastream.DataStream;
// Create execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
// Create a data stream from elements
DataStream<String> text = env.fromElements("Hello", "World", "Flink");
// Apply transformations
DataStream<String> processed = text
.map(value -> value.toUpperCase())
.filter(value -> value.length() > 4);
// Add a sink
processed.print();
// Execute the streaming job
env.execute("Basic Streaming Job");

Flink Streaming Core follows a layered architecture:
- Environment layer (StreamExecutionEnvironment)
- Data stream layer (DataStream, GroupedDataStream, WindowedDataStream)

Entry points for creating and configuring streaming applications with support for local and remote execution.
public abstract class StreamExecutionEnvironment {
public static StreamExecutionEnvironment getExecutionEnvironment();
public static LocalStreamEnvironment createLocalEnvironment();
public static RemoteStreamEnvironment createRemoteEnvironment(String host, int port, String... jarFiles);
public JobExecutionResult execute() throws Exception;
public JobExecutionResult execute(String jobName) throws Exception;
}

Core stream abstractions and transformation operations for processing data streams.
public class DataStream<T> {
public <R> SingleOutputStreamOperator<R, ?> map(MapFunction<T, R> mapper);
public <R> SingleOutputStreamOperator<R, ?> flatMap(FlatMapFunction<T, R> flatMapper);
public SingleOutputStreamOperator<T, ?> filter(FilterFunction<T> filter);
public GroupedDataStream<T> groupBy(KeySelector<T, ?> key);
public DataStream<T> union(DataStream<T>... streams);
public <R> ConnectedDataStream<T, R> connect(DataStream<R> dataStream);
public SplitDataStream<T> split(OutputSelector<T> outputSelector);
}

Interfaces and implementations for reading from and writing to external systems.
public interface SourceFunction<T> extends Function, Serializable {
void run(SourceContext<T> ctx) throws Exception;
void cancel();
}
public interface SinkFunction<T> extends Function, Serializable {
void invoke(T value) throws Exception;
}

Time and count-based windowing support for aggregating stream data over defined intervals.
public abstract class WindowingHelper<T> {
public static <T> Time<T> of(long length, TimeUnit timeUnit);
public static <T> Count<T> of(long windowSize);
}
public class WindowedDataStream<T> {
public DataStream<T> reduce(ReduceFunction<T> reducer);
public <R> DataStream<R> fold(R initialValue, FoldFunction<T, R> folder);
}

Internal operator implementations that execute the actual stream processing logic.
public interface StreamOperator<OUT> extends Serializable {
void setup(Output<StreamRecord<OUT>> output, RuntimeContext runtimeContext);
void open(Configuration parameters) throws Exception;
void close() throws Exception;
}
public interface OneInputStreamOperator<IN, OUT> extends StreamOperator<OUT> {
void processElement(StreamRecord<IN> element) throws Exception;
}

Fault tolerance mechanisms including checkpointing and state management for exactly-once processing.
public interface Checkpointed<T> extends Serializable {
T snapshotState(long checkpointId, long checkpointTimestamp) throws Exception;
void restoreState(T state) throws Exception;
}
public interface CheckpointCommitter extends Serializable {
void commitCheckpoint(long checkpointId) throws Exception;
boolean isCheckpointCommitted(long checkpointId) throws Exception;
}

public abstract class StreamExecutionEnvironment {
// Environment configuration
public void setParallelism(int parallelism);
public int getParallelism();
public void setBufferTimeout(long timeoutMillis);
public long getBufferTimeout();
}
public class DataStreamSource<T> extends DataStream<T> {
// Source-specific methods
public DataStreamSource<T> setParallelism(int parallelism);
}
public class GroupedDataStream<T> {
// Keyed stream operations
public DataStream<T> reduce(ReduceFunction<T> reducer);
public DataStream<T> sum(int positionToSum);
public WindowedDataStream<T> window(WindowingHelper<T> helper);
}
public class WindowedDataStream<T> {
// Windowed operations
public DataStream<T> reduce(ReduceFunction<T> reducer);
public <R> DataStream<R> fold(R initialValue, FoldFunction<T, R> folder);
}
public class ConnectedDataStream<T1, T2> {
// Connected stream operations
public <R> DataStream<R> map(CoMapFunction<T1, T2, R> coMapper);
public <R> DataStream<R> flatMap(CoFlatMapFunction<T1, T2, R> coFlatMapper);
}
public class DataStreamSink<T> {
// Sink configuration
public DataStreamSink<T> setParallelism(int parallelism);
public DataStreamSink<T> name(String name);
}