tessl/maven-org-apache-flink--flink-streaming-core

Apache Flink streaming core library providing fundamental building blocks for scalable stream data processing.

  • Workspace: tessl
  • Visibility: Public
  • Describes: pkg:maven/org.apache.flink/flink-streaming-core@0.9.x (Maven)

To install, run

npx @tessl/cli install tessl/maven-org-apache-flink--flink-streaming-core@0.9.0


Apache Flink Streaming Core

Apache Flink Streaming Core provides the fundamental building blocks for scalable stream processing. It includes the streaming APIs, operators, windowing, checkpointing, and runtime execution components needed to build real-time data processing pipelines with event-time processing, watermarks, state management, and exactly-once semantics.

Package Information

  • Package Name: flink-streaming-core
  • Package Type: Maven
  • Language: Java
  • Installation:
    <dependency>
      <groupId>org.apache.flink</groupId>
      <artifactId>flink-streaming-core</artifactId>
      <version>0.9.1</version>
    </dependency>

Core Imports

// Core environment and data stream classes
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.GroupedDataStream;
import org.apache.flink.streaming.api.datastream.ConnectedDataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;

// Source and sink functions
import org.apache.flink.streaming.api.functions.source.SourceFunction;
import org.apache.flink.streaming.api.functions.sink.SinkFunction;

// Transformation functions
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.functions.ReduceFunction;

// Type information and key selection
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.functions.KeySelector;

// Stream output selection and splitting
import org.apache.flink.streaming.api.collector.selector.OutputSelector;

Basic Usage

import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.datastream.DataStream;

// Create execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

// Create a data stream from elements
DataStream<String> text = env.fromElements("Hello", "World", "Flink");

// Apply transformations
DataStream<String> processed = text
    .map(value -> value.toUpperCase())
    .filter(value -> value.length() > 4);

// Add a sink
processed.print();

// Execute the streaming job
env.execute("Basic Streaming Job");

Architecture

Flink Streaming Core follows a layered architecture:

  • Environment Layer: Entry points for creating streaming applications (StreamExecutionEnvironment)
  • DataStream API: High-level abstractions for stream processing (DataStream, GroupedDataStream, WindowedDataStream)
  • Function Interfaces: User-defined functions for sources, sinks, and transformations
  • Operator Layer: Internal implementations of streaming operators
  • Runtime Components: Low-level execution and partitioning mechanisms
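
A rough sketch of how a small pipeline maps onto these layers; the comments are annotations, not API, and the operator and runtime layers are only reached indirectly through the DataStream calls.

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

// Environment Layer: entry point and configuration
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

// DataStream API: high-level stream abstraction
DataStream<String> words = env.fromElements("a", "bb", "ccc");

// Function Interfaces: user-defined transformation logic
DataStream<Integer> lengths = words.map(new MapFunction<String, Integer>() {
    @Override
    public Integer map(String value) {
        return value.length();
    }
});

// Operator Layer / Runtime Components: map() is translated into an internal
// stream operator that the runtime executes with the configured partitioning.
lengths.print();
env.execute("Layered Architecture Example");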

Capabilities

Execution Environment

Entry points for creating and configuring streaming applications with support for local and remote execution.

public abstract class StreamExecutionEnvironment {
    public static StreamExecutionEnvironment getExecutionEnvironment();
    public static LocalStreamEnvironment createLocalEnvironment();
    public static RemoteStreamEnvironment createRemoteEnvironment(String host, int port, String... jarFiles);
    public JobExecutionResult execute() throws Exception;
    public JobExecutionResult execute(String jobName) throws Exception;
}
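
A minimal sketch of the environment entry points; the remote host, port, and jar path are placeholders, and the parallelism and buffer-timeout setters are the configuration methods listed under Types below.

import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

// Picks a local environment when run from the IDE and the cluster
// environment when submitted as a packaged job.
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(4);
env.setBufferTimeout(100);

// Explicit local environment, e.g. for tests.
StreamExecutionEnvironment local = StreamExecutionEnvironment.createLocalEnvironment();

// Explicit remote environment; host, port, and jar path are placeholders.
StreamExecutionEnvironment remote = StreamExecutionEnvironment.createRemoteEnvironment(
        "jobmanager-host", 6123, "/path/to/job.jar");

env.fromElements("a", "b", "c").print();
env.execute("Environment Example");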

See docs/execution-environment.md.

Data Stream Operations

Core stream abstractions and transformation operations for processing data streams.

public class DataStream<T> {
    public <R> SingleOutputStreamOperator<R, ?> map(MapFunction<T, R> mapper);
    public <R> SingleOutputStreamOperator<R, ?> flatMap(FlatMapFunction<T, R> flatMapper);
    public SingleOutputStreamOperator<T, ?> filter(FilterFunction<T> filter);
    public GroupedDataStream<T> groupBy(KeySelector<T, ?> key);
    public DataStream<T> union(DataStream<T>... streams);
    public <R> ConnectedDataStream<T, R> connect(DataStream<R> dataStream);
    public SplitDataStream<T> split(OutputSelector<T> outputSelector);
}
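
A sketch combining several of the operations above: union, groupBy with a KeySelector, and the reduce available on the resulting GroupedDataStream.

import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

DataStream<String> first = env.fromElements("apple", "avocado", "banana");
DataStream<String> second = env.fromElements("cherry", "apricot");

// Merge two streams of the same type.
DataStream<String> all = first.union(second);

// Group by the first character and keep the longest word per group.
DataStream<String> longestPerLetter = all
        .groupBy(new KeySelector<String, Character>() {
            @Override
            public Character getKey(String value) {
                return value.charAt(0);
            }
        })
        .reduce(new ReduceFunction<String>() {
            @Override
            public String reduce(String a, String b) {
                return a.length() >= b.length() ? a : b;
            }
        });

longestPerLetter.print();
env.execute("DataStream Operations Example");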

See docs/datastream-operations.md.

Source and Sink Functions

Interfaces and implementations for reading from and writing to external systems.

public interface SourceFunction<T> extends Function, Serializable {
    void run(SourceContext<T> ctx) throws Exception;
    void cancel();
}

public interface SinkFunction<T> extends Function, Serializable {
    void invoke(T value) throws Exception;
}
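
A sketch of a custom source and sink implementing the interfaces above. The SourceContext#collect call and the addSource/addSink wiring shown in the trailing comment are assumptions based on the usual DataStream API, not part of the signatures listed here.

import org.apache.flink.streaming.api.functions.sink.SinkFunction;
import org.apache.flink.streaming.api.functions.source.SourceFunction;

// Emits an increasing counter until the job is cancelled.
class CounterSource implements SourceFunction<Long> {
    private volatile boolean running = true;

    @Override
    public void run(SourceContext<Long> ctx) throws Exception {
        long counter = 0;
        while (running) {
            ctx.collect(counter++);   // SourceContext#collect is assumed here
            Thread.sleep(100);
        }
    }

    @Override
    public void cancel() {
        running = false;
    }
}

// Writes every record to standard error.
class StderrSink<T> implements SinkFunction<T> {
    @Override
    public void invoke(T value) throws Exception {
        System.err.println(value);
    }
}

// Wiring (assumed API):
// DataStream<Long> numbers = env.addSource(new CounterSource());
// numbers.addSink(new StderrSink<Long>());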

See docs/sources-and-sinks.md.

Windowing Operations

Time- and count-based windowing support for aggregating stream data over defined intervals.

public abstract class WindowingHelper<T> {
    public static <T> Time<T> of(long length, TimeUnit timeUnit);
    public static <T> Count<T> of(long windowSize);
}

public class WindowedDataStream<T> {
    public DataStream<T> reduce(ReduceFunction<T> reducer);
    public <R> DataStream<R> fold(R initialValue, FoldFunction<T, R> folder);
}
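
A sketch of a grouped, time-windowed reduction using the helpers above; the import path for the Time helper is an assumption for the 0.9.x line.

import java.util.concurrent.TimeUnit;

import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
// Assumed package for the Time windowing helper:
import org.apache.flink.streaming.api.windowing.helper.Time;

StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
DataStream<Integer> numbers = env.fromElements(1, 2, 3, 4, 5, 6, 7, 8);

// Key by even/odd, window each key into 5-second buckets, and sum each bucket.
DataStream<Integer> windowSums = numbers
        .groupBy(new KeySelector<Integer, Integer>() {
            @Override
            public Integer getKey(Integer value) {
                return value % 2;
            }
        })
        .window(Time.of(5, TimeUnit.SECONDS))
        .reduce(new ReduceFunction<Integer>() {
            @Override
            public Integer reduce(Integer a, Integer b) {
                return a + b;
            }
        });

windowSums.print();
env.execute("Windowing Example");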

See docs/windowing.md.

Stream Operators

Internal operator implementations that execute the actual stream processing logic.

public interface StreamOperator<OUT> extends Serializable {
    void setup(Output<StreamRecord<OUT>> output, RuntimeContext runtimeContext);
    void open(Configuration parameters) throws Exception;
    void close() throws Exception;
}

public interface OneInputStreamOperator<IN, OUT> extends StreamOperator<OUT> {
    void processElement(StreamRecord<IN> element) throws Exception;
}
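
These interfaces are internal; user code normally stays on the DataStream API. Purely to illustrate the shape of the contract above, here is a sketch of a pass-through operator that counts elements; the import paths for the internal types are assumptions.

import org.apache.flink.api.common.functions.RuntimeContext;
import org.apache.flink.configuration.Configuration;
// Assumed packages for the internal operator types:
import org.apache.flink.streaming.api.operators.OneInputStreamOperator;
import org.apache.flink.streaming.api.operators.Output;
import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;

// Forwards every element unchanged and counts how many it has seen.
class CountingForwardOperator<T> implements OneInputStreamOperator<T, T> {
    private transient Output<StreamRecord<T>> output;
    private transient long count;

    @Override
    public void setup(Output<StreamRecord<T>> output, RuntimeContext runtimeContext) {
        this.output = output;
    }

    @Override
    public void open(Configuration parameters) throws Exception {
        count = 0;
    }

    @Override
    public void close() throws Exception {
        System.out.println("Processed " + count + " elements");
    }

    @Override
    public void processElement(StreamRecord<T> element) throws Exception {
        count++;
        output.collect(element);   // Output#collect is assumed here
    }
}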

See docs/stream-operators.md.

Checkpointing and State

Fault tolerance mechanisms including checkpointing and state management for exactly-once processing.

public interface Checkpointed<T> extends Serializable {
    T snapshotState(long checkpointId, long checkpointTimestamp) throws Exception;
    void restoreState(T state) throws Exception;
}

public interface CheckpointCommitter extends Serializable {
    void commitCheckpoint(long checkpointId) throws Exception;
    boolean isCheckpointCommitted(long checkpointId) throws Exception;
}
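
A sketch of a stateful map function that participates in checkpointing by implementing Checkpointed; the import path for Checkpointed is an assumption for the 0.9.x line.

import org.apache.flink.api.common.functions.MapFunction;
// Assumed package for the checkpointing interface:
import org.apache.flink.streaming.api.checkpoint.Checkpointed;

// Counts the records it has seen; the counter is snapshotted on each
// checkpoint and handed back on recovery.
class CountingMapper implements MapFunction<String, String>, Checkpointed<Long> {
    private long seen = 0;

    @Override
    public String map(String value) throws Exception {
        seen++;
        return value;
    }

    @Override
    public Long snapshotState(long checkpointId, long checkpointTimestamp) throws Exception {
        return seen;
    }

    @Override
    public void restoreState(Long state) throws Exception {
        seen = state;
    }
}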

See docs/checkpointing-state.md.

Types

public abstract class StreamExecutionEnvironment {
    // Environment configuration
    public void setParallelism(int parallelism);
    public int getParallelism();
    public void setBufferTimeout(long timeoutMillis);
    public long getBufferTimeout();
}

public class DataStreamSource<T> extends DataStream<T> {
    // Source-specific methods
    public DataStreamSource<T> setParallelism(int parallelism);
}

public class GroupedDataStream<T> {
    // Keyed stream operations
    public DataStream<T> reduce(ReduceFunction<T> reducer);
    public DataStream<T> sum(int positionToSum);
    public WindowedDataStream<T> window(WindowingHelper<T> helper);
}

public class WindowedDataStream<T> {
    // Windowed operations
    public DataStream<T> reduce(ReduceFunction<T> reducer);
    public <R> DataStream<R> fold(R initialValue, FoldFunction<T, R> folder);
}

public class ConnectedDataStream<T1, T2> {
    // Connected stream operations
    public <R> DataStream<R> map(CoMapFunction<T1, T2, R> coMapper);
    public <R> DataStream<R> flatMap(CoFlatMapFunction<T1, T2, R> coFlatMapper);
}

public class DataStreamSink<T> {
    // Sink configuration
    public DataStreamSink<T> setParallelism(int parallelism);
    public DataStreamSink<T> name(String name);
}