0
# Apache Flink Streaming Core
1
2
Apache Flink Streaming Core provides the fundamental building blocks for scalable stream data processing. It includes the streaming APIs, operators, windowing, checkpointing, and runtime execution components needed to build real-time data processing pipelines, with support for event-time processing, watermarks, state management, and exactly-once semantics.
3
4
## Package Information
5
6
- **Package Name**: flink-streaming-core
7
- **Package Type**: Maven
8
- **Language**: Java
9
- **Installation**:
10
```xml
11
<dependency>
12
<groupId>org.apache.flink</groupId>
13
<artifactId>flink-streaming-core</artifactId>
14
<version>0.9.1</version>
15
</dependency>
16
```
17
18
## Core Imports
19
20
```java
21
// Core environment and data stream classes
22
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
23
import org.apache.flink.streaming.api.datastream.DataStream;
24
import org.apache.flink.streaming.api.datastream.DataStreamSource;
25
import org.apache.flink.streaming.api.datastream.GroupedDataStream;
26
import org.apache.flink.streaming.api.datastream.ConnectedDataStream;
27
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
28
29
// Source and sink functions
30
import org.apache.flink.streaming.api.functions.source.SourceFunction;
31
import org.apache.flink.streaming.api.functions.sink.SinkFunction;
32
33
// Transformation functions
34
import org.apache.flink.api.common.functions.MapFunction;
35
import org.apache.flink.api.common.functions.FlatMapFunction;
36
import org.apache.flink.api.common.functions.FilterFunction;
37
import org.apache.flink.api.common.functions.ReduceFunction;
38
39
// Type information and key selection
40
import org.apache.flink.api.common.typeinfo.TypeInformation;
41
import org.apache.flink.api.java.functions.KeySelector;
42
43
// Stream output selection and splitting
44
import org.apache.flink.streaming.api.collector.selector.OutputSelector;
45
```
46
47
## Basic Usage
48
49
```java
50
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
51
import org.apache.flink.streaming.api.datastream.DataStream;
52
53
// Create execution environment
54
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
55
56
// Create a data stream from elements
57
DataStream<String> text = env.fromElements("Hello", "World", "Flink");
58
59
// Apply transformations
60
DataStream<String> processed = text
61
.map(value -> value.toUpperCase())
62
.filter(value -> value.length() > 4);
63
64
// Add a sink
65
processed.print();
66
67
// Execute the streaming job
68
env.execute("Basic Streaming Job");
69
```
70
71
## Architecture
72
73
Flink Streaming Core follows a layered architecture:
74
- **Environment Layer**: Entry points for creating streaming applications (`StreamExecutionEnvironment`)
75
- **DataStream API**: High-level abstractions for stream processing (`DataStream`, `GroupedDataStream`, `WindowedDataStream`)
76
- **Function Interfaces**: User-defined functions for sources, sinks, and transformations
77
- **Operator Layer**: Internal implementations of streaming operators
78
- **Runtime Components**: Low-level execution and partitioning mechanisms
79
80
## Capabilities
81
82
### Execution Environment
83
84
Entry points for creating and configuring streaming applications, with support for both local and remote execution.
85
86
```java { .api }
87
public abstract class StreamExecutionEnvironment {
88
public static StreamExecutionEnvironment getExecutionEnvironment();
89
public static LocalStreamEnvironment createLocalEnvironment();
90
public static RemoteStreamEnvironment createRemoteEnvironment(String host, int port, String... jarFiles);
91
public JobExecutionResult execute() throws Exception;
92
public JobExecutionResult execute(String jobName) throws Exception;
93
}
94
```
95
96
[Execution Environment](./execution-environment.md)
97
98
### Data Stream Operations
99
100
Core stream abstractions and transformation operations for processing data streams.
101
102
```java { .api }
103
public class DataStream<T> {
104
public <R> SingleOutputStreamOperator<R, ?> map(MapFunction<T, R> mapper);
105
public <R> SingleOutputStreamOperator<R, ?> flatMap(FlatMapFunction<T, R> flatMapper);
106
public SingleOutputStreamOperator<T, ?> filter(FilterFunction<T> filter);
107
public GroupedDataStream<T> groupBy(KeySelector<T, ?> key);
108
public DataStream<T> union(DataStream<T>... streams);
109
public <R> ConnectedDataStream<T, R> connect(DataStream<R> dataStream);
110
public SplitDataStream<T> split(OutputSelector<T> outputSelector);
111
}
112
```
113
114
[Data Stream Operations](./datastream-operations.md)
115
116
### Source and Sink Functions
117
118
Interfaces and implementations for reading from and writing to external systems.
119
120
```java { .api }
121
public interface SourceFunction<T> extends Function, Serializable {
122
void run(SourceContext<T> ctx) throws Exception;
123
void cancel();
124
}
125
126
public interface SinkFunction<T> extends Function, Serializable {
127
void invoke(T value) throws Exception;
128
}
129
```
130
131
[Sources and Sinks](./sources-and-sinks.md)
132
133
### Windowing Operations
134
135
Time- and count-based windowing support for aggregating stream data over defined intervals.
136
137
```java { .api }
138
public abstract class WindowingHelper<T> {
139
public static <T> Time<T> of(long length, TimeUnit timeUnit);
140
public static <T> Count<T> of(long windowSize);
141
}
142
143
public class WindowedDataStream<T> {
144
public DataStream<T> reduce(ReduceFunction<T> reducer);
145
public <R> DataStream<R> fold(R initialValue, FoldFunction<T, R> folder);
146
}
147
```
148
149
[Windowing](./windowing.md)
150
151
### Stream Operators
152
153
Internal operator implementations that execute the actual stream processing logic.
154
155
```java { .api }
156
public interface StreamOperator<OUT> extends Serializable {
157
void setup(Output<StreamRecord<OUT>> output, RuntimeContext runtimeContext);
158
void open(Configuration parameters) throws Exception;
159
void close() throws Exception;
160
}
161
162
public interface OneInputStreamOperator<IN, OUT> extends StreamOperator<OUT> {
163
void processElement(StreamRecord<IN> element) throws Exception;
164
}
165
```
166
167
[Stream Operators](./stream-operators.md)
168
169
### Checkpointing and State
170
171
Fault-tolerance mechanisms, including checkpointing and state management, for exactly-once processing.
172
173
```java { .api }
174
public interface Checkpointed<T> extends Serializable {
175
T snapshotState(long checkpointId, long checkpointTimestamp) throws Exception;
176
void restoreState(T state) throws Exception;
177
}
178
179
public interface CheckpointCommitter extends Serializable {
180
void commitCheckpoint(long checkpointId) throws Exception;
181
boolean isCheckpointCommitted(long checkpointId) throws Exception;
182
}
183
```
184
185
[Checkpointing and State](./checkpointing-state.md)
186
187
## Types
188
189
```java { .api }
190
public abstract class StreamExecutionEnvironment {
191
// Environment configuration
192
public void setParallelism(int parallelism);
193
public int getParallelism();
194
public void setBufferTimeout(long timeoutMillis);
195
public long getBufferTimeout();
196
}
197
198
public class DataStreamSource<T> extends DataStream<T> {
199
// Source-specific methods
200
public DataStreamSource<T> setParallelism(int parallelism);
201
}
202
203
public class GroupedDataStream<T> {
204
// Keyed stream operations
205
public DataStream<T> reduce(ReduceFunction<T> reducer);
206
public DataStream<T> sum(int positionToSum);
207
public WindowedDataStream<T> window(WindowingHelper<T> helper);
208
}
209
210
public class WindowedDataStream<T> {
211
// Windowed operations
212
public DataStream<T> reduce(ReduceFunction<T> reducer);
213
public <R> DataStream<R> fold(R initialValue, FoldFunction<T, R> folder);
214
}
215
216
public class ConnectedDataStream<T1, T2> {
217
// Connected stream operations
218
public <R> DataStream<R> map(CoMapFunction<T1, T2, R> coMapper);
219
public <R> DataStream<R> flatMap(CoFlatMapFunction<T1, T2, R> coFlatMapper);
220
}
221
222
public class DataStreamSink<T> {
223
// Sink configuration
224
public DataStreamSink<T> setParallelism(int parallelism);
225
public DataStreamSink<T> name(String name);
226
}
227
```