tessl install tessl/maven-org-apache-spark--spark-streaming_2-11@1.6.0

Apache Spark Streaming library for processing live data streams with fault-tolerance and high throughput.
Window operations allow you to apply transformations over a sliding window of data, enabling analysis across multiple batches of streaming data.
def window(windowDuration: Duration, slideDuration: Duration): DStream[T]
def reduceByWindow(
reduceFunc: (T, T) => T,
windowDuration: Duration,
slideDuration: Duration
): DStream[T]
def reduceByWindow(
reduceFunc: (T, T) => T,
invReduceFunc: (T, T) => T,
windowDuration: Duration,
slideDuration: Duration
): DStream[T]
def countByWindow(windowDuration: Duration, slideDuration: Duration): DStream[Long]

def groupByKeyAndWindow(windowDuration: Duration, slideDuration: Duration): DStream[(K, Iterable[V])]
def reduceByKeyAndWindow(
func: (V, V) => V,
windowDuration: Duration,
slideDuration: Duration
): DStream[(K, V)]
def countByKeyAndWindow(windowDuration: Duration, slideDuration: Duration): DStream[(K, Long)]

Example:
val windowedWordCounts = wordPairs
.reduceByKeyAndWindow((a: Int, b: Int) => a + b, Seconds(30), Seconds(10))