Java API for Apache Flink's Table ecosystem, enabling type-safe table operations and SQL query execution.
—
Aggregation operations allow you to compute summary statistics and perform complex analytical queries on grouped data. The Table API provides both built-in aggregate functions and support for user-defined aggregate functions.
Group table data by one or more columns to prepare for aggregation.
/**
* Groups the table by specified fields
* @param fields Expressions representing grouping columns
* @return GroupedTable for aggregation operations
*/
public GroupedTable groupBy(Expression... fields);Usage Examples:
import static org.apache.flink.table.api.Expressions.*;
// Single column grouping
GroupedTable byDepartment = sourceTable.groupBy($("department"));
// Multiple column grouping
GroupedTable byDeptAndLevel = sourceTable.groupBy($("department"), $("level"));
// Grouping by computed expressions
GroupedTable byYearMonth = sourceTable.groupBy(
$("hire_date").extract(IntervalUnit.YEAR).as("hire_year"),
$("hire_date").extract(IntervalUnit.MONTH).as("hire_month")
);
// Grouping with conditional expressions
GroupedTable bySalaryBand = sourceTable.groupBy(
when($("salary").isGreater(100000), "High")
.when($("salary").isGreater(50000), "Medium")
.otherwise("Low").as("salary_band")
);Apply built-in aggregate functions to grouped data.
/**
* Built-in aggregate functions available through Expressions class
*/
// Count functions
public static Expression count(Object field);
public static Expression countDistinct(Object field);
// Sum functions
public static Expression sum(Object field);
public static Expression sumDistinct(Object field);
// Average functions
public static Expression avg(Object field);
public static Expression avgDistinct(Object field);
// Min/Max functions
public static Expression min(Object field);
public static Expression max(Object field);
// Statistical functions
public static Expression stddev(Object field);
public static Expression variance(Object field);
// Collect functions
public static Expression collect(Object field);
public static Expression listagg(Object field, Object separator);Usage Examples:
// Basic aggregations with groupBy
Table departmentStats = sourceTable
.groupBy($("department"))
.select(
$("department"),
count($("employee_id")).as("employee_count"),
avg($("salary")).as("avg_salary"),
sum($("salary")).as("total_salary"),
min($("hire_date")).as("earliest_hire"),
max($("hire_date")).as("latest_hire")
);
// Multiple aggregations
Table salesSummary = sourceTable
.groupBy($("region"), $("product_category"))
.select(
$("region"),
$("product_category"),
count($("*")).as("order_count"),
sum($("amount")).as("total_revenue"),
avg($("amount")).as("avg_order_value"),
countDistinct($("customer_id")).as("unique_customers")
);
// Statistical aggregations
Table performanceStats = sourceTable
.groupBy($("team"))
.select(
$("team"),
avg($("performance_score")).as("avg_performance"),
stddev($("performance_score")).as("score_stddev"),
variance($("performance_score")).as("score_variance")
);The GroupedTable interface provides methods for performing aggregations on grouped data.
public interface GroupedTable {
/**
* Performs selection with aggregation functions on grouped data
* @param fields Selection expressions including aggregate functions
* @return Table with aggregated results
*/
Table select(Expression... fields);
/**
* Applies a single aggregate function to the group
* @param aggregateFunction User-defined aggregate function expression
* @return AggregatedTable for further selection
*/
AggregatedTable aggregate(Expression aggregateFunction);
/**
* Applies a table aggregate function that can emit multiple rows per group
* @param tableAggregateFunction User-defined table aggregate function
* @return FlatAggregateTable for further selection
*/
FlatAggregateTable flatAggregate(Expression tableAggregateFunction);
}Usage Examples:
// Standard aggregation selection
Table groupedResults = sourceTable
.groupBy($("category"))
.select(
$("category"),
count($("*")).as("item_count"),
avg($("price")).as("avg_price")
);
// User-defined aggregate function
// Assuming you have a custom aggregate function MyAvgFunction
Table customAgg = sourceTable
.groupBy($("department"))
.aggregate(call(MyAvgFunction.class, $("salary")))
.select($("department"), $("f0").as("custom_avg"));
// Table aggregate function (returns multiple rows per group)
// Assuming you have a custom table aggregate function TopNFunction
Table topResults = sourceTable
.groupBy($("category"))
.flatAggregate(call(TopNFunction.class, $("score"), 3))
.select($("category"), $("f0").as("top_score"), $("f1").as("rank"));Results from user-defined aggregate functions.
public interface AggregatedTable {
/**
* Selects fields from the aggregated result
* @param fields Fields to select from aggregation result
* @return Table with selected aggregated data
*/
Table select(Expression... fields);
}Results from table aggregate functions that can produce multiple output rows per group.
public interface FlatAggregateTable {
/**
* Selects fields from the flat aggregated result
* @param fields Fields to select from flat aggregation result
* @return Table with selected flat aggregated data
*/
Table select(Expression... fields);
}Filter grouped data based on aggregate conditions (achieved through filter after aggregation).
Usage Examples:
// Having equivalent - filter after aggregation
Table filteredGroups = sourceTable
.groupBy($("department"))
.select(
$("department"),
count($("*")).as("emp_count"),
avg($("salary")).as("avg_salary")
)
.filter($("emp_count").isGreater(10)) // HAVING equivalent
.filter($("avg_salary").isGreater(50000));Advanced aggregation scenarios with multiple grouping levels and conditional aggregations.
Usage Examples:
// Conditional aggregation
Table conditionalAgg = sourceTable
.groupBy($("department"))
.select(
$("department"),
count($("*")).as("total_employees"),
sum(when($("salary").isGreater(100000), 1).otherwise(0)).as("high_earners"),
sum(when($("gender").isEqual("F"), 1).otherwise(0)).as("female_count"),
avg(when($("level").isEqual("Senior"), $("salary")).otherwise(null)).as("senior_avg_salary")
);
// Multi-level grouping with rollup-like operations
Table hierarchicalAgg = sourceTable
.groupBy($("region"), $("department"))
.select(
$("region"),
$("department"),
count($("*")).as("count"),
sum($("sales")).as("total_sales")
);
// Percentage calculations within groups
Table percentageCalc = sourceTable
.groupBy($("department"))
.select(
$("department"),
$("employee_id"),
$("salary"),
$("salary").dividedBy(sum($("salary")).over($("department"))).multiply(100).as("salary_percentage")
);Combine multiple aggregate functions and perform calculations on aggregated results.
Usage Examples:
// Combined aggregations with calculations
Table combinedAgg = sourceTable
.groupBy($("team"))
.select(
$("team"),
count($("*")).as("member_count"),
sum($("goals")).as("total_goals"),
sum($("goals")).dividedBy(count($("*"))).as("goals_per_member"),
max($("goals")).minus(min($("goals"))).as("goal_range")
);
// Nested aggregation calculations
Table nestedCalc = sourceTable
.groupBy($("department"))
.select(
$("department"),
avg($("salary")).as("dept_avg"),
sum($("salary")).as("dept_total"),
count($("*")).as("dept_size"),
// Calculate standard deviation manually
sqrt(
avg($("salary").multiply($("salary")))
.minus(avg($("salary")).multiply(avg($("salary"))))
).as("salary_stddev")
);Aggregations over the entire table without explicit grouping.
Usage Examples:
// Global aggregations (no GROUP BY)
Table globalStats = sourceTable.select(
count($("*")).as("total_records"),
sum($("amount")).as("grand_total"),
avg($("amount")).as("overall_average"),
min($("created_date")).as("earliest_date"),
max($("created_date")).as("latest_date")
);
// Adding rank or row number
Table rankedResults = sourceTable.select(
$("*"),
row_number().over(orderBy($("salary").desc())).as("salary_rank"),
rank().over(orderBy($("score").desc())).as("score_rank")
);Perform aggregations on distinct values only.
Usage Examples:
// Distinct count and sum
Table distinctAgg = sourceTable
.groupBy($("category"))
.select(
$("category"),
count($("*")).as("total_orders"),
countDistinct($("customer_id")).as("unique_customers"),
countDistinct($("product_id")).as("unique_products"),
sumDistinct($("discount_amount")).as("unique_discount_total")
);
// Multiple distinct aggregations
Table multiDistinct = sourceTable
.groupBy($("region"))
.select(
$("region"),
countDistinct($("customer_id")).as("unique_customers"),
countDistinct($("product_category")).as("categories_sold"),
avgDistinct($("customer_rating")).as("avg_unique_ratings")
);// Count functions
Expression count(Object field); // Count non-null values
Expression countDistinct(Object field); // Count distinct non-null values
// Numeric aggregations
Expression sum(Object field); // Sum of values
Expression sumDistinct(Object field); // Sum of distinct values
Expression avg(Object field); // Average of values
Expression avgDistinct(Object field); // Average of distinct values
Expression min(Object field); // Minimum value
Expression max(Object field); // Maximum value
// Statistical functions
Expression stddev(Object field); // Standard deviation
Expression stddevPop(Object field); // Population standard deviation
Expression stddevSamp(Object field); // Sample standard deviation
Expression variance(Object field); // Variance
Expression varPop(Object field); // Population variance
Expression varSamp(Object field); // Sample variance
// String aggregations
Expression listagg(Object field, Object separator); // Concatenate values with separator
Expression collect(Object field); // Collect values into collection
// Boolean aggregations
Expression boolAnd(Object field); // Logical AND of boolean values
Expression boolOr(Object field); // Logical OR of boolean values
// First/Last functions
Expression first(Object field); // First value in group
Expression last(Object field); // Last value in groupInstall with Tessl CLI
npx tessl i tessl/maven-org-apache-flink--flink-table-api-java