or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

aggregation-grouping.mdcatalog-management.mdexpressions.mdindex.mdsql-integration.mdtable-environment.mdtable-operations.mduser-defined-functions.mdwindow-operations.md

aggregation-grouping.mddocs/

0

# Aggregation and Grouping

1

2

Aggregation operations allow you to compute summary statistics and perform complex analytical queries on grouped data. The Table API provides both built-in aggregate functions and support for user-defined aggregate functions.

3

4

## Capabilities

5

6

### Basic Grouping

7

8

Group table data by one or more columns to prepare for aggregation.

9

10

```java { .api }

11

/**

12

* Groups the table by specified fields

13

* @param fields Expressions representing grouping columns

14

* @return GroupedTable for aggregation operations

15

*/

16

public GroupedTable groupBy(Expression... fields);

17

```

18

19

**Usage Examples:**

20

21

```java

22

import static org.apache.flink.table.api.Expressions.*;

23

24

// Single column grouping

25

GroupedTable byDepartment = sourceTable.groupBy($("department"));

26

27

// Multiple column grouping

28

GroupedTable byDeptAndLevel = sourceTable.groupBy($("department"), $("level"));

29

30

// Grouping by computed expressions

31

GroupedTable byYearMonth = sourceTable.groupBy(

32

$("hire_date").extract(IntervalUnit.YEAR).as("hire_year"),

33

$("hire_date").extract(IntervalUnit.MONTH).as("hire_month")

34

);

35

36

// Grouping with conditional expressions

37

GroupedTable bySalaryBand = sourceTable.groupBy(

38

when($("salary").isGreater(100000), "High")

39

.when($("salary").isGreater(50000), "Medium")

40

.otherwise("Low").as("salary_band")

41

);

42

```

43

44

### Aggregate Functions

45

46

Apply built-in aggregate functions to grouped data.

47

48

```java { .api }

49

/**

50

* Built-in aggregate functions available through Expressions class

51

*/

52

53

// Count functions

54

public static Expression count(Object field);

55

public static Expression countDistinct(Object field);

56

57

// Sum functions

58

public static Expression sum(Object field);

59

public static Expression sumDistinct(Object field);

60

61

// Average functions

62

public static Expression avg(Object field);

63

public static Expression avgDistinct(Object field);

64

65

// Min/Max functions

66

public static Expression min(Object field);

67

public static Expression max(Object field);

68

69

// Statistical functions

70

public static Expression stddev(Object field);

71

public static Expression variance(Object field);

72

73

// Collect functions

74

public static Expression collect(Object field);

75

public static Expression listagg(Object field, Object separator);

76

```

77

78

**Usage Examples:**

79

80

```java

81

// Basic aggregations with groupBy

82

Table departmentStats = sourceTable

83

.groupBy($("department"))

84

.select(

85

$("department"),

86

count($("employee_id")).as("employee_count"),

87

avg($("salary")).as("avg_salary"),

88

sum($("salary")).as("total_salary"),

89

min($("hire_date")).as("earliest_hire"),

90

max($("hire_date")).as("latest_hire")

91

);

92

93

// Multiple aggregations

94

Table salesSummary = sourceTable

95

.groupBy($("region"), $("product_category"))

96

.select(

97

$("region"),

98

$("product_category"),

99

count($("*")).as("order_count"),

100

sum($("amount")).as("total_revenue"),

101

avg($("amount")).as("avg_order_value"),

102

countDistinct($("customer_id")).as("unique_customers")

103

);

104

105

// Statistical aggregations

106

Table performanceStats = sourceTable

107

.groupBy($("team"))

108

.select(

109

$("team"),

110

avg($("performance_score")).as("avg_performance"),

111

stddev($("performance_score")).as("score_stddev"),

112

variance($("performance_score")).as("score_variance")

113

);

114

```

115

116

### GroupedTable Operations

117

118

The GroupedTable interface provides methods for performing aggregations on grouped data.

119

120

```java { .api }

121

public interface GroupedTable {

122

/**

123

* Performs selection with aggregation functions on grouped data

124

* @param fields Selection expressions including aggregate functions

125

* @return Table with aggregated results

126

*/

127

Table select(Expression... fields);

128

129

/**

130

* Applies a single aggregate function to the group

131

* @param aggregateFunction User-defined aggregate function expression

132

* @return AggregatedTable for further selection

133

*/

134

AggregatedTable aggregate(Expression aggregateFunction);

135

136

/**

137

* Applies a table aggregate function that can emit multiple rows per group

138

* @param tableAggregateFunction User-defined table aggregate function

139

* @return FlatAggregateTable for further selection

140

*/

141

FlatAggregateTable flatAggregate(Expression tableAggregateFunction);

142

}

143

```

144

145

**Usage Examples:**

146

147

```java

148

// Standard aggregation selection

149

Table groupedResults = sourceTable

150

.groupBy($("category"))

151

.select(

152

$("category"),

153

count($("*")).as("item_count"),

154

avg($("price")).as("avg_price")

155

);

156

157

// User-defined aggregate function

158

// Assuming you have a custom aggregate function MyAvgFunction

159

Table customAgg = sourceTable

160

.groupBy($("department"))

161

.aggregate(call(MyAvgFunction.class, $("salary")))

162

.select($("department"), $("f0").as("custom_avg"));

163

164

// Table aggregate function (returns multiple rows per group)

165

// Assuming you have a custom table aggregate function TopNFunction

166

Table topResults = sourceTable

167

.groupBy($("category"))

168

.flatAggregate(call(TopNFunction.class, $("score"), 3))

169

.select($("category"), $("f0").as("top_score"), $("f1").as("rank"));

170

```

171

172

### AggregatedTable Operations

173

174

Results from user-defined aggregate functions.

175

176

```java { .api }

177

public interface AggregatedTable {

178

/**

179

* Selects fields from the aggregated result

180

* @param fields Fields to select from aggregation result

181

* @return Table with selected aggregated data

182

*/

183

Table select(Expression... fields);

184

}

185

```

186

187

### FlatAggregateTable Operations

188

189

Results from table aggregate functions that can produce multiple output rows per group.

190

191

```java { .api }

192

public interface FlatAggregateTable {

193

/**

194

* Selects fields from the flat aggregated result

195

* @param fields Fields to select from flat aggregation result

196

* @return Table with selected flat aggregated data

197

*/

198

Table select(Expression... fields);

199

}

200

```

201

202

### Having Clauses

203

204

Filter grouped data based on aggregate conditions (achieved through filter after aggregation).

205

206

**Usage Examples:**

207

208

```java

209

// Having equivalent - filter after aggregation

210

Table filteredGroups = sourceTable

211

.groupBy($("department"))

212

.select(

213

$("department"),

214

count($("*")).as("emp_count"),

215

avg($("salary")).as("avg_salary")

216

)

217

.filter($("emp_count").isGreater(10)) // HAVING equivalent

218

.filter($("avg_salary").isGreater(50000));

219

```

220

221

### Complex Aggregation Patterns

222

223

Advanced aggregation scenarios with multiple grouping levels and conditional aggregations.

224

225

**Usage Examples:**

226

227

```java

228

// Conditional aggregation

229

Table conditionalAgg = sourceTable

230

.groupBy($("department"))

231

.select(

232

$("department"),

233

count($("*")).as("total_employees"),

234

sum(when($("salary").isGreater(100000), 1).otherwise(0)).as("high_earners"),

235

sum(when($("gender").isEqual("F"), 1).otherwise(0)).as("female_count"),

236

avg(when($("level").isEqual("Senior"), $("salary")).otherwise(null)).as("senior_avg_salary")

237

);

238

239

// Multi-level grouping with rollup-like operations

240

Table hierarchicalAgg = sourceTable

241

.groupBy($("region"), $("department"))

242

.select(

243

$("region"),

244

$("department"),

245

count($("*")).as("count"),

246

sum($("sales")).as("total_sales")

247

);

248

249

// Percentage calculations within groups

250

Table percentageCalc = sourceTable

251

.groupBy($("department"))

252

.select(

253

$("department"),

254

$("employee_id"),

255

$("salary"),

256

$("salary").dividedBy(sum($("salary")).over($("department"))).multiply(100).as("salary_percentage")

257

);

258

```

259

260

### Aggregate Function Combinations

261

262

Combine multiple aggregate functions and perform calculations on aggregated results.

263

264

**Usage Examples:**

265

266

```java

267

// Combined aggregations with calculations

268

Table combinedAgg = sourceTable

269

.groupBy($("team"))

270

.select(

271

$("team"),

272

count($("*")).as("member_count"),

273

sum($("goals")).as("total_goals"),

274

sum($("goals")).dividedBy(count($("*"))).as("goals_per_member"),

275

max($("goals")).minus(min($("goals"))).as("goal_range")

276

);

277

278

// Nested aggregation calculations

279

Table nestedCalc = sourceTable

280

.groupBy($("department"))

281

.select(

282

$("department"),

283

avg($("salary")).as("dept_avg"),

284

sum($("salary")).as("dept_total"),

285

count($("*")).as("dept_size"),

286

// Calculate standard deviation manually

287

sqrt(

288

avg($("salary").multiply($("salary")))

289

.minus(avg($("salary")).multiply(avg($("salary"))))

290

).as("salary_stddev")

291

);

292

```

293

294

### Window Aggregate Functions (without GROUP BY)

295

296

Aggregations over the entire table without explicit grouping.

297

298

**Usage Examples:**

299

300

```java

301

// Global aggregations (no GROUP BY)

302

Table globalStats = sourceTable.select(

303

count($("*")).as("total_records"),

304

sum($("amount")).as("grand_total"),

305

avg($("amount")).as("overall_average"),

306

min($("created_date")).as("earliest_date"),

307

max($("created_date")).as("latest_date")

308

);

309

310

// Adding rank or row number

311

Table rankedResults = sourceTable.select(

312

$("*"),

313

row_number().over(orderBy($("salary").desc())).as("salary_rank"),

314

rank().over(orderBy($("score").desc())).as("score_rank")

315

);

316

```

317

318

### Distinct Aggregations

319

320

Perform aggregations on distinct values only.

321

322

**Usage Examples:**

323

324

```java

325

// Distinct count and sum

326

Table distinctAgg = sourceTable

327

.groupBy($("category"))

328

.select(

329

$("category"),

330

count($("*")).as("total_orders"),

331

countDistinct($("customer_id")).as("unique_customers"),

332

countDistinct($("product_id")).as("unique_products"),

333

sumDistinct($("discount_amount")).as("unique_discount_total")

334

);

335

336

// Multiple distinct aggregations

337

Table multiDistinct = sourceTable

338

.groupBy($("region"))

339

.select(

340

$("region"),

341

countDistinct($("customer_id")).as("unique_customers"),

342

countDistinct($("product_category")).as("categories_sold"),

343

avgDistinct($("customer_rating")).as("avg_unique_ratings")

344

);

345

```

346

347

## Built-in Aggregate Functions Reference

348

349

```java { .api }

350

// Count functions

351

Expression count(Object field); // Count non-null values

352

Expression countDistinct(Object field); // Count distinct non-null values

353

354

// Numeric aggregations

355

Expression sum(Object field); // Sum of values

356

Expression sumDistinct(Object field); // Sum of distinct values

357

Expression avg(Object field); // Average of values

358

Expression avgDistinct(Object field); // Average of distinct values

359

Expression min(Object field); // Minimum value

360

Expression max(Object field); // Maximum value

361

362

// Statistical functions

363

Expression stddev(Object field); // Standard deviation

364

Expression stddevPop(Object field); // Population standard deviation

365

Expression stddevSamp(Object field); // Sample standard deviation

366

Expression variance(Object field); // Variance

367

Expression varPop(Object field); // Population variance

368

Expression varSamp(Object field); // Sample variance

369

370

// String aggregations

371

Expression listagg(Object field, Object separator); // Concatenate values with separator

372

Expression collect(Object field); // Collect values into collection

373

374

// Boolean aggregations

375

Expression boolAnd(Object field); // Logical AND of boolean values

376

Expression boolOr(Object field); // Logical OR of boolean values

377

378

// First/Last functions

379

Expression first(Object field); // First value in group

380

Expression last(Object field); // Last value in group

381

```