or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

aggregation-windows.mdbackends.mdconfiguration.mdexpressions.mdindex.mdselectors.mdsql-integration.mdtable-construction.mdtable-operations.mdtemporal.mdudfs.md

aggregation-windows.mddocs/

0

# Aggregation and Window Functions

1

2

Statistical aggregation functions and window operations including ranking, cumulative calculations, and frame-based computations for advanced analytics.

3

4

## Capabilities

5

6

### Basic Aggregations

7

8

Core statistical aggregation functions.

9

10

```python { .api }

11

def sum(arg):

12

"""Sum of values."""

13

14

def mean(arg):

15

"""Average of values."""

16

17

def count(arg=None):

18

"""

19

Count of values.

20

21

Parameters:

22

- arg: expression to count, or None to count rows

23

24

Returns:

25

Count expression

26

"""

27

28

def min(arg):

29

"""Minimum value."""

30

31

def max(arg):

32

"""Maximum value."""

33

34

def std(arg):

35

"""Standard deviation."""

36

37

def var(arg):

38

"""Variance."""

39

```

40

41

**Usage Examples:**

42

```python

43

# Basic aggregations

44

result = table.aggregate(

45

total_sales=table.sales.sum(),

46

avg_price=table.price.mean(),

47

num_orders=table.count(),

48

min_date=table.order_date.min(),

49

max_date=table.order_date.max()

50

)

51

52

# Grouped aggregations

53

by_category = (

54

table

55

.group_by('category')

56

.aggregate(

57

revenue=table.sales.sum(),

58

avg_rating=table.rating.mean(),

59

order_count=table.count()

60

)

61

)

62

```

63

64

### Window Specifications

65

66

Define window frames for analytical functions.

67

68

```python { .api }

69

def window(

70

group_by=None,

71

order_by=None,

72

preceding=None,

73

following=None,

74

rows=None,

75

range=None

76

):

77

"""

78

Create window specification.

79

80

Parameters:

81

- group_by: columns to partition by

82

- order_by: columns to order by

83

- preceding: number of preceding rows/range

84

- following: number of following rows/range

85

- rows: tuple of (preceding, following) for row frame

86

- range: tuple of (preceding, following) for range frame

87

88

Returns:

89

Window specification

90

"""

91

92

def rows_window(preceding=None, following=None):

93

"""Create row-based window frame."""

94

95

def range_window(preceding=None, following=None):

96

"""Create range-based window frame."""

97

98

def cumulative_window():

99

"""Create cumulative window (unbounded preceding to current row)."""

100

101

def trailing_window(n):

102

"""Create trailing window of n rows."""

103

104

def trailing_range_window(interval):

105

"""Create trailing range window."""

106

```

107

108

**Usage Examples:**

109

```python

110

import ibis

111

112

# Define window specifications

113

win = ibis.window(

114

group_by='department',

115

order_by='salary',

116

rows=(2, 2) # 2 preceding, 2 following

117

)

118

119

# Row-based window

120

row_win = ibis.rows_window(preceding=5, following=0)

121

122

# Cumulative window

123

cum_win = ibis.cumulative_window()

124

125

# Trailing window

126

trail_win = ibis.trailing_window(10)

127

```

128

129

### Ranking Functions

130

131

Window functions for ranking and ordering.

132

133

```python { .api }

134

def row_number():

135

"""

136

Assign unique row numbers within window.

137

138

Returns:

139

Row number expression (1, 2, 3, ...)

140

"""

141

142

def rank():

143

"""

144

Assign rank with gaps for ties.

145

146

Returns:

147

Rank expression (1, 2, 2, 4, ...)

148

"""

149

150

def dense_rank():

151

"""

152

Assign rank without gaps for ties.

153

154

Returns:

155

Dense rank expression (1, 2, 2, 3, ...)

156

"""

157

158

def percent_rank():

159

"""

160

Percentile rank (0 to 1).

161

162

Returns:

163

Percent rank expression

164

"""

165

166

def cume_dist():

167

"""

168

Cumulative distribution (0 to 1).

169

170

Returns:

171

Cumulative distribution expression

172

"""

173

174

def ntile(n):

175

"""

176

Divide rows into n buckets.

177

178

Parameters:

179

- n: int, number of buckets

180

181

Returns:

182

Bucket number expression (1 to n)

183

"""

184

```

185

186

**Usage Examples:**

187

```python

188

# Ranking within groups

189

result = table.select(

190

'name', 'department', 'salary',

191

rank=ibis.rank().over(

192

group_by='department',

193

order_by=ibis.desc('salary')

194

),

195

row_num=ibis.row_number().over(

196

group_by='department',

197

order_by=ibis.desc('salary')

198

),

199

percentile=ibis.percent_rank().over(

200

order_by='salary'

201

),

202

quartile=ibis.ntile(4).over(

203

order_by='salary'

204

)

205

)

206

```

207

208

### Aggregate Window Functions

209

210

Apply aggregations over window frames.

211

212

**Usage Examples:**

213

```python

214

# Moving averages and sums

215

result = table.select(

216

'date', 'sales',

217

moving_avg=table.sales.mean().over(

218

order_by='date',

219

rows=(6, 0) # 7-day moving average

220

),

221

running_total=table.sales.sum().over(

222

order_by='date',

223

rows=(None, 0) # Cumulative sum

224

),

225

pct_of_total=table.sales / table.sales.sum().over()

226

)

227

228

# Comparisons with previous/next values

229

result = table.select(

230

'date', 'value',

231

prev_value=table.value.lag(1).over(order_by='date'),

232

next_value=table.value.lead(1).over(order_by='date'),

233

change=table.value - table.value.lag(1).over(order_by='date')

234

)

235

```

236

237

### Lag and Lead Functions

238

239

Access previous and next row values.

240

241

```python { .api }

242

expr.lag(offset=1, default=None):

243

"""

244

Get value from previous row.

245

246

Parameters:

247

- offset: int, number of rows back

248

- default: value to use when no previous row exists

249

250

Returns:

251

Lagged value expression

252

"""

253

254

expr.lead(offset=1, default=None):

255

"""

256

Get value from next row.

257

258

Parameters:

259

- offset: int, number of rows forward

260

- default: value to use when no next row exists

261

262

Returns:

263

Lead value expression

264

"""

265

266

expr.first():

267

"""Get first value in window frame."""

268

269

expr.last():

270

"""Get last value in window frame."""

271

```

272

273

**Usage Examples:**

274

```python

275

# Time series calculations

276

result = table.select(

277

'date', 'price',

278

prev_price=table.price.lag(1).over(order_by='date'),

279

price_change=table.price - table.price.lag(1).over(order_by='date'),

280

next_price=table.price.lead(1).over(order_by='date'),

281

first_price=table.price.first().over(

282

group_by='symbol',

283

order_by='date'

284

),

285

last_price=table.price.last().over(

286

group_by='symbol',

287

order_by='date'

288

)

289

)

290

```

291

292

### Statistical Window Functions

293

294

Advanced statistical calculations over windows.

295

296

**Usage Examples:**

297

```python

298

# Rolling statistics

299

result = table.select(

300

'date', 'value',

301

rolling_std=table.value.std().over(

302

order_by='date',

303

rows=(29, 0) # 30-day rolling standard deviation

304

),

305

rolling_var=table.value.var().over(

306

order_by='date',

307

rows=(29, 0)

308

),

309

z_score=(

310

(table.value - table.value.mean().over(rows=(29, 0))) /

311

table.value.std().over(rows=(29, 0))

312

).over(order_by='date')

313

)

314

```

315

316

### Conditional Aggregations

317

318

Aggregations with filtering conditions.

319

320

**Usage Examples:**

321

```python

322

# Conditional aggregations

323

result = table.aggregate(

324

total_sales=table.sales.sum(),

325

high_value_sales=table.sales.sum(where=table.sales > 1000),

326

avg_rating=table.rating.mean(),

327

avg_high_rating=table.rating.mean(where=table.rating >= 4.0),

328

premium_count=table.count(where=table.tier == 'premium')

329

)

330

331

# Window conditional aggregations

332

result = table.select(

333

'product', 'date', 'sales',

334

high_sales_pct=(

335

table.sales.sum(where=table.sales > 1000) /

336

table.sales.sum()

337

).over(group_by='product')

338

)

339

```

340

341

### Frame Boundary Specifications

342

343

Control window frame boundaries precisely.

344

345

```python { .api }

346

def preceding(n):

347

"""

348

Specify preceding boundary.

349

350

Parameters:

351

- n: int or None (unbounded)

352

353

Returns:

354

Preceding boundary specification

355

"""

356

357

def following(n):

358

"""

359

Specify following boundary.

360

361

Parameters:

362

- n: int or None (unbounded)

363

364

Returns:

365

Following boundary specification

366

"""

367

368

def range(start, end):

369

"""

370

Create range-based frame.

371

372

Parameters:

373

- start: boundary specification

374

- end: boundary specification

375

376

Returns:

377

Range frame specification

378

"""

379

```

380

381

**Usage Examples:**

382

```python

383

# Precise frame control

384

win1 = ibis.window(

385

order_by='date',

386

rows=(ibis.preceding(5), ibis.following(2))

387

)

388

389

win2 = ibis.window(

390

order_by='timestamp',

391

range=(

392

ibis.preceding(ibis.interval(hours=1)),

393

ibis.following(ibis.interval(minutes=30))

394

)

395

)

396

397

# Use with aggregations

398

result = table.select(

399

'timestamp', 'value',

400

hour_avg=table.value.mean().over(win2)

401

)

402

```

403

404

### Multiple Window Operations

405

406

Combine multiple window operations efficiently.

407

408

**Usage Examples:**

409

```python

410

# Multiple windows in single query

411

dept_win = ibis.window(group_by='department', order_by='salary')

412

company_win = ibis.window(order_by='salary')

413

414

result = table.select(

415

'name', 'department', 'salary',

416

dept_rank=ibis.rank().over(dept_win),

417

dept_pct=ibis.percent_rank().over(dept_win),

418

company_rank=ibis.rank().over(company_win),

419

dept_avg=table.salary.mean().over(group_by='department'),

420

company_avg=table.salary.mean().over()

421

)

422

```