or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

aggregation-windows.mdbackends.mdconfiguration.mdexpressions.mdindex.mdselectors.mdsql-integration.mdtable-construction.mdtable-operations.mdtemporal.mdudfs.md

udfs.mddocs/

0

# User-Defined Functions

1

2

Comprehensive UDF system supporting scalar, aggregate, and analytic functions with type safety and backend compatibility.

3

4

## Capabilities

5

6

### Scalar UDFs

7

8

Create custom scalar functions that operate on individual values.

9

10

```python { .api }

11

@ibis.udf.scalar(signature, **kwargs)

12

def function_name(arg1, arg2, ...):

13

"""

14

Decorator for scalar UDFs.

15

16

Parameters:

17

- signature: function signature with input and output types

18

- **kwargs: additional UDF configuration

19

20

Returns:

21

Decorated function that can be used in expressions

22

"""

23

```

24

25

**Usage Examples:**

26

```python

27

import ibis

28

from ibis import udf

29

30

# Simple scalar UDF

31

@udf.scalar((int, int) -> int)

32

def add_one(x):

33

return x + 1

34

35

# Use in expressions

36

result = table.select(

37

'value',

38

incremented=add_one(table.value)

39

)

40

41

# UDF with multiple parameters

42

@udf.scalar((str, str) -> str)

43

def concat_with_separator(a, b):

44

return f"{a}|{b}"

45

46

result = table.select(

47

combined=concat_with_separator(table.first_name, table.last_name)

48

)

49

```

50

51

### String manipulation UDF:

52

```python

53

@udf.scalar(str -> str)

54

def reverse_string(s):

55

return s[::-1] if s else None

56

57

# Mathematical UDF

58

@udf.scalar(float -> float)

59

def sigmoid(x):

60

import math

61

return 1 / (1 + math.exp(-x))

62

63

result = table.select(

64

'text',

65

reversed=reverse_string(table.text),

66

probability=sigmoid(table.score)

67

)

68

```

69

70

### Aggregate UDFs

71

72

Create custom aggregation functions.

73

74

```python { .api }

75

@ibis.udf.aggregate(signature, **kwargs)

76

def aggregate_function_name():

77

"""

78

Decorator for aggregate UDFs.

79

80

Parameters:

81

- signature: function signature with input and output types

82

- **kwargs: additional UDF configuration

83

84

Returns:

85

Decorated aggregation function

86

"""

87

```

88

89

**Usage Examples:**

90

```python

91

# Custom aggregation

92

@udf.aggregate([int] -> float)

93

def geometric_mean(values):

94

import math

95

if not values:

96

return None

97

98

product = 1

99

for v in values:

100

if v <= 0:

101

return None

102

product *= v

103

104

return math.pow(product, 1/len(values))

105

106

# Use in aggregations

107

result = table.group_by('category').aggregate(

108

count=table.count(),

109

avg=table.value.mean(),

110

geom_mean=geometric_mean(table.value)

111

)

112

113

# Custom median implementation

114

@udf.aggregate([float] -> float)

115

def custom_median(values):

116

if not values:

117

return None

118

119

sorted_vals = sorted(values)

120

n = len(sorted_vals)

121

122

if n % 2 == 0:

123

return (sorted_vals[n//2 - 1] + sorted_vals[n//2]) / 2

124

else:

125

return sorted_vals[n//2]

126

```

127

128

### Elementwise UDFs

129

130

Apply functions element-wise to array or column values.

131

132

```python { .api }

133

@ibis.udf.elementwise(signature, **kwargs)

134

def elementwise_function():

135

"""

136

Decorator for elementwise UDFs.

137

138

Parameters:

139

- signature: function signature

140

- **kwargs: additional configuration

141

142

Returns:

143

Decorated elementwise function

144

"""

145

```

146

147

**Usage Examples:**

148

```python

149

# Array transformation

150

@udf.elementwise([int] -> [int])

151

def double_array_elements(arr):

152

return [x * 2 for x in arr] if arr else None

153

154

result = table.select(

155

'numbers',

156

doubled=double_array_elements(table.numbers)

157

)

158

159

# String array processing

160

@udf.elementwise([str] -> [str])

161

def uppercase_strings(strings):

162

return [s.upper() for s in strings] if strings else None

163

```

164

165

### Reduction UDFs

166

167

Create custom reduction operations.

168

169

```python { .api }

170

@ibis.udf.reduction(signature, **kwargs)

171

def reduction_function():

172

"""

173

Decorator for reduction UDFs.

174

175

Parameters:

176

- signature: function signature

177

- **kwargs: additional configuration

178

179

Returns:

180

Decorated reduction function

181

"""

182

```

183

184

**Usage Examples:**

185

```python

186

# Custom reduction

187

@udf.reduction([float] -> dict)

188

def statistics_summary(values):

189

if not values:

190

return None

191

192

import statistics

193

return {

194

'mean': statistics.mean(values),

195

'median': statistics.median(values),

196

'mode': statistics.mode(values) if len(set(values)) < len(values) else None,

197

'std': statistics.stdev(values) if len(values) > 1 else 0

198

}

199

200

result = table.aggregate(

201

stats=statistics_summary(table.scores)

202

)

203

```

204

205

### Analytic UDFs

206

207

Create custom analytic/window functions.

208

209

```python { .api }

210

@ibis.udf.analytic(signature, **kwargs)

211

def analytic_function():

212

"""

213

Decorator for analytic UDFs.

214

215

Parameters:

216

- signature: function signature

217

- **kwargs: additional configuration

218

219

Returns:

220

Decorated analytic function

221

"""

222

```

223

224

**Usage Examples:**

225

```python

226

# Custom ranking function

227

@udf.analytic([float] -> [int])

228

def custom_rank(values):

229

"""Custom ranking with specific tie-breaking logic."""

230

sorted_pairs = sorted(enumerate(values), key=lambda x: (-x[1], x[0]))

231

ranks = [0] * len(values)

232

233

for rank, (orig_idx, _) in enumerate(sorted_pairs, 1):

234

ranks[orig_idx] = rank

235

236

return ranks

237

238

# Use with window

239

result = table.select(

240

'name', 'score',

241

custom_ranking=custom_rank(table.score).over(

242

group_by='category',

243

order_by='score'

244

)

245

)

246

```

247

248

### UDF Type System

249

250

Specify precise types for UDF inputs and outputs.

251

252

```python { .api }

253

# Type specification examples

254

str -> str # String input, string output

255

(int, float) -> bool # Two inputs, boolean output

256

[int] -> float # Array input, scalar output

257

{str: int} -> [str] # Map input, array output

258

Optional[str] -> str # Nullable input

259

```

260

261

**Usage Examples:**

262

```python

263

# Complex type signatures

264

@udf.scalar((Optional[str], int) -> Optional[str])

265

def truncate_string(s, max_len):

266

if s is None:

267

return None

268

return s[:max_len] if len(s) > max_len else s

269

270

# Array operations

271

@udf.scalar([float] -> float)

272

def array_variance(arr):

273

if not arr or len(arr) < 2:

274

return None

275

276

mean = sum(arr) / len(arr)

277

return sum((x - mean) ** 2 for x in arr) / (len(arr) - 1)

278

279

# Map operations

280

@udf.scalar({str: int} -> int)

281

def map_sum(mapping):

282

return sum(mapping.values()) if mapping else 0

283

```

284

285

### Backend-Specific UDFs

286

287

Create UDFs optimized for specific backends.

288

289

**Usage Examples:**

290

```python

291

# Backend-specific implementation

292

@udf.scalar(str -> str, backend='postgres')

293

def postgres_upper(s):

294

# PostgreSQL-specific implementation

295

return s.upper() if s else None

296

297

@udf.scalar(str -> str, backend='bigquery')

298

def bigquery_upper(s):

299

# BigQuery-specific implementation

300

return s.upper() if s else None

301

302

# SQL-based UDF (for SQL backends)

303

@udf.scalar(int -> int, sql="CASE WHEN {0} > 0 THEN {0} ELSE 0 END")

304

def positive_only(x):

305

pass # Implementation provided via SQL

306

307

result = table.select(

308

positive_value=positive_only(table.value)

309

)

310

```

311

312

### UDF with External Dependencies

313

314

Use external libraries in UDFs.

315

316

**Usage Examples:**

317

```python

318

# UDF with external dependencies

319

@udf.scalar(str -> float)

320

def sentiment_score(text):

321

# This would require textblob to be available

322

try:

323

from textblob import TextBlob

324

if not text:

325

return 0.0

326

blob = TextBlob(text)

327

return blob.sentiment.polarity

328

except ImportError:

329

return 0.0 # Fallback if library not available

330

331

# JSON processing UDF

332

@udf.scalar(str -> dict)

333

def parse_json_safe(json_str):

334

import json

335

try:

336

return json.loads(json_str) if json_str else {}

337

except json.JSONDecodeError:

338

return {}

339

340

result = table.select(

341

'review_text',

342

sentiment=sentiment_score(table.review_text),

343

metadata=parse_json_safe(table.metadata_json)

344

)

345

```

346

347

### UDF Error Handling

348

349

Handle errors gracefully in UDFs.

350

351

**Usage Examples:**

352

```python

353

# Safe division UDF

354

@udf.scalar((float, float) -> Optional[float])

355

def safe_divide(a, b):

356

try:

357

return a / b if b != 0 else None

358

except (TypeError, ZeroDivisionError):

359

return None

360

361

# String processing with error handling

362

@udf.scalar(str -> Optional[str])

363

def extract_domain(email):

364

try:

365

if email and '@' in email:

366

return email.split('@')[1].lower()

367

return None

368

except (AttributeError, IndexError):

369

return None

370

371

result = table.select(

372

'email',

373

domain=extract_domain(table.email),

374

ratio=safe_divide(table.numerator, table.denominator)

375

)

376

```

377

378

### Performance Considerations

379

380

Tips for efficient UDF implementation.

381

382

**Usage Examples:**

383

```python

384

# Vectorized operations when possible

385

@udf.scalar([float] -> float)

386

def efficient_mean(values):

387

# Use numpy for better performance if available

388

try:

389

import numpy as np

390

return float(np.mean(values)) if values else None

391

except ImportError:

392

return sum(values) / len(values) if values else None

393

394

# Caching expensive computations

395

@udf.scalar(str -> str)

396

def expensive_transformation(text):

397

# Cache results for repeated calls

398

if not hasattr(expensive_transformation, 'cache'):

399

expensive_transformation.cache = {}

400

401

if text in expensive_transformation.cache:

402

return expensive_transformation.cache[text]

403

404

# Expensive computation here

405

result = text.upper() # Simplified example

406

expensive_transformation.cache[text] = result

407

return result

408

```