Tessl Tile for pypi/ibis-framework@10.8.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

aggregation-windows.md backends.md configuration.md expressions.md index.md selectors.md sql-integration.md table-construction.md table-operations.md temporal.md udfs.md

udfs.mddocs/

0
# User-Defined Functions
1

2
Comprehensive UDF system supporting scalar, aggregate, and analytic functions with type safety and backend compatibility.
3

4
## Capabilities
5

6
### Scalar UDFs
7

8
Create custom scalar functions that operate on individual values.
9

10
```python { .api }
11
@ibis.udf.scalar(signature, **kwargs)
12
def function_name(arg1, arg2, ...):
13
    """
14
    Decorator for scalar UDFs.
15
    
16
    Parameters:
17
    - signature: function signature with input and output types
18
    - **kwargs: additional UDF configuration
19
    
20
    Returns:
21
    Decorated function that can be used in expressions
22
    """
23
```
24

25
**Usage Examples:**
26
```python
27
import ibis
28
from ibis import udf
29

30
# Simple scalar UDF
31
@udf.scalar((int, int) -> int)
32
def add_one(x):
33
    return x + 1
34

35
# Use in expressions
36
result = table.select(
37
    'value',
38
    incremented=add_one(table.value)
39
)
40

41
# UDF with multiple parameters
42
@udf.scalar((str, str) -> str)
43
def concat_with_separator(a, b):
44
    return f"{a}|{b}"
45

46
result = table.select(
47
    combined=concat_with_separator(table.first_name, table.last_name)
48
)
49
```
50

51
### String manipulation UDF:
52
```python
53
@udf.scalar(str -> str)
54
def reverse_string(s):
55
    return s[::-1] if s else None
56

57
# Mathematical UDF
58
@udf.scalar(float -> float)
59
def sigmoid(x):
60
    import math
61
    return 1 / (1 + math.exp(-x))
62

63
result = table.select(
64
    'text',
65
    reversed=reverse_string(table.text),
66
    probability=sigmoid(table.score)
67
)
68
```
69

70
### Aggregate UDFs
71

72
Create custom aggregation functions.
73

74
```python { .api }
75
@ibis.udf.aggregate(signature, **kwargs)
76
def aggregate_function_name():
77
    """
78
    Decorator for aggregate UDFs.
79
    
80
    Parameters:
81
    - signature: function signature with input and output types
82
    - **kwargs: additional UDF configuration
83
    
84
    Returns:
85
    Decorated aggregation function
86
    """
87
```
88

89
**Usage Examples:**
90
```python
91
# Custom aggregation
92
@udf.aggregate([int] -> float)
93
def geometric_mean(values):
94
    import math
95
    if not values:
96
        return None
97
    
98
    product = 1
99
    for v in values:
100
        if v <= 0:
101
            return None
102
        product *= v
103
    
104
    return math.pow(product, 1/len(values))
105

106
# Use in aggregations
107
result = table.group_by('category').aggregate(
108
    count=table.count(),
109
    avg=table.value.mean(),
110
    geom_mean=geometric_mean(table.value)
111
)
112

113
# Custom median implementation
114
@udf.aggregate([float] -> float)
115
def custom_median(values):
116
    if not values:
117
        return None
118
    
119
    sorted_vals = sorted(values)
120
    n = len(sorted_vals)
121
    
122
    if n % 2 == 0:
123
        return (sorted_vals[n//2 - 1] + sorted_vals[n//2]) / 2
124
    else:
125
        return sorted_vals[n//2]
126
```
127

128
### Elementwise UDFs
129

130
Apply functions element-wise to array or column values.
131

132
```python { .api }
133
@ibis.udf.elementwise(signature, **kwargs)
134
def elementwise_function():
135
    """
136
    Decorator for elementwise UDFs.
137
    
138
    Parameters:
139
    - signature: function signature
140
    - **kwargs: additional configuration
141
    
142
    Returns:
143
    Decorated elementwise function
144
    """
145
```
146

147
**Usage Examples:**
148
```python
149
# Array transformation
150
@udf.elementwise([int] -> [int])
151
def double_array_elements(arr):
152
    return [x * 2 for x in arr] if arr else None
153

154
result = table.select(
155
    'numbers',
156
    doubled=double_array_elements(table.numbers)
157
)
158

159
# String array processing
160
@udf.elementwise([str] -> [str])
161
def uppercase_strings(strings):
162
    return [s.upper() for s in strings] if strings else None
163
```
164

165
### Reduction UDFs
166

167
Create custom reduction operations.
168

169
```python { .api }
170
@ibis.udf.reduction(signature, **kwargs)
171
def reduction_function():
172
    """
173
    Decorator for reduction UDFs.
174
    
175
    Parameters:
176
    - signature: function signature
177
    - **kwargs: additional configuration
178
    
179
    Returns:
180
    Decorated reduction function
181
    """
182
```
183

184
**Usage Examples:**
185
```python
186
# Custom reduction
187
@udf.reduction([float] -> dict)
188
def statistics_summary(values):
189
    if not values:
190
        return None
191
    
192
    import statistics
193
    return {
194
        'mean': statistics.mean(values),
195
        'median': statistics.median(values),
196
        'mode': statistics.mode(values) if len(set(values)) < len(values) else None,
197
        'std': statistics.stdev(values) if len(values) > 1 else 0
198
    }
199

200
result = table.aggregate(
201
    stats=statistics_summary(table.scores)
202
)
203
```
204

205
### Analytic UDFs
206

207
Create custom analytic/window functions.
208

209
```python { .api }
210
@ibis.udf.analytic(signature, **kwargs)
211
def analytic_function():
212
    """
213
    Decorator for analytic UDFs.
214
    
215
    Parameters:
216
    - signature: function signature
217
    - **kwargs: additional configuration
218
    
219
    Returns:
220
    Decorated analytic function
221
    """
222
```
223

224
**Usage Examples:**
225
```python
226
# Custom ranking function
227
@udf.analytic([float] -> [int])
228
def custom_rank(values):
229
    """Custom ranking with specific tie-breaking logic."""
230
    sorted_pairs = sorted(enumerate(values), key=lambda x: (-x[1], x[0]))
231
    ranks = [0] * len(values)
232
    
233
    for rank, (orig_idx, _) in enumerate(sorted_pairs, 1):
234
        ranks[orig_idx] = rank
235
    
236
    return ranks
237

238
# Use with window
239
result = table.select(
240
    'name', 'score',
241
    custom_ranking=custom_rank(table.score).over(
242
        group_by='category',
243
        order_by='score'
244
    )
245
)
246
```
247

248
### UDF Type System
249

250
Specify precise types for UDF inputs and outputs.
251

252
```python { .api }
253
# Type specification examples
254
str -> str                    # String input, string output
255
(int, float) -> bool         # Two inputs, boolean output
256
[int] -> float               # Array input, scalar output
257
{str: int} -> [str]          # Map input, array output
258
Optional[str] -> str         # Nullable input
259
```
260

261
**Usage Examples:**
262
```python
263
# Complex type signatures
264
@udf.scalar((Optional[str], int) -> Optional[str])
265
def truncate_string(s, max_len):
266
    if s is None:
267
        return None
268
    return s[:max_len] if len(s) > max_len else s
269

270
# Array operations
271
@udf.scalar([float] -> float)
272
def array_variance(arr):
273
    if not arr or len(arr) < 2:
274
        return None
275
    
276
    mean = sum(arr) / len(arr)
277
    return sum((x - mean) ** 2 for x in arr) / (len(arr) - 1)
278

279
# Map operations
280
@udf.scalar({str: int} -> int)
281
def map_sum(mapping):
282
    return sum(mapping.values()) if mapping else 0
283
```
284

285
### Backend-Specific UDFs
286

287
Create UDFs optimized for specific backends.
288

289
**Usage Examples:**
290
```python
291
# Backend-specific implementation
292
@udf.scalar(str -> str, backend='postgres')
293
def postgres_upper(s):
294
    # PostgreSQL-specific implementation
295
    return s.upper() if s else None
296

297
@udf.scalar(str -> str, backend='bigquery') 
298
def bigquery_upper(s):
299
    # BigQuery-specific implementation
300
    return s.upper() if s else None
301

302
# SQL-based UDF (for SQL backends)
303
@udf.scalar(int -> int, sql="CASE WHEN {0} > 0 THEN {0} ELSE 0 END")
304
def positive_only(x):
305
    pass  # Implementation provided via SQL
306

307
result = table.select(
308
    positive_value=positive_only(table.value)
309
)
310
```
311

312
### UDF with External Dependencies
313

314
Use external libraries in UDFs.
315

316
**Usage Examples:**
317
```python
318
# UDF with external dependencies
319
@udf.scalar(str -> float)
320
def sentiment_score(text):
321
    # This would require textblob to be available
322
    try:
323
        from textblob import TextBlob
324
        if not text:
325
            return 0.0
326
        blob = TextBlob(text)
327
        return blob.sentiment.polarity
328
    except ImportError:
329
        return 0.0  # Fallback if library not available
330

331
# JSON processing UDF
332
@udf.scalar(str -> dict)
333
def parse_json_safe(json_str):
334
    import json
335
    try:
336
        return json.loads(json_str) if json_str else {}
337
    except json.JSONDecodeError:
338
        return {}
339

340
result = table.select(
341
    'review_text',
342
    sentiment=sentiment_score(table.review_text),
343
    metadata=parse_json_safe(table.metadata_json)
344
)
345
```
346

347
### UDF Error Handling
348

349
Handle errors gracefully in UDFs.
350

351
**Usage Examples:**
352
```python
353
# Safe division UDF
354
@udf.scalar((float, float) -> Optional[float])
355
def safe_divide(a, b):
356
    try:
357
        return a / b if b != 0 else None
358
    except (TypeError, ZeroDivisionError):
359
        return None
360

361
# String processing with error handling
362
@udf.scalar(str -> Optional[str])
363
def extract_domain(email):
364
    try:
365
        if email and '@' in email:
366
            return email.split('@')[1].lower()
367
        return None
368
    except (AttributeError, IndexError):
369
        return None
370

371
result = table.select(
372
    'email',
373
    domain=extract_domain(table.email),
374
    ratio=safe_divide(table.numerator, table.denominator)
375
)
376
```
377

378
### Performance Considerations
379

380
Tips for efficient UDF implementation.
381

382
**Usage Examples:**
383
```python
384
# Vectorized operations when possible
385
@udf.scalar([float] -> float)
386
def efficient_mean(values):
387
    # Use numpy for better performance if available
388
    try:
389
        import numpy as np
390
        return float(np.mean(values)) if values else None
391
    except ImportError:
392
        return sum(values) / len(values) if values else None
393

394
# Caching expensive computations
395
@udf.scalar(str -> str)
396
def expensive_transformation(text):
397
    # Cache results for repeated calls
398
    if not hasattr(expensive_transformation, 'cache'):
399
        expensive_transformation.cache = {}
400
    
401
    if text in expensive_transformation.cache:
402
        return expensive_transformation.cache[text]
403
    
404
    # Expensive computation here
405
    result = text.upper()  # Simplified example
406
    expensive_transformation.cache[text] = result
407
    return result
408
```

Version

Tile

Files

udfs.md.css-3qkkll{font-size:var(--chakra-font-sizes-sm);font-weight:var(--chakra-font-weights-normal);color:var(--chakra-colors-gray-300);}docs/

udfs.mddocs/