0
# Compute Functions
1
2
High-performance vectorized compute operations providing 200+ functions optimized for columnar data. The compute engine enables efficient mathematical operations, string processing, temporal calculations, aggregations, and filtering on Arrow arrays and tables.
3
4
## Capabilities
5
6
### Core Compute Infrastructure
7
8
Function registration, execution, and expression system for building complex computational pipelines with lazy evaluation and optimization.
9
10
```python { .api }
11
def call_function(name, args, options=None, memory_pool=None):
12
"""
13
Call compute function by name.
14
15
Parameters:
16
- name: str, function name
17
- args: list, function arguments (arrays, scalars, tables)
18
- options: FunctionOptions, function-specific options
19
- memory_pool: MemoryPool, memory pool for allocations
20
21
Returns:
22
Array, Scalar, or Table: Result of computation
23
"""
24
25
def get_function(name):
26
"""
27
Get registered function by name.
28
29
Parameters:
30
- name: str, function name
31
32
Returns:
33
Function: Registered function object
34
"""
35
36
def list_functions():
37
"""
38
List all available function names.
39
40
Returns:
41
list of str: Available function names
42
"""
43
44
def function_registry():
45
"""
46
Get global function registry.
47
48
Returns:
49
FunctionRegistry: Global function registry
50
"""
51
52
class Expression:
53
"""
54
Compute expression for lazy evaluation and optimization.
55
"""
56
57
def equals(self, other):
58
"""Check equality with another expression."""
59
60
def to_string(self):
61
"""String representation of expression."""
62
63
def field(name):
64
"""
65
Create field reference expression.
66
67
Parameters:
68
- name: str, field name
69
70
Returns:
71
Expression: Field reference expression
72
"""
73
74
def scalar(value):
75
"""
76
Create scalar literal expression.
77
78
Parameters:
79
- value: scalar value
80
81
Returns:
82
Expression: Scalar literal expression
83
"""
84
85
class Function:
86
"""Base class for compute functions."""
87
88
@property
89
def name(self):
90
"""Function name."""
91
92
@property
93
def arity(self):
94
"""Function arity (number of arguments)."""
95
96
@property
97
def doc(self):
98
"""Function documentation."""
99
100
class FunctionOptions:
101
"""Base class for function options."""
102
103
class FunctionRegistry:
104
"""Registry of available compute functions."""
105
106
def get_function(self, name):
107
"""Get function by name."""
108
109
def get_function_names(self):
110
"""Get all function names."""
111
```
112
113
### Mathematical Operations
114
115
Arithmetic operations, mathematical functions, and numeric computations optimized for columnar data processing.
116
117
```python { .api }
118
# Arithmetic operations
119
def add(x, y):
120
"""Element-wise addition."""
121
122
def subtract(x, y):
123
"""Element-wise subtraction."""
124
125
def multiply(x, y):
126
"""Element-wise multiplication."""
127
128
def divide(x, y):
129
"""Element-wise division."""
130
131
def power(base, exponent):
132
"""Element-wise exponentiation."""
133
134
def negate(x):
135
"""Element-wise negation."""
136
137
def abs(x):
138
"""Element-wise absolute value."""
139
140
def sign(x):
141
"""Element-wise sign (-1, 0, 1)."""
142
143
# Mathematical functions
144
def sqrt(x):
145
"""Element-wise square root."""
146
147
def exp(x):
148
"""Element-wise exponential (e^x)."""
149
150
def ln(x):
151
"""Element-wise natural logarithm."""
152
153
def log10(x):
154
"""Element-wise base-10 logarithm."""
155
156
def log2(x):
157
"""Element-wise base-2 logarithm."""
158
159
def log1p(x):
160
"""Element-wise log(1 + x)."""
161
162
def floor(x):
163
"""Element-wise floor."""
164
165
def ceil(x):
166
"""Element-wise ceiling."""
167
168
def trunc(x):
169
"""Element-wise truncation toward zero."""
170
171
def round(x, ndigits=0, round_mode='half_to_even'):
172
"""
173
Element-wise rounding.
174
175
Parameters:
176
- x: Array, input array
177
- ndigits: int, number of decimal places
178
- round_mode: str, rounding mode
179
180
Returns:
181
Array: Rounded array
182
"""
183
184
# Trigonometric functions
185
def sin(x):
186
"""Element-wise sine."""
187
188
def cos(x):
189
"""Element-wise cosine."""
190
191
def tan(x):
192
"""Element-wise tangent."""
193
194
def asin(x):
195
"""Element-wise arcsine."""
196
197
def acos(x):
198
"""Element-wise arccosine."""
199
200
def atan(x):
201
"""Element-wise arctangent."""
202
203
def atan2(y, x):
204
"""Element-wise arctangent of y/x."""
205
206
# Bitwise operations
207
def bit_wise_and(x, y):
208
"""Element-wise bitwise AND."""
209
210
def bit_wise_or(x, y):
211
"""Element-wise bitwise OR."""
212
213
def bit_wise_xor(x, y):
214
"""Element-wise bitwise XOR."""
215
216
def bit_wise_not(x):
217
"""Element-wise bitwise NOT."""
218
219
def shift_left(x, y):
220
"""Element-wise left bit shift."""
221
222
def shift_right(x, y):
223
"""Element-wise right bit shift."""
224
```
225
226
### Comparison and Logical Operations
227
228
Element-wise comparisons, logical operations, and boolean functions for filtering and conditional logic.
229
230
```python { .api }
231
# Comparison operations
232
def equal(x, y):
233
"""Element-wise equality comparison."""
234
235
def not_equal(x, y):
236
"""Element-wise inequality comparison."""
237
238
def less(x, y):
239
"""Element-wise less than comparison."""
240
241
def less_equal(x, y):
242
"""Element-wise less than or equal comparison."""
243
244
def greater(x, y):
245
"""Element-wise greater than comparison."""
246
247
def greater_equal(x, y):
248
"""Element-wise greater than or equal comparison."""
249
250
# Logical operations
251
def and_(x, y):
252
"""Element-wise logical AND."""
253
254
def or_(x, y):
255
"""Element-wise logical OR."""
256
257
def xor(x, y):
258
"""Element-wise logical XOR."""
259
260
def invert(x):
261
"""Element-wise logical NOT."""
262
263
# Null handling
264
def is_null(x):
265
"""Check for null values."""
266
267
def is_valid(x):
268
"""Check for non-null values."""
269
270
def is_nan(x):
271
"""Check for NaN values (floating point)."""
272
273
def is_finite(x):
274
"""Check for finite values."""
275
276
def is_infinite(x):
277
"""Check for infinite values."""
278
279
def fill_null(values, fill_value):
280
"""Fill null values with specified value."""
281
282
def coalesce(*arrays):
283
"""Return first non-null value from arrays."""
284
285
def choose(indices, *arrays):
286
"""Choose values from arrays based on indices."""
287
288
def if_else(condition, left, right):
289
"""Conditional selection (ternary operator)."""
290
291
def case_when(*args):
292
"""
293
Multi-branch conditional selection.
294
295
Parameters:
296
- args: first a struct of boolean conditions (e.g. built with make_struct), then one value per condition, optionally followed by an else value
297
298
Returns:
299
Array: Selected values based on conditions
300
"""
301
```
302
303
### Aggregation Functions
304
305
Statistical and aggregation functions for computing summary statistics and reductions over arrays and groups.
306
307
```python { .api }
308
# Basic aggregations
309
def sum(array, skip_nulls=True, min_count=1):
310
"""
311
Sum of array elements.
312
313
Parameters:
314
- array: Array, input array
315
- skip_nulls: bool, ignore null values
316
- min_count: int, minimum non-null values required
317
318
Returns:
319
Scalar: Sum of elements
320
"""
321
322
def mean(array, skip_nulls=True, min_count=1):
323
"""Mean of array elements."""
324
325
def count(array, mode='only_valid'):
326
"""
327
Count array elements.
328
329
Parameters:
330
- array: Array, input array
331
- mode: str, counting mode ('only_valid', 'only_null', 'all')
332
333
Returns:
334
Scalar: Count of elements
335
"""
336
337
def count_distinct(array, mode='only_valid'):
338
"""Count distinct elements."""
339
340
def min(array, skip_nulls=True, min_count=1):
341
"""Minimum value."""
342
343
def max(array, skip_nulls=True, min_count=1):
344
"""Maximum value."""
345
346
def min_max(array, skip_nulls=True, min_count=1):
347
"""
348
Minimum and maximum values.
349
350
Returns:
351
StructScalar: Struct with 'min' and 'max' fields
352
"""
353
354
def any(array, skip_nulls=True, min_count=1):
355
"""Logical OR reduction (any true values)."""
356
357
def all(array, skip_nulls=True, min_count=1):
358
"""Logical AND reduction (all true values)."""
359
360
# Statistical functions
361
def variance(array, ddof=0, skip_nulls=True, min_count=1):
362
"""
363
Variance of array elements.
364
365
Parameters:
366
- array: Array, input array
367
- ddof: int, delta degrees of freedom
368
- skip_nulls: bool, ignore null values
369
- min_count: int, minimum non-null values required
370
371
Returns:
372
Scalar: Variance
373
"""
374
375
def stddev(array, ddof=0, skip_nulls=True, min_count=1):
376
"""Standard deviation."""
377
378
def quantile(array, q=0.5, interpolation='linear', skip_nulls=True, min_count=1):
379
"""
380
Quantile of array elements.
381
382
Parameters:
383
- array: Array, input array
384
- q: float or list, quantile(s) to compute (0.0 to 1.0)
385
- interpolation: str, interpolation method
386
- skip_nulls: bool, ignore null values
387
- min_count: int, minimum non-null values required
388
389
Returns:
390
Scalar or Array: Quantile value(s)
391
"""
392
393
def mode(array, n=1, skip_nulls=True, min_count=1):
394
"""
395
Mode (most frequent values).
396
397
Parameters:
398
- array: Array, input array
399
- n: int, number of modes to return
400
- skip_nulls: bool, ignore null values
401
- min_count: int, minimum non-null values required
402
403
Returns:
404
StructArray: Modes with counts
405
"""
406
407
def tdigest(array, q=None, delta=100, buffer_size=500, skip_nulls=True, min_count=1):
408
"""
409
T-Digest quantile approximation.
410
411
Parameters:
412
- array: Array, input array
413
- q: list of float, quantiles to compute
414
- delta: int, compression parameter
415
- buffer_size: int, buffer size
416
- skip_nulls: bool, ignore null values
417
- min_count: int, minimum non-null values required
418
419
Returns:
420
Array: Approximate quantiles
421
"""
422
423
# Product and cumulative operations
424
def product(array, skip_nulls=True, min_count=1):
425
"""Product of array elements."""
426
427
def cumulative_sum(array, start=None, skip_nulls=True):
428
"""
429
Cumulative sum.
430
431
Parameters:
432
- array: Array, input array
433
- start: scalar, starting value
434
- skip_nulls: bool, ignore null values
435
436
Returns:
437
Array: Cumulative sums
438
"""
439
440
def cumulative_sum_checked(array, start=None, skip_nulls=True):
441
"""Cumulative sum with overflow checking."""
442
443
def cumulative_prod(array, start=None, skip_nulls=True):
444
"""Cumulative product."""
445
446
def cumulative_max(array, skip_nulls=True):
447
"""Cumulative maximum."""
448
449
def cumulative_min(array, skip_nulls=True):
450
"""Cumulative minimum."""
451
```
452
453
### Array Operations
454
455
Functions for array manipulation, filtering, sorting, and selection operations.
456
457
```python { .api }
458
def take(data, indices, boundscheck=True):
459
"""
460
Select elements by indices.
461
462
Parameters:
463
- data: Array, input array
464
- indices: Array, selection indices
465
- boundscheck: bool, check index bounds
466
467
Returns:
468
Array: Selected elements
469
"""
470
471
def filter(data, selection_filter, null_selection_behavior='drop'):
472
"""
473
Filter array by boolean mask.
474
475
Parameters:
476
- data: Array, input array
477
- selection_filter: Array, boolean selection mask
478
- null_selection_behavior: str, how to handle nulls in mask
479
480
Returns:
481
Array: Filtered elements
482
"""
483
484
def slice(array, start, stop=None, step=1):
485
"""
486
Slice array.
487
488
Parameters:
489
- array: Array, input array
490
- start: int, start index
491
- stop: int, stop index (exclusive)
492
- step: int, step size
493
494
Returns:
495
Array: Sliced array
496
"""
497
498
def array_sort_indices(array, order='ascending', null_placement='at_end'):
499
"""
500
Get indices that would sort array.
501
502
Parameters:
503
- array: Array, input array
504
- order: str, sort order ('ascending', 'descending')
505
- null_placement: str, null placement ('at_start', 'at_end')
506
507
Returns:
508
Array: Sort indices
509
"""
510
511
def sort_indices(arrays, orders=None, null_placement=None):
512
"""
513
Get indices for sorting by multiple arrays.
514
515
Parameters:
516
- arrays: list of Array, sort keys
517
- orders: list of str, sort orders for each key
518
- null_placement: list of str, null placement for each key
519
520
Returns:
521
Array: Sort indices
522
"""
523
524
def partition_nth_indices(array, pivot, null_placement='at_end'):
525
"""
526
Partition array around nth element.
527
528
Parameters:
529
- array: Array, input array
530
- pivot: int, pivot index
531
- null_placement: str, null placement
532
533
Returns:
534
Array: Partition indices
535
"""
536
537
def top_k_unstable(array, k, sort_keys=None):
538
"""
539
Select the indices of the top k elements (ordering among ties is not stable).
540
541
Parameters:
542
- array: Array, input array
543
- k: int, number of elements to select
544
- sort_keys: list, sort keys for selection
545
546
Returns:
547
Array: Indices of the top k elements (pass to take() to obtain the values)
548
"""
549
550
def bottom_k_unstable(array, k, sort_keys=None):
551
"""
552
Select the indices of the bottom k elements (ordering among ties is not stable).
553
554
Parameters:
555
- array: Array, input array
556
- k: int, number of elements to select
557
- sort_keys: list, sort keys for selection
558
559
Returns:
560
Array: Indices of the bottom k elements (pass to take() to obtain the values)
561
"""
562
563
def unique(array):
564
"""
565
Get unique values.
566
567
Parameters:
568
- array: Array, input array
569
570
Returns:
571
Array: Unique values
572
"""
573
574
def value_counts(array):
575
"""
576
Count occurrences of each value.
577
578
Parameters:
579
- array: Array, input array
580
581
Returns:
582
StructArray: Values and their counts
583
"""
584
585
def dictionary_encode(array, null_encoding_behavior='mask'):
586
"""
587
Dictionary encode array.
588
589
Parameters:
590
- array: Array, input array
591
- null_encoding_behavior: str, null handling
592
593
Returns:
594
DictionaryArray: Dictionary encoded array
595
"""
596
597
def run_end_encode(array):
598
"""
599
Run-end encode array.
600
601
Parameters:
602
- array: Array, input array
603
604
Returns:
605
RunEndEncodedArray: Run-end encoded array
606
"""
607
```
608
609
### String Functions
610
611
Comprehensive string processing functions for text manipulation, pattern matching, and string transformations.
612
613
```python { .api }
614
# String length and properties
615
def utf8_length(strings):
616
"""UTF-8 character length of strings."""
617
618
def binary_length(strings):
619
"""Byte length of binary/string arrays."""
620
621
def utf8_is_alnum(strings):
622
"""Check if strings are alphanumeric."""
623
624
def utf8_is_alpha(strings):
625
"""Check if strings are alphabetic."""
626
627
def utf8_is_decimal(strings):
628
"""Check if strings are decimal."""
629
630
def utf8_is_digit(strings):
631
"""Check if strings contain only digits."""
632
633
def utf8_is_lower(strings):
634
"""Check if strings are lowercase."""
635
636
def utf8_is_numeric(strings):
637
"""Check if strings are numeric."""
638
639
def utf8_is_printable(strings):
640
"""Check if strings are printable."""
641
642
def utf8_is_space(strings):
643
"""Check if strings are whitespace."""
644
645
def utf8_is_title(strings):
646
"""Check if strings are titlecased."""
647
648
def utf8_is_upper(strings):
649
"""Check if strings are uppercase."""
650
651
# String transformations
652
def utf8_upper(strings):
653
"""Convert strings to uppercase."""
654
655
def utf8_lower(strings):
656
"""Convert strings to lowercase."""
657
658
def utf8_swapcase(strings):
659
"""Swap case of strings."""
660
661
def utf8_capitalize(strings):
662
"""Capitalize first character."""
663
664
def utf8_title(strings):
665
"""Convert to title case."""
666
667
def ascii_upper(strings):
668
"""Convert ASCII strings to uppercase."""
669
670
def ascii_lower(strings):
671
"""Convert ASCII strings to lowercase."""
672
673
def ascii_swapcase(strings):
674
"""Swap case of ASCII strings."""
675
676
def ascii_capitalize(strings):
677
"""Capitalize ASCII strings."""
678
679
# String padding and trimming
680
def utf8_ltrim(strings, characters=' '):
681
"""
682
Left trim strings.
683
684
Parameters:
685
- strings: Array, input strings
686
- characters: str, characters to trim
687
688
Returns:
689
Array: Left-trimmed strings
690
"""
691
692
def utf8_rtrim(strings, characters=' '):
693
"""Right trim strings."""
694
695
def utf8_trim(strings, characters=' '):
696
"""Trim strings from both ends."""
697
698
def utf8_ltrim_whitespace(strings):
699
"""Left trim whitespace."""
700
701
def utf8_rtrim_whitespace(strings):
702
"""Right trim whitespace."""
703
704
def utf8_trim_whitespace(strings):
705
"""Trim whitespace from both ends."""
706
707
def utf8_center(strings, width, padding=' '):
708
"""
709
Center strings with padding.
710
711
Parameters:
712
- strings: Array, input strings
713
- width: int, total width
714
- padding: str, padding character
715
716
Returns:
717
Array: Centered strings
718
"""
719
720
def utf8_lpad(strings, width, padding=' '):
721
"""Left pad strings."""
722
723
def utf8_rpad(strings, width, padding=' '):
724
"""Right pad strings."""
725
726
# String slicing and extraction
727
def utf8_slice_codeunits(strings, start, stop=None, step=1):
728
"""
729
Slice strings by code units.
730
731
Parameters:
732
- strings: Array, input strings
733
- start: int, start position
734
- stop: int, stop position
735
- step: int, step size
736
737
Returns:
738
Array: Sliced strings
739
"""
740
741
def utf8_reverse(strings):
742
"""Reverse strings."""
743
744
def utf8_replace_slice(strings, start, stop, replacement):
745
"""
746
Replace slice of strings.
747
748
Parameters:
749
- strings: Array, input strings
750
- start: int, start position
751
- stop: int, stop position
752
- replacement: str, replacement string
753
754
Returns:
755
Array: Strings with replaced slices
756
"""
757
758
# String searching and matching
759
def match_substring(strings, pattern, ignore_case=False):
760
"""
761
Check if strings contain substring.
762
763
Parameters:
764
- strings: Array, input strings
765
- pattern: str, substring pattern
766
- ignore_case: bool, case insensitive matching
767
768
Returns:
769
BooleanArray: Match results
770
"""
771
772
def match_substring_regex(strings, pattern, ignore_case=False):
773
"""
774
Check if strings match regex pattern.
775
776
Parameters:
777
- strings: Array, input strings
778
- pattern: str, regex pattern
779
- ignore_case: bool, case insensitive matching
780
781
Returns:
782
BooleanArray: Match results
783
"""
784
785
def find_substring(strings, pattern, ignore_case=False):
786
"""
787
Find first occurrence of substring.
788
789
Parameters:
790
- strings: Array, input strings
791
- pattern: str, substring pattern
792
- ignore_case: bool, case insensitive search
793
794
Returns:
795
Int32Array: First occurrence indices (-1 if not found)
796
"""
797
798
def find_substring_regex(strings, pattern, ignore_case=False):
799
"""Find first regex match."""
800
801
def count_substring(strings, pattern, ignore_case=False):
802
"""
803
Count occurrences of substring.
804
805
Parameters:
806
- strings: Array, input strings
807
- pattern: str, substring pattern
808
- ignore_case: bool, case insensitive counting
809
810
Returns:
811
Int32Array: Occurrence counts
812
"""
813
814
def count_substring_regex(strings, pattern, ignore_case=False):
815
"""Count regex matches."""
816
817
# String replacement
818
def replace_substring(strings, pattern, replacement, max_replacements=-1):
819
"""
820
Replace substring occurrences.
821
822
Parameters:
823
- strings: Array, input strings
824
- pattern: str, substring to replace
825
- replacement: str, replacement string
826
- max_replacements: int, maximum replacements (-1 for all)
827
828
Returns:
829
Array: Strings with replacements
830
"""
831
832
def replace_substring_regex(strings, pattern, replacement, max_replacements=-1):
833
"""Replace regex matches."""
834
835
def extract_regex(strings, pattern):
836
"""
837
Extract regex groups.
838
839
Parameters:
840
- strings: Array, input strings
841
- pattern: str, regex pattern with groups
842
843
Returns:
844
StructArray: Extracted groups
845
"""
846
847
# String splitting and joining
848
def split_pattern(strings, pattern, max_splits=-1, reverse=False):
849
"""
850
Split strings by pattern.
851
852
Parameters:
853
- strings: Array, input strings
854
- pattern: str, split pattern
855
- max_splits: int, maximum splits (-1 for unlimited)
856
- reverse: bool, split from right
857
858
Returns:
859
ListArray: Split components
860
"""
861
862
def split_pattern_regex(strings, pattern, max_splits=-1, reverse=False):
863
"""Split strings by regex pattern."""
864
865
def binary_join(lists, separator):
866
"""
867
Join binary arrays with separator.
868
869
Parameters:
870
- lists: ListArray, lists of binary values
871
- separator: bytes, join separator
872
873
Returns:
874
Array: Joined binary values
875
"""
876
877
def binary_join_element_wise(left, right, separator):
878
"""Element-wise binary join."""
879
```
880
881
### Temporal Functions
882
883
Date, time, and timestamp manipulation functions for temporal data processing and calendar operations.
884
885
```python { .api }
886
# Date/time extraction
887
def year(timestamps):
888
"""Extract year from timestamps."""
889
890
def month(timestamps):
891
"""Extract month from timestamps."""
892
893
def day(timestamps):
894
"""Extract day from timestamps."""
895
896
def day_of_week(timestamps, count_from_zero=True, week_start=1):
897
"""
898
Extract day of week.
899
900
Parameters:
901
- timestamps: Array, timestamp array
902
- count_from_zero: bool, whether to count from 0
903
- week_start: int, first day of week (1=Monday, 7=Sunday)
904
905
Returns:
906
Int32Array: Day of week values
907
"""
908
909
def day_of_year(timestamps):
910
"""Extract day of year."""
911
912
def iso_week(timestamps):
913
"""Extract ISO week number."""
914
915
def iso_year(timestamps):
916
"""Extract ISO year."""
917
918
def quarter(timestamps):
919
"""Extract quarter."""
920
921
def hour(timestamps):
922
"""Extract hour from timestamps."""
923
924
def minute(timestamps):
925
"""Extract minute from timestamps."""
926
927
def second(timestamps):
928
"""Extract second from timestamps."""
929
930
def millisecond(timestamps):
931
"""Extract millisecond from timestamps."""
932
933
def microsecond(timestamps):
934
"""Extract microsecond from timestamps."""
935
936
def nanosecond(timestamps):
937
"""Extract nanosecond from timestamps."""
938
939
def subsecond(timestamps):
940
"""Extract fractional seconds."""
941
942
# Temporal arithmetic
943
def years_between(start, end):
944
"""Calculate years between timestamps."""
945
946
def month_interval_between(start, end):
947
"""Calculate month intervals between timestamps."""
948
949
def day_time_interval_between(start, end):
950
"""Calculate day-time intervals between timestamps."""
951
952
def weeks_between(start, end):
953
"""Calculate weeks between timestamps."""
954
955
def days_between(start, end):
956
"""Calculate days between timestamps."""
957
958
def hours_between(start, end):
959
"""Calculate hours between timestamps."""
960
961
def minutes_between(start, end):
962
"""Calculate minutes between timestamps."""
963
964
def seconds_between(start, end):
965
"""Calculate seconds between timestamps."""
966
967
def milliseconds_between(start, end):
968
"""Calculate milliseconds between timestamps."""
969
970
def microseconds_between(start, end):
971
"""Calculate microseconds between timestamps."""
972
973
def nanoseconds_between(start, end):
974
"""Calculate nanoseconds between timestamps."""
975
976
# Temporal rounding and truncation
977
def floor_temporal(timestamps, unit='day', week_starts_monday=True, ceil_is_strictly_greater=False, calendar_based_origin=False):
978
"""
979
Floor timestamps to temporal unit.
980
981
Parameters:
982
- timestamps: Array, timestamp array
983
- unit: str, temporal unit ('year', 'month', 'day', 'hour', etc.)
984
- week_starts_monday: bool, week start day
985
- ceil_is_strictly_greater: bool, ceiling behavior
986
- calendar_based_origin: bool, use calendar-based origin
987
988
Returns:
989
Array: Floored timestamps
990
"""
991
992
def ceil_temporal(timestamps, unit='day', week_starts_monday=True, ceil_is_strictly_greater=False, calendar_based_origin=False):
993
"""Ceil timestamps to temporal unit."""
994
995
def round_temporal(timestamps, unit='day', week_starts_monday=True, ceil_is_strictly_greater=False, calendar_based_origin=False):
996
"""Round timestamps to temporal unit."""
997
998
# String parsing and formatting
999
def strftime(timestamps, format='%Y-%m-%d %H:%M:%S', locale='C'):
1000
"""
1001
Format timestamps as strings.
1002
1003
Parameters:
1004
- timestamps: Array, timestamp array
1005
- format: str, strftime format string
1006
- locale: str, locale for formatting
1007
1008
Returns:
1009
StringArray: Formatted timestamp strings
1010
"""
1011
1012
def strptime(strings, format, unit, error_is_null=False):
1013
"""
1014
Parse strings as timestamps.
1015
1016
Parameters:
1017
- strings: Array, string array
1018
- format: str, strptime format string
1019
- unit: str, timestamp unit
1020
- error_is_null: bool, return null on parse errors
1021
1022
Returns:
1023
TimestampArray: Parsed timestamps
1024
"""
1025
1026
# Timezone operations
1027
def assume_timezone(timestamps, timezone, ambiguous='raise', nonexistent='raise'):
1028
"""
1029
Assume timezone for naive timestamps.
1030
1031
Parameters:
1032
- timestamps: Array, naive timestamp array
1033
- timezone: str, timezone identifier
1034
- ambiguous: str, how to handle ambiguous times
1035
- nonexistent: str, how to handle nonexistent times
1036
1037
Returns:
1038
TimestampArray: Timezone-aware timestamps
1039
"""
1040
1041
def local_timestamp(timestamps):
1042
"""Convert timezone-aware timestamps to timezone-naive local (wall-clock) timestamps."""
1043
```
1044
1045
### Type Conversion Functions
1046
1047
Functions for casting and converting between different Arrow data types with configurable safety and behavior options.
1048
1049
```python { .api }
1050
def cast(array, target_type, safe=True, options=None):
1051
"""
1052
Cast array to different type.
1053
1054
Parameters:
1055
- array: Array, input array
1056
- target_type: DataType, target type
1057
- safe: bool, check for data loss
1058
- options: CastOptions, casting options
1059
1060
Returns:
1061
Array: Cast array
1062
"""
1063
1064
def can_cast(from_type, to_type):
1065
"""
1066
Check if type can be cast.
1067
1068
Parameters:
1069
- from_type: DataType, source type
1070
- to_type: DataType, target type
1071
1072
Returns:
1073
bool: Whether cast is supported
1074
"""
1075
1076
class CastOptions:
1077
"""
1078
Options for type casting.
1079
1080
Attributes:
1081
- safe: Whether to check for data loss
1082
- allow_int_overflow: Allow integer overflow
1083
- allow_time_truncate: Allow time truncation
1084
- allow_time_overflow: Allow time overflow
1085
- allow_decimal_truncate: Allow decimal truncation
1086
- allow_float_truncate: Allow float truncation
1087
"""
1088
```
1089
1090
### Random Number Generation
1091
1092
Functions for generating random numbers and sampling from distributions.
1093
1094
```python { .api }
1095
def random(n, initializer=None, options=None):
1096
"""
1097
Generate random numbers.
1098
1099
Parameters:
1100
- n: int, number of random values
1101
- initializer: int, random seed
1102
- options: RandomOptions, generation options
1103
1104
Returns:
1105
Array: Random values
1106
"""
1107
1108
class RandomOptions:
1109
"""
1110
Options for random number generation.
1111
1112
Attributes:
1113
- initializer: Random seed
1114
- distribution: Distribution type
1115
"""
1116
```
1117
1118
## Usage Examples
1119
1120
### Basic Computations
1121
1122
```python
1123
import pyarrow as pa
1124
import pyarrow.compute as pc
1125
1126
# Create sample data
1127
numbers = pa.array([1, 2, 3, 4, 5, None, 7, 8, 9, 10])
1128
strings = pa.array(['apple', 'banana', 'cherry', None, 'date'])
1129
1130
# Arithmetic operations
1131
doubled = pc.multiply(numbers, 2)
1132
sum_result = pc.sum(numbers)
1133
mean_result = pc.mean(numbers)
1134
1135
# String operations
1136
lengths = pc.utf8_length(strings)
1137
upper_strings = pc.utf8_upper(strings)
1138
contains_a = pc.match_substring(strings, 'a')
1139
1140
# Filtering and selection
1141
filtered = pc.filter(numbers, pc.greater(numbers, 5))
1142
top_3_indices = pc.top_k_unstable(numbers, 3)  # indices of the 3 largest values
1143
```
1144
1145
### Table Operations
1146
1147
```python
1148
import pyarrow as pa
1149
import pyarrow.compute as pc
1150
1151
# Create table
1152
table = pa.table({
1153
'id': [1, 2, 3, 4, 5],
1154
'name': ['Alice', 'Bob', 'Charlie', 'Diana', 'Eve'],
1155
'age': [25, 30, 35, 28, 32],
1156
'salary': [50000, 60000, 70000, 55000, 65000]
1157
})
1158
1159
# Filter table
1160
adults = table.filter(pc.greater_equal(table['age'], 30))
1161
1162
# Add computed column
1163
table_with_bonus = table.append_column(
1164
'bonus',
1165
pc.multiply(table['salary'], 0.1)
1166
)
1167
1168
# Aggregations
1169
total_salary = pc.sum(table['salary'])
1170
avg_age = pc.mean(table['age'])
1171
age_stats = pc.quantile(table['age'], [0.25, 0.5, 0.75])
1172
```
1173
1174
### Complex Expressions
1175
1176
```python
1177
import pyarrow as pa
1178
import pyarrow.compute as pc
1179
1180
# Create table with temporal data
1181
table = pa.table({
1182
'timestamp': pa.array([
1183
'2023-01-15 10:30:00',
1184
'2023-02-20 14:45:00',
1185
'2023-03-10 09:15:00',
1186
'2023-04-05 16:20:00'
1187
]).cast(pa.timestamp('s')),
1188
'value': [100, 200, 150, 300]
1189
})
1190
1191
# Extract temporal components
1192
table = table.append_column('year', pc.year(table['timestamp']))
1193
table = table.append_column('month', pc.month(table['timestamp']))
1194
table = table.append_column('day_of_week', pc.day_of_week(table['timestamp']))
1195
1196
# Complex filtering
1197
high_value_weekdays = table.filter(
1198
pc.and_(
1199
pc.greater(table['value'], 150),
1200
pc.less(table['day_of_week'], 5) # Monday=0 to Friday=4
1201
)
1202
)
1203
1204
# Conditional expressions
1205
table = table.append_column(
    'category',
    pc.case_when(
        # Conditions are passed together as one struct; the first true one wins
        pc.make_struct(
            pc.less(table['value'], 150),
            pc.less(table['value'], 250)
        ),
        'low',
        'medium',
        'high'  # else value when no condition matches
    )
)
1213
```
1214
1215
### User-Defined Functions
1216
1217
```python
1218
import pyarrow as pa
1219
import pyarrow.compute as pc
1220
1221
# Register scalar UDF
1222
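def double_and_add_one(ctx, x):
    # UDFs receive a context object as their first argument
    return pc.add(pc.multiply(x, 2), 1, memory_pool=ctx.memory_pool)

pc.register_scalar_function(
    double_and_add_one,
    'double_and_add_one',
    {
        'summary': 'Double input and add one',
        'description': 'Compute 2 * x + 1 element-wise.'
    },
    in_types={'x': pa.int64()},
    out_type=pa.int64()
)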
1230
1231
# Use registered function
1232
result = pc.call_function('double_and_add_one', [pa.array([1, 2, 3, 4, 5])])
1233
print(result) # [3, 5, 7, 9, 11]
1234
```