Python library for Apache Arrow columnar memory format and computing libraries
—
High-performance vectorized compute operations providing 200+ functions optimized for columnar data. The compute engine enables efficient mathematical operations, string processing, temporal calculations, aggregations, and filtering on Arrow arrays and tables.
Function registration, execution, and expression system for building complex computational pipelines with lazy evaluation and optimization.
def call_function(name, args, options=None, memory_pool=None):
"""
Call compute function by name.
Parameters:
- name: str, function name
- args: list, function arguments (arrays, scalars, tables)
- options: FunctionOptions, function-specific options
- memory_pool: MemoryPool, memory pool for allocations
Returns:
Array, Scalar, or Table: Result of computation
"""
def get_function(name):
"""
Get registered function by name.
Parameters:
- name: str, function name
Returns:
Function: Registered function object
"""
def list_functions():
"""
List all available function names.
Returns:
list of str: Available function names
"""
def function_registry():
"""
Get global function registry.
Returns:
FunctionRegistry: Global function registry
"""
class Expression:
"""
Compute expression for lazy evaluation and optimization.
"""
def equals(self, other):
"""Check equality with another expression."""
def to_string(self):
"""String representation of expression."""
def field(name):
"""
Create field reference expression.
Parameters:
- name: str, field name
Returns:
Expression: Field reference expression
"""
def scalar(value):
"""
Create scalar literal expression.
Parameters:
- value: scalar value
Returns:
Expression: Scalar literal expression
"""
class Function:
"""Base class for compute functions."""
@property
def name(self):
"""Function name."""
@property
def arity(self):
"""Function arity (number of arguments)."""
@property
def doc(self):
"""Function documentation."""
class FunctionOptions:
"""Base class for function options."""
class FunctionRegistry:
"""Registry of available compute functions."""
def get_function(self, name):
"""Get function by name."""
def get_function_names(self):
"""Get all function names."""

Arithmetic operations, mathematical functions, and numeric computations optimized for columnar data processing.
# Arithmetic operations
def add(x, y):
"""Element-wise addition."""
def subtract(x, y):
"""Element-wise subtraction."""
def multiply(x, y):
"""Element-wise multiplication."""
def divide(x, y):
"""Element-wise division."""
def power(base, exponent):
"""Element-wise exponentiation."""
def negate(x):
"""Element-wise negation."""
def abs(x):
"""Element-wise absolute value."""
def sign(x):
"""Element-wise sign (-1, 0, 1)."""
# Mathematical functions
def sqrt(x):
"""Element-wise square root."""
def exp(x):
"""Element-wise exponential (e^x)."""
def ln(x):
"""Element-wise natural logarithm."""
def log10(x):
"""Element-wise base-10 logarithm."""
def log2(x):
"""Element-wise base-2 logarithm."""
def log1p(x):
"""Element-wise log(1 + x)."""
def floor(x):
"""Element-wise floor."""
def ceil(x):
"""Element-wise ceiling."""
def trunc(x):
"""Element-wise truncation toward zero."""
def round(x, ndigits=0, round_mode='half_to_even'):
"""
Element-wise rounding.
Parameters:
- x: Array, input array
- ndigits: int, number of decimal places
- round_mode: str, rounding mode
Returns:
Array: Rounded array
"""
# Trigonometric functions
def sin(x):
"""Element-wise sine."""
def cos(x):
"""Element-wise cosine."""
def tan(x):
"""Element-wise tangent."""
def asin(x):
"""Element-wise arcsine."""
def acos(x):
"""Element-wise arccosine."""
def atan(x):
"""Element-wise arctangent."""
def atan2(y, x):
"""Element-wise arctangent of y/x."""
# Bitwise operations
def bit_wise_and(x, y):
"""Element-wise bitwise AND."""
def bit_wise_or(x, y):
"""Element-wise bitwise OR."""
def bit_wise_xor(x, y):
"""Element-wise bitwise XOR."""
def bit_wise_not(x):
"""Element-wise bitwise NOT."""
def shift_left(x, y):
"""Element-wise left bit shift."""
def shift_right(x, y):
"""Element-wise right bit shift."""

Element-wise comparisons, logical operations, and boolean functions for filtering and conditional logic.
# Comparison operations
def equal(x, y):
"""Element-wise equality comparison."""
def not_equal(x, y):
"""Element-wise inequality comparison."""
def less(x, y):
"""Element-wise less than comparison."""
def less_equal(x, y):
"""Element-wise less than or equal comparison."""
def greater(x, y):
"""Element-wise greater than comparison."""
def greater_equal(x, y):
"""Element-wise greater than or equal comparison."""
# Logical operations
def and_(x, y):
"""Element-wise logical AND."""
def or_(x, y):
"""Element-wise logical OR."""
def xor(x, y):
"""Element-wise logical XOR."""
def invert(x):
"""Element-wise logical NOT."""
# Null handling
def is_null(x):
"""Check for null values."""
def is_valid(x):
"""Check for non-null values."""
def is_nan(x):
"""Check for NaN values (floating point)."""
def is_finite(x):
"""Check for finite values."""
def is_infinite(x):
"""Check for infinite values."""
def fill_null(values, fill_value):
"""Fill null values with specified value."""
def coalesce(*arrays):
"""Return first non-null value from arrays."""
def choose(indices, *arrays):
"""Choose values from arrays based on indices."""
def if_else(condition, left, right):
"""Conditional selection (ternary operator)."""
def case_when(*args):
"""
Multi-branch conditional selection.
Parameters:
- args: a struct of boolean conditions first (e.g. built with make_struct), followed by one value per condition and an optional trailing else value
Returns:
Array: Selected values based on conditions
"""

Statistical and aggregation functions for computing summary statistics and reductions over arrays and groups.
# Basic aggregations
def sum(array, skip_nulls=True, min_count=1):
"""
Sum of array elements.
Parameters:
- array: Array, input array
- skip_nulls: bool, ignore null values
- min_count: int, minimum non-null values required
Returns:
Scalar: Sum of elements
"""
def mean(array, skip_nulls=True, min_count=1):
"""Mean of array elements."""
def count(array, mode='only_valid'):
"""
Count array elements.
Parameters:
- array: Array, input array
- mode: str, counting mode ('only_valid', 'only_null', 'all')
Returns:
Scalar: Count of elements
"""
def count_distinct(array, mode='only_valid'):
"""Count distinct elements."""
def min(array, skip_nulls=True, min_count=1):
"""Minimum value."""
def max(array, skip_nulls=True, min_count=1):
"""Maximum value."""
def min_max(array, skip_nulls=True, min_count=1):
"""
Minimum and maximum values.
Returns:
StructScalar: Struct with 'min' and 'max' fields
"""
def any(array, skip_nulls=True, min_count=1):
"""Logical OR reduction (any true values)."""
def all(array, skip_nulls=True, min_count=1):
"""Logical AND reduction (all true values)."""
# Statistical functions
def variance(array, ddof=0, skip_nulls=True, min_count=1):
"""
Variance of array elements.
Parameters:
- array: Array, input array
- ddof: int, delta degrees of freedom
- skip_nulls: bool, ignore null values
- min_count: int, minimum non-null values required
Returns:
Scalar: Variance
"""
def stddev(array, ddof=0, skip_nulls=True, min_count=1):
"""Standard deviation."""
def quantile(array, q=0.5, interpolation='linear', skip_nulls=True, min_count=1):
"""
Quantile of array elements.
Parameters:
- array: Array, input array
- q: float or list, quantile(s) to compute (0.0 to 1.0)
- interpolation: str, interpolation method
- skip_nulls: bool, ignore null values
- min_count: int, minimum non-null values required
Returns:
Scalar or Array: Quantile value(s)
"""
def mode(array, n=1, skip_nulls=True, min_count=1):
"""
Mode (most frequent values).
Parameters:
- array: Array, input array
- n: int, number of modes to return
- skip_nulls: bool, ignore null values
- min_count: int, minimum non-null values required
Returns:
StructArray: Modes with counts
"""
def tdigest(array, q=None, delta=100, buffer_size=500, skip_nulls=True, min_count=1):
"""
T-Digest quantile approximation.
Parameters:
- array: Array, input array
- q: list of float, quantiles to compute
- delta: int, compression parameter
- buffer_size: int, buffer size
- skip_nulls: bool, ignore null values
- min_count: int, minimum non-null values required
Returns:
Array: Approximate quantiles
"""
# Product and cumulative operations
def product(array, skip_nulls=True, min_count=1):
"""Product of array elements."""
def cumulative_sum(array, start=None, skip_nulls=True):
"""
Cumulative sum.
Parameters:
- array: Array, input array
- start: scalar, starting value
- skip_nulls: bool, ignore null values
Returns:
Array: Cumulative sums
"""
def cumulative_sum_checked(array, start=None, skip_nulls=True):
"""Cumulative sum with overflow checking."""
def cumulative_prod(array, start=None, skip_nulls=True):
"""Cumulative product."""
def cumulative_max(array, skip_nulls=True):
"""Cumulative maximum."""
def cumulative_min(array, skip_nulls=True):
"""Cumulative minimum."""

Functions for array manipulation, filtering, sorting, and selection operations.
def take(data, indices, boundscheck=True):
"""
Select elements by indices.
Parameters:
- data: Array, input array
- indices: Array, selection indices
- boundscheck: bool, check index bounds
Returns:
Array: Selected elements
"""
def filter(data, selection_filter, null_selection_behavior='drop'):
"""
Filter array by boolean mask.
Parameters:
- data: Array, input array
- selection_filter: Array, boolean selection mask
- null_selection_behavior: str, how to handle nulls in mask
Returns:
Array: Filtered elements
"""
def slice(array, start, stop=None, step=1):
"""
Slice array.
Parameters:
- array: Array, input array
- start: int, start index
- stop: int, stop index (exclusive)
- step: int, step size
Returns:
Array: Sliced array
"""
def array_sort_indices(array, order='ascending', null_placement='at_end'):
"""
Get indices that would sort array.
Parameters:
- array: Array, input array
- order: str, sort order ('ascending', 'descending')
- null_placement: str, null placement ('at_start', 'at_end')
Returns:
Array: Sort indices
"""
def sort_indices(arrays, orders=None, null_placement=None):
"""
Get indices for sorting by multiple arrays.
Parameters:
- arrays: list of Array, sort keys
- orders: list of str, sort orders for each key
- null_placement: list of str, null placement for each key
Returns:
Array: Sort indices
"""
def partition_nth_indices(array, pivot, null_placement='at_end'):
"""
Partition array around nth element.
Parameters:
- array: Array, input array
- pivot: int, pivot index
- null_placement: str, null placement
Returns:
Array: Partition indices
"""
def top_k_unstable(array, k, sort_keys=None):
"""
Select the indices of the top k elements (unstable sort).
Parameters:
- array: Array, input array
- k: int, number of elements to select
- sort_keys: list, sort keys for selection
Returns:
Array: Indices of the top k elements (pass them to take() to obtain the values)
"""
def bottom_k_unstable(array, k, sort_keys=None):
"""
Select the indices of the bottom k elements (unstable sort).
Parameters:
- array: Array, input array
- k: int, number of elements to select
- sort_keys: list, sort keys for selection
Returns:
Array: Indices of the bottom k elements (pass them to take() to obtain the values)
"""
def unique(array):
"""
Get unique values.
Parameters:
- array: Array, input array
Returns:
Array: Unique values
"""
def value_counts(array):
"""
Count occurrences of each value.
Parameters:
- array: Array, input array
Returns:
StructArray: Values and their counts
"""
def dictionary_encode(array, null_encoding_behavior='mask'):
"""
Dictionary encode array.
Parameters:
- array: Array, input array
- null_encoding_behavior: str, null handling
Returns:
DictionaryArray: Dictionary encoded array
"""
def run_end_encode(array):
"""
Run-end encode array.
Parameters:
- array: Array, input array
Returns:
RunEndEncodedArray: Run-end encoded array
"""

Comprehensive string processing functions for text manipulation, pattern matching, and string transformations.
# String length and properties
def utf8_length(strings):
"""UTF-8 character length of strings."""
def binary_length(strings):
"""Byte length of binary/string arrays."""
def utf8_is_alnum(strings):
"""Check if strings are alphanumeric."""
def utf8_is_alpha(strings):
"""Check if strings are alphabetic."""
def utf8_is_decimal(strings):
"""Check if strings are decimal."""
def utf8_is_digit(strings):
"""Check if strings contain only digits."""
def utf8_is_lower(strings):
"""Check if strings are lowercase."""
def utf8_is_numeric(strings):
"""Check if strings are numeric."""
def utf8_is_printable(strings):
"""Check if strings are printable."""
def utf8_is_space(strings):
"""Check if strings are whitespace."""
def utf8_is_title(strings):
"""Check if strings are titlecased."""
def utf8_is_upper(strings):
"""Check if strings are uppercase."""
# String transformations
def utf8_upper(strings):
"""Convert strings to uppercase."""
def utf8_lower(strings):
"""Convert strings to lowercase."""
def utf8_swapcase(strings):
"""Swap case of strings."""
def utf8_capitalize(strings):
"""Capitalize first character."""
def utf8_title(strings):
"""Convert to title case."""
def ascii_upper(strings):
"""Convert ASCII strings to uppercase."""
def ascii_lower(strings):
"""Convert ASCII strings to lowercase."""
def ascii_swapcase(strings):
"""Swap case of ASCII strings."""
def ascii_capitalize(strings):
"""Capitalize ASCII strings."""
# String padding and trimming
def utf8_ltrim(strings, characters=' '):
"""
Left trim strings.
Parameters:
- strings: Array, input strings
- characters: str, characters to trim
Returns:
Array: Left-trimmed strings
"""
def utf8_rtrim(strings, characters=' '):
"""Right trim strings."""
def utf8_trim(strings, characters=' '):
"""Trim strings from both ends."""
def utf8_ltrim_whitespace(strings):
"""Left trim whitespace."""
def utf8_rtrim_whitespace(strings):
"""Right trim whitespace."""
def utf8_trim_whitespace(strings):
"""Trim whitespace from both ends."""
def utf8_center(strings, width, padding=' '):
"""
Center strings with padding.
Parameters:
- strings: Array, input strings
- width: int, total width
- padding: str, padding character
Returns:
Array: Centered strings
"""
def utf8_lpad(strings, width, padding=' '):
"""Left pad strings."""
def utf8_rpad(strings, width, padding=' '):
"""Right pad strings."""
# String slicing and extraction
def utf8_slice_codeunits(strings, start, stop=None, step=1):
"""
Slice strings by code units.
Parameters:
- strings: Array, input strings
- start: int, start position
- stop: int, stop position
- step: int, step size
Returns:
Array: Sliced strings
"""
def utf8_reverse(strings):
"""Reverse strings."""
def utf8_replace_slice(strings, start, stop, replacement):
"""
Replace slice of strings.
Parameters:
- strings: Array, input strings
- start: int, start position
- stop: int, stop position
- replacement: str, replacement string
Returns:
Array: Strings with replaced slices
"""
# String searching and matching
def match_substring(strings, pattern, ignore_case=False):
"""
Check if strings contain substring.
Parameters:
- strings: Array, input strings
- pattern: str, substring pattern
- ignore_case: bool, case insensitive matching
Returns:
BooleanArray: Match results
"""
def match_substring_regex(strings, pattern, ignore_case=False):
"""
Check if strings match regex pattern.
Parameters:
- strings: Array, input strings
- pattern: str, regex pattern
- ignore_case: bool, case insensitive matching
Returns:
BooleanArray: Match results
"""
def find_substring(strings, pattern, ignore_case=False):
"""
Find first occurrence of substring.
Parameters:
- strings: Array, input strings
- pattern: str, substring pattern
- ignore_case: bool, case insensitive search
Returns:
Int32Array: First occurrence indices (-1 if not found)
"""
def find_substring_regex(strings, pattern, ignore_case=False):
"""Find first regex match."""
def count_substring(strings, pattern, ignore_case=False):
"""
Count occurrences of substring.
Parameters:
- strings: Array, input strings
- pattern: str, substring pattern
- ignore_case: bool, case insensitive counting
Returns:
Int32Array: Occurrence counts
"""
def count_substring_regex(strings, pattern, ignore_case=False):
"""Count regex matches."""
# String replacement
def replace_substring(strings, pattern, replacement, max_replacements=-1):
"""
Replace substring occurrences.
Parameters:
- strings: Array, input strings
- pattern: str, substring to replace
- replacement: str, replacement string
- max_replacements: int, maximum replacements (-1 for all)
Returns:
Array: Strings with replacements
"""
def replace_substring_regex(strings, pattern, replacement, max_replacements=-1):
"""Replace regex matches."""
def extract_regex(strings, pattern):
"""
Extract regex groups.
Parameters:
- strings: Array, input strings
- pattern: str, regex pattern with groups
Returns:
StructArray: Extracted groups
"""
# String splitting and joining
def split_pattern(strings, pattern, max_splits=-1, reverse=False):
"""
Split strings by pattern.
Parameters:
- strings: Array, input strings
- pattern: str, split pattern
- max_splits: int, maximum splits (-1 for unlimited)
- reverse: bool, split from right
Returns:
ListArray: Split components
"""
def split_pattern_regex(strings, pattern, max_splits=-1, reverse=False):
"""Split strings by regex pattern."""
def binary_join(lists, separator):
"""
Join binary arrays with separator.
Parameters:
- lists: ListArray, lists of binary values
- separator: bytes, join separator
Returns:
Array: Joined binary values
"""
def binary_join_element_wise(left, right, separator):
"""Element-wise binary join."""

Date, time, and timestamp manipulation functions for temporal data processing and calendar operations.
# Date/time extraction
def year(timestamps):
"""Extract year from timestamps."""
def month(timestamps):
"""Extract month from timestamps."""
def day(timestamps):
"""Extract day from timestamps."""
def day_of_week(timestamps, count_from_zero=True, week_start=1):
"""
Extract day of week.
Parameters:
- timestamps: Array, timestamp array
- count_from_zero: bool, whether to count from 0
- week_start: int, first day of week (1=Monday, 7=Sunday)
Returns:
Int32Array: Day of week values
"""
def day_of_year(timestamps):
"""Extract day of year."""
def iso_week(timestamps):
"""Extract ISO week number."""
def iso_year(timestamps):
"""Extract ISO year."""
def quarter(timestamps):
"""Extract quarter."""
def hour(timestamps):
"""Extract hour from timestamps."""
def minute(timestamps):
"""Extract minute from timestamps."""
def second(timestamps):
"""Extract second from timestamps."""
def millisecond(timestamps):
"""Extract millisecond from timestamps."""
def microsecond(timestamps):
"""Extract microsecond from timestamps."""
def nanosecond(timestamps):
"""Extract nanosecond from timestamps."""
def subsecond(timestamps):
"""Extract fractional seconds."""
# Temporal arithmetic
def years_between(start, end):
"""Calculate years between timestamps."""
def month_interval_between(start, end):
"""Calculate month intervals between timestamps."""
def day_time_interval_between(start, end):
"""Calculate day-time intervals between timestamps."""
def weeks_between(start, end):
"""Calculate weeks between timestamps."""
def days_between(start, end):
"""Calculate days between timestamps."""
def hours_between(start, end):
"""Calculate hours between timestamps."""
def minutes_between(start, end):
"""Calculate minutes between timestamps."""
def seconds_between(start, end):
"""Calculate seconds between timestamps."""
def milliseconds_between(start, end):
"""Calculate milliseconds between timestamps."""
def microseconds_between(start, end):
"""Calculate microseconds between timestamps."""
def nanoseconds_between(start, end):
"""Calculate nanoseconds between timestamps."""
# Temporal rounding and truncation
def floor_temporal(timestamps, unit='day', week_starts_monday=True, ceil_is_strictly_greater=False, calendar_based_origin=False):
"""
Floor timestamps to temporal unit.
Parameters:
- timestamps: Array, timestamp array
- unit: str, temporal unit ('year', 'month', 'day', 'hour', etc.)
- week_starts_monday: bool, week start day
- ceil_is_strictly_greater: bool, ceiling behavior
- calendar_based_origin: bool, use calendar-based origin
Returns:
Array: Floored timestamps
"""
def ceil_temporal(timestamps, unit='day', week_starts_monday=True, ceil_is_strictly_greater=False, calendar_based_origin=False):
"""Ceil timestamps to temporal unit."""
def round_temporal(timestamps, unit='day', week_starts_monday=True, ceil_is_strictly_greater=False, calendar_based_origin=False):
"""Round timestamps to temporal unit."""
# String parsing and formatting
def strftime(timestamps, format='%Y-%m-%d %H:%M:%S', locale='C'):
"""
Format timestamps as strings.
Parameters:
- timestamps: Array, timestamp array
- format: str, strftime format string
- locale: str, locale for formatting
Returns:
StringArray: Formatted timestamp strings
"""
def strptime(strings, format, unit, error_is_null=False):
"""
Parse strings as timestamps.
Parameters:
- strings: Array, string array
- format: str, strptime format string
- unit: str, timestamp unit
- error_is_null: bool, return null on parse errors
Returns:
TimestampArray: Parsed timestamps
"""
# Timezone operations
def assume_timezone(timestamps, timezone, ambiguous='raise', nonexistent='raise'):
"""
Assume timezone for naive timestamps.
Parameters:
- timestamps: Array, naive timestamp array
- timezone: str, timezone identifier
- ambiguous: str, how to handle ambiguous times
- nonexistent: str, how to handle nonexistent times
Returns:
TimestampArray: Timezone-aware timestamps
"""
def local_timestamp(timestamps):
"""Convert to local timezone."""

Functions for casting and converting between different Arrow data types with configurable safety and behavior options.
def cast(array, target_type, safe=True, options=None):
"""
Cast array to different type.
Parameters:
- array: Array, input array
- target_type: DataType, target type
- safe: bool, check for data loss
- options: CastOptions, casting options
Returns:
Array: Cast array
"""
def can_cast(from_type, to_type):
"""
Check if type can be cast.
Parameters:
- from_type: DataType, source type
- to_type: DataType, target type
Returns:
bool: Whether cast is supported
"""
class CastOptions:
"""
Options for type casting.
Attributes:
- safe: Whether to check for data loss
- allow_int_overflow: Allow integer overflow
- allow_time_truncate: Allow time truncation
- allow_time_overflow: Allow time overflow
- allow_decimal_truncate: Allow decimal truncation
- allow_float_truncate: Allow float truncation
"""

Functions for generating random numbers and sampling from distributions.
def random(n, initializer=None, options=None):
"""
Generate random numbers.
Parameters:
- n: int, number of random values
- initializer: int, random seed
- options: RandomOptions, generation options
Returns:
Array: Random values
"""
class RandomOptions:
"""
Options for random number generation.
Attributes:
- initializer: Random seed
- distribution: Distribution type
"""

import pyarrow as pa
import pyarrow.compute as pc

# Create sample data
numbers = pa.array([1, 2, 3, 4, 5, None, 7, 8, 9, 10])
strings = pa.array(['apple', 'banana', 'cherry', None, 'date'])

# Arithmetic operations
doubled = pc.multiply(numbers, 2)
sum_result = pc.sum(numbers)
mean_result = pc.mean(numbers)

# String operations
lengths = pc.utf8_length(strings)
upper_strings = pc.utf8_upper(strings)
contains_a = pc.match_substring(strings, 'a')

# Filtering and selection
filtered = pc.filter(numbers, pc.greater(numbers, 5))
# top_k_unstable returns the *indices* of the k largest values, not the
# values themselves, so resolve them with take() to get the actual top 3.
top_3_indices = pc.top_k_unstable(numbers, 3)
top_3 = pc.take(numbers, top_3_indices)

import pyarrow as pa
import pyarrow.compute as pc

# Create table
table = pa.table({
    'id': [1, 2, 3, 4, 5],
    'name': ['Alice', 'Bob', 'Charlie', 'Diana', 'Eve'],
    'age': [25, 30, 35, 28, 32],
    'salary': [50000, 60000, 70000, 55000, 65000]
})

# Filter table (keeps rows with age >= 30)
adults = table.filter(pc.greater_equal(table['age'], 30))

# Add computed column.
# Table.add_column() requires an insertion index as its first argument;
# append_column() adds the column at the end without needing one.
table_with_bonus = table.append_column(
    'bonus',
    pc.multiply(table['salary'], 0.1)
)

# Aggregations
total_salary = pc.sum(table['salary'])
avg_age = pc.mean(table['age'])
age_stats = pc.quantile(table['age'], [0.25, 0.5, 0.75])

import pyarrow as pa
import pyarrow.compute as pc

# Create table with temporal data.
# pa.array() does not parse Python strings into timestamps, so build a
# string array first and cast it; the cast parses the ISO 8601 values.
table = pa.table({
    'timestamp': pa.array([
        '2023-01-15 10:30:00',
        '2023-02-20 14:45:00',
        '2023-03-10 09:15:00',
        '2023-04-05 16:20:00'
    ]).cast(pa.timestamp('s')),
    'value': [100, 200, 150, 300]
})

# Extract temporal components.
# append_column() is used because Table.add_column() requires an
# insertion index as its first argument.
table = table.append_column('year', pc.year(table['timestamp']))
table = table.append_column('month', pc.month(table['timestamp']))
table = table.append_column('day_of_week', pc.day_of_week(table['timestamp']))

# Complex filtering
high_value_weekdays = table.filter(
    pc.and_(
        pc.greater(table['value'], 150),
        pc.less(table['day_of_week'], 5)  # Monday=0 to Friday=4
    )
)

# Conditional expressions.
# case_when() takes a single struct of boolean conditions followed by one
# value per condition and an optional trailing "else" value; the first
# condition that is true for a row selects that row's value.
table = table.append_column(
    'category',
    pc.case_when(
        pc.make_struct(
            pc.less(table['value'], 150),
            pc.less(table['value'], 250),
        ),
        'low',
        'medium',
        'high',
    )
)

import pyarrow as pa
import pyarrow.compute as pc


# Register scalar UDF.
# A scalar UDF receives a UdfContext as its first argument, followed by the
# input arrays; allocations should go through the context's memory pool.
def double_and_add_one(ctx, x):
    doubled = pc.multiply(x, 2, memory_pool=ctx.memory_pool)
    return pc.add(doubled, 1, memory_pool=ctx.memory_pool)


# register_scalar_function() requires the documentation as a dict with
# "summary" and "description" keys, plus the input and output types.
pc.register_scalar_function(
    double_and_add_one,
    'double_and_add_one',
    {
        'summary': 'Double input and add one',
        'description': 'Compute 2 * x + 1 for every element of x.',
    },
    {'x': pa.int64()},
    pa.int64(),
)

# Use registered function
result = pc.call_function('double_and_add_one', [pa.array([1, 2, 3, 4, 5])])
print(result)  # [3, 5, 7, 9, 11]

Install with Tessl CLI
npx tessl i tessl/pypi-pyarrow