CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-pyarrow

Python library for Apache Arrow columnar memory format and computing libraries

Pending
Overview
Eval results
Files

compute-functions.mddocs/

Compute Functions

High-performance vectorized compute operations providing 200+ functions optimized for columnar data. The compute engine enables efficient mathematical operations, string processing, temporal calculations, aggregations, and filtering on Arrow arrays and tables.

Capabilities

Core Compute Infrastructure

Function registration, execution, and expression system for building complex computational pipelines with lazy evaluation and optimization.

def call_function(name, args, options=None, memory_pool=None):
    """
    Call compute function by name.
    
    Parameters:
    - name: str, function name
    - args: list, function arguments (arrays, scalars, tables)
    - options: FunctionOptions, function-specific options
    - memory_pool: MemoryPool, memory pool for allocations
    
    Returns:
    Array, Scalar, or Table: Result of computation
    """

def get_function(name):
    """
    Get registered function by name.
    
    Parameters:
    - name: str, function name
    
    Returns:
    Function: Registered function object
    """

def list_functions():
    """
    List all available function names.
    
    Returns:
    list of str: Available function names
    """

def function_registry():
    """
    Get global function registry.
    
    Returns:
    FunctionRegistry: Global function registry
    """

class Expression:
    """
    Compute expression for lazy evaluation and optimization.
    """
    
    def equals(self, other):
        """Check equality with another expression."""
    
    def to_string(self):
        """String representation of expression."""

def field(name):
    """
    Create field reference expression.
    
    Parameters:
    - name: str, field name
    
    Returns:
    Expression: Field reference expression
    """

def scalar(value):
    """
    Create scalar literal expression.
    
    Parameters:
    - value: scalar value
    
    Returns:
    Expression: Scalar literal expression
    """

class Function:
    """Base class for compute functions."""
    
    @property
    def name(self):
        """Function name."""
    
    @property
    def arity(self):
        """Function arity (number of arguments)."""
    
    @property
    def doc(self):
        """Function documentation."""

class FunctionOptions:
    """Base class for function options."""

class FunctionRegistry:
    """Registry of available compute functions."""
    
    def get_function(self, name):
        """Look up a registered function by name."""
    
    def list_functions(self):
        """Return the names of all functions in the registry."""

Mathematical Operations

Arithmetic operations, mathematical functions, and numeric computations optimized for columnar data processing.

# Arithmetic operations
def add(x, y):
    """Element-wise addition."""

def subtract(x, y):
    """Element-wise subtraction."""

def multiply(x, y):
    """Element-wise multiplication."""

def divide(x, y):
    """Element-wise division."""

def power(base, exponent):
    """Element-wise exponentiation."""

def negate(x):
    """Element-wise negation."""

def abs(x):
    """Element-wise absolute value."""

def sign(x):
    """Element-wise sign (-1, 0, 1)."""

# Mathematical functions
def sqrt(x):
    """Element-wise square root."""

def exp(x):
    """Element-wise exponential (e^x)."""

def ln(x):
    """Element-wise natural logarithm."""

def log10(x):
    """Element-wise base-10 logarithm."""

def log2(x):
    """Element-wise base-2 logarithm."""

def log1p(x):
    """Element-wise log(1 + x)."""

def floor(x):
    """Element-wise floor."""

def ceil(x):
    """Element-wise ceiling."""

def trunc(x):
    """Element-wise truncation toward zero."""

def round(x, ndigits=0, round_mode='half_to_even'):
    """
    Element-wise rounding.
    
    Parameters:
    - x: Array, input array
    - ndigits: int, number of decimal places
    - round_mode: str, rounding mode
    
    Returns:
    Array: Rounded array
    """

# Trigonometric functions
def sin(x):
    """Element-wise sine."""

def cos(x):
    """Element-wise cosine."""

def tan(x):
    """Element-wise tangent."""

def asin(x):
    """Element-wise arcsine."""

def acos(x):
    """Element-wise arccosine."""

def atan(x):
    """Element-wise arctangent."""

def atan2(y, x):
    """Element-wise arctangent of y/x."""

# Bitwise operations  
def bit_wise_and(x, y):
    """Element-wise bitwise AND."""

def bit_wise_or(x, y):
    """Element-wise bitwise OR."""

def bit_wise_xor(x, y):
    """Element-wise bitwise XOR."""

def bit_wise_not(x):
    """Element-wise bitwise NOT."""

def shift_left(x, y):
    """Element-wise left bit shift."""

def shift_right(x, y):
    """Element-wise right bit shift."""

Comparison and Logical Operations

Element-wise comparisons, logical operations, and boolean functions for filtering and conditional logic.

# Comparison operations
def equal(x, y):
    """Element-wise equality comparison."""

def not_equal(x, y):
    """Element-wise inequality comparison."""

def less(x, y):
    """Element-wise less than comparison."""

def less_equal(x, y):
    """Element-wise less than or equal comparison."""

def greater(x, y):
    """Element-wise greater than comparison."""

def greater_equal(x, y):
    """Element-wise greater than or equal comparison."""

# Logical operations
def and_(x, y):
    """Element-wise logical AND."""

def or_(x, y):
    """Element-wise logical OR."""

def xor(x, y):
    """Element-wise logical XOR."""

def invert(x):
    """Element-wise logical NOT."""

# Null handling
def is_null(x):
    """Check for null values."""

def is_valid(x):
    """Check for non-null values."""

def is_nan(x):
    """Check for NaN values (floating point)."""

def is_finite(x):
    """Check for finite values."""

def is_inf(x):
    """Check for infinite values (the compute function is named ``is_inf``)."""

def fill_null(values, fill_value):
    """Fill null values with specified value."""

def coalesce(*arrays):
    """Return first non-null value from arrays."""

def choose(indices, *arrays):
    """Choose values from arrays based on indices."""

def if_else(condition, left, right):
    """Conditional selection (ternary operator)."""

def case_when(cond, *cases):
    """
    Multi-branch conditional selection.
    
    Parameters:
    - cond: StructArray or struct scalar of booleans, one field per condition
      (build it with make_struct(cond1, cond2, ...))
    - cases: one value per condition, in the same order, optionally followed
      by a final "else" value used when no condition is true
    
    Returns:
    Array: For each row, the case belonging to the first true condition
    """

Aggregation Functions

Statistical and aggregation functions for computing summary statistics and reductions over arrays and groups.

# Basic aggregations
def sum(array, skip_nulls=True, min_count=1):
    """
    Sum of array elements.
    
    Parameters:
    - array: Array, input array
    - skip_nulls: bool, ignore null values
    - min_count: int, minimum non-null values required
    
    Returns:
    Scalar: Sum of elements
    """

def mean(array, skip_nulls=True, min_count=1):
    """Mean of array elements."""

def count(array, mode='only_valid'):
    """
    Count array elements.
    
    Parameters:
    - array: Array, input array
    - mode: str, counting mode ('only_valid', 'only_null', 'all')
    
    Returns:
    Scalar: Count of elements
    """

def count_distinct(array, mode='only_valid'):
    """Count distinct elements."""

def min(array, skip_nulls=True, min_count=1):
    """Minimum value."""

def max(array, skip_nulls=True, min_count=1):
    """Maximum value."""

def min_max(array, skip_nulls=True, min_count=1):
    """
    Minimum and maximum values.
    
    Returns:
    StructScalar: Struct with 'min' and 'max' fields
    """

def any(array, skip_nulls=True, min_count=1):
    """Logical OR reduction (any true values)."""

def all(array, skip_nulls=True, min_count=1):
    """Logical AND reduction (all true values)."""

# Statistical functions
def variance(array, ddof=0, skip_nulls=True, min_count=0):
    """
    Variance of array elements.
    
    Parameters:
    - array: Array, input array
    - ddof: int, delta degrees of freedom (the divisor is N - ddof)
    - skip_nulls: bool, ignore null values
    - min_count: int, minimum non-null values required; with fewer the
      result is null (default 0)
    
    Returns:
    Scalar: Variance
    """

def stddev(array, ddof=0, skip_nulls=True, min_count=0):
    """Standard deviation of array elements (same options as variance)."""

def quantile(array, q=0.5, interpolation='linear', skip_nulls=True, min_count=0):
    """
    Quantile of array elements.
    
    Parameters:
    - array: Array, input array
    - q: float or list of float, quantile(s) to compute (0.0 to 1.0)
    - interpolation: str, how to interpolate between data points
      ('linear', 'lower', 'higher', 'nearest', 'midpoint')
    - skip_nulls: bool, ignore null values
    - min_count: int, minimum non-null values required; with fewer the
      result is null (default 0)
    
    Returns:
    Array: One value per requested quantile (length 1 for a single q)
    """

def mode(array, n=1, skip_nulls=True, min_count=0):
    """
    Mode (most frequent values).
    
    Parameters:
    - array: Array, input array
    - n: int, number of distinct most-common values to return
    - skip_nulls: bool, ignore null values
    - min_count: int, minimum non-null values required; with fewer the
      result is null (default 0)
    
    Returns:
    StructArray: One struct per mode with 'mode' and 'count' fields
    """

def tdigest(array, q=0.5, delta=100, buffer_size=500, skip_nulls=True, min_count=0):
    """
    T-Digest quantile approximation.
    
    Parameters:
    - array: Array, input array
    - q: float or list of float, quantiles to approximate (0.0 to 1.0)
    - delta: int, compression parameter
    - buffer_size: int, buffer size
    - skip_nulls: bool, ignore null values
    - min_count: int, minimum non-null values required; with fewer the
      result is null (default 0)
    
    Returns:
    Array: Approximate quantiles, one value per requested q
    """

# Product and cumulative operations
def product(array, skip_nulls=True, min_count=1):
    """Product of array elements."""

def cumulative_sum(array, start=None, skip_nulls=False):
    """
    Cumulative sum.
    
    Parameters:
    - array: Array, input array
    - start: scalar, starting value for the running sum
    - skip_nulls: bool, when False (the default) the first null encountered
      is propagated to all later outputs; when True nulls are skipped
    
    Returns:
    Array: Cumulative sums
    """

def cumulative_sum_checked(array, start=None, skip_nulls=False):
    """Cumulative sum with overflow checking (nulls propagate unless skip_nulls=True)."""

def cumulative_prod(array, start=None, skip_nulls=False):
    """Cumulative product (nulls propagate unless skip_nulls=True)."""

def cumulative_max(array, start=None, skip_nulls=False):
    """Cumulative maximum, optionally seeded with start (nulls propagate unless skip_nulls=True)."""

def cumulative_min(array, start=None, skip_nulls=False):
    """Cumulative minimum, optionally seeded with start (nulls propagate unless skip_nulls=True)."""

Array Operations

Functions for array manipulation, filtering, sorting, and selection operations.

def take(data, indices, boundscheck=True):
    """
    Select elements by indices.
    
    Parameters:
    - data: Array, input array
    - indices: Array, selection indices
    - boundscheck: bool, check index bounds
    
    Returns:
    Array: Selected elements
    """

def filter(data, selection_filter, null_selection_behavior='drop'):
    """
    Filter array by boolean mask.
    
    Parameters:
    - data: Array, input array
    - selection_filter: Array, boolean selection mask
    - null_selection_behavior: str, how to handle nulls in mask
    
    Returns:
    Array: Filtered elements
    """

def slice(array, start, stop=None, step=1):
    """
    Slice array.
    
    NOTE(review): pyarrow.compute may not expose a generic ``slice`` function.
    The documented slicing APIs appear to be Array.slice(offset, length),
    list_slice and utf8_slice_codeunits — confirm before relying on this
    signature.
    
    Parameters:
    - array: Array, input array
    - start: int, start index
    - stop: int, stop index (exclusive)
    - step: int, step size
    
    Returns:
    Array: Sliced array
    """

def array_sort_indices(array, order='ascending', null_placement='at_end'):
    """
    Get indices that would sort array.
    
    Parameters:
    - array: Array, input array
    - order: str, sort order ('ascending', 'descending')
    - null_placement: str, null placement ('at_start', 'at_end')
    
    Returns:
    Array: Sort indices
    """

def sort_indices(input, sort_keys=(), null_placement='at_end'):
    """
    Get indices that would sort an array, record batch, or table.
    
    Parameters:
    - input: Array, ChunkedArray, RecordBatch, or Table to sort
    - sort_keys: sequence of (name, order) tuples; name is the field/column
      to sort on and order is 'ascending' or 'descending'
    - null_placement: str, where nulls sort ('at_start', 'at_end')
    
    Returns:
    Array: Sort indices
    """

def partition_nth_indices(array, pivot, null_placement='at_end'):
    """
    Partition array around nth element.
    
    Parameters:
    - array: Array, input array
    - pivot: int, pivot index
    - null_placement: str, null placement
    
    Returns:
    Array: Partition indices
    """

def top_k_unstable(array, k, sort_keys=None):
    """
    Select the indices of the top k elements (unstable sort).
    
    The result holds positions, not values; pass it to take() to obtain
    the elements themselves.
    
    Parameters:
    - array: Array, ChunkedArray, RecordBatch, or Table, input data
    - k: int, number of leading elements to select
    - sort_keys: list of column names, required for table-like input
    
    Returns:
    Array: Indices of the top k elements
    """

def bottom_k_unstable(array, k, sort_keys=None):
    """
    Select the indices of the bottom k elements (unstable sort).
    
    The result holds positions, not values; pass it to take() to obtain
    the elements themselves.
    
    Parameters:
    - array: Array, ChunkedArray, RecordBatch, or Table, input data
    - k: int, number of leading elements to select
    - sort_keys: list of column names, required for table-like input
    
    Returns:
    Array: Indices of the bottom k elements
    """

def unique(array):
    """
    Get unique values.
    
    Parameters:
    - array: Array, input array
    
    Returns:
    Array: Unique values
    """

def value_counts(array):
    """
    Count occurrences of each value.
    
    Parameters:
    - array: Array, input array
    
    Returns:
    StructArray: Values and their counts
    """

def dictionary_encode(array, null_encoding='mask'):
    """
    Dictionary encode array.
    
    Parameters:
    - array: Array, input array
    - null_encoding: str, null handling: 'mask' keeps nulls as null indices,
      'encode' gives null its own dictionary entry
    
    Returns:
    DictionaryArray: Dictionary encoded array
    """

def run_end_encode(array):
    """
    Run-end encode array.
    
    Parameters:
    - array: Array, input array
    
    Returns:
    RunEndEncodedArray: Run-end encoded array
    """

String Functions

Comprehensive string processing functions for text manipulation, pattern matching, and string transformations.

# String length and properties
def utf8_length(strings):
    """UTF-8 character length of strings."""

def binary_length(strings): 
    """Byte length of binary/string arrays."""

def utf8_is_alnum(strings):
    """Check if strings are alphanumeric."""

def utf8_is_alpha(strings):
    """Check if strings are alphabetic."""

def utf8_is_decimal(strings):
    """Check if strings are decimal."""

def utf8_is_digit(strings):
    """Check if strings contain only digits."""

def utf8_is_lower(strings):
    """Check if strings are lowercase."""

def utf8_is_numeric(strings):
    """Check if strings are numeric."""

def utf8_is_printable(strings):
    """Check if strings are printable."""

def utf8_is_space(strings):
    """Check if strings are whitespace."""

def utf8_is_title(strings):
    """Check if strings are titlecased."""

def utf8_is_upper(strings):
    """Check if strings are uppercase."""

# String transformations
def utf8_upper(strings):
    """Convert strings to uppercase."""

def utf8_lower(strings):
    """Convert strings to lowercase."""

def utf8_swapcase(strings):
    """Swap case of strings."""

def utf8_capitalize(strings):
    """Capitalize first character."""

def utf8_title(strings):
    """Convert to title case."""

def ascii_upper(strings):
    """Convert ASCII strings to uppercase."""

def ascii_lower(strings):
    """Convert ASCII strings to lowercase."""

def ascii_swapcase(strings):
    """Swap case of ASCII strings."""

def ascii_capitalize(strings):
    """Capitalize ASCII strings."""

# String padding and trimming
def utf8_ltrim(strings, characters):
    """
    Left trim strings.
    
    Parameters:
    - strings: Array, input strings
    - characters: str, set of characters to strip (required; use
      utf8_ltrim_whitespace to strip whitespace without specifying it)
    
    Returns:
    Array: Left-trimmed strings
    """

def utf8_rtrim(strings, characters):
    """Right trim the given characters from strings (characters is required)."""

def utf8_trim(strings, characters):
    """Trim the given characters from both ends of strings (characters is required)."""

def utf8_ltrim_whitespace(strings):
    """Left trim whitespace."""

def utf8_rtrim_whitespace(strings):
    """Right trim whitespace."""

def utf8_trim_whitespace(strings):
    """Trim whitespace from both ends."""

def utf8_center(strings, width, padding=' '):
    """
    Center strings with padding.
    
    Parameters:
    - strings: Array, input strings
    - width: int, total width
    - padding: str, padding character
    
    Returns:
    Array: Centered strings
    """

def utf8_lpad(strings, width, padding=' '):
    """Left pad strings."""

def utf8_rpad(strings, width, padding=' '):
    """Right pad strings."""

# String slicing and extraction
def utf8_slice_codeunits(strings, start, stop=None, step=1):
    """
    Slice strings by code units.
    
    Parameters:
    - strings: Array, input strings
    - start: int, start position
    - stop: int, stop position
    - step: int, step size
    
    Returns:
    Array: Sliced strings
    """

def utf8_reverse(strings):
    """Reverse strings."""

def utf8_replace_slice(strings, start, stop, replacement):
    """
    Replace slice of strings.
    
    Parameters:
    - strings: Array, input strings
    - start: int, start position
    - stop: int, stop position  
    - replacement: str, replacement string
    
    Returns:
    Array: Strings with replaced slices
    """

# String searching and matching
def match_substring(strings, pattern, ignore_case=False):
    """
    Check if strings contain substring.
    
    Parameters:
    - strings: Array, input strings
    - pattern: str, substring pattern
    - ignore_case: bool, case insensitive matching
    
    Returns:
    BooleanArray: Match results
    """

def match_substring_regex(strings, pattern, ignore_case=False):
    """
    Check if strings match regex pattern.
    
    Parameters:
    - strings: Array, input strings
    - pattern: str, regex pattern
    - ignore_case: bool, case insensitive matching
    
    Returns:
    BooleanArray: Match results
    """

def find_substring(strings, pattern, ignore_case=False):
    """
    Find first occurrence of substring.
    
    Parameters:
    - strings: Array, input strings
    - pattern: str, substring pattern
    - ignore_case: bool, case insensitive search
    
    Returns:
    Int32Array: First occurrence indices (-1 if not found)
    """

def find_substring_regex(strings, pattern, ignore_case=False):
    """Find first regex match."""

def count_substring(strings, pattern, ignore_case=False):
    """
    Count occurrences of substring.
    
    Parameters:
    - strings: Array, input strings
    - pattern: str, substring pattern
    - ignore_case: bool, case insensitive counting
    
    Returns:
    Int32Array: Occurrence counts
    """

def count_substring_regex(strings, pattern, ignore_case=False):
    """Count regex matches."""

# String replacement
def replace_substring(strings, pattern, replacement, max_replacements=-1):
    """
    Replace substring occurrences.
    
    Parameters:
    - strings: Array, input strings
    - pattern: str, substring to replace
    - replacement: str, replacement string
    - max_replacements: int, maximum replacements (-1 for all)
    
    Returns:
    Array: Strings with replacements
    """

def replace_substring_regex(strings, pattern, replacement, max_replacements=-1):
    """Replace regex matches."""

def extract_regex(strings, pattern):
    """
    Extract regex groups.
    
    Parameters:
    - strings: Array, input strings
    - pattern: str, regex pattern with groups
    
    Returns:
    StructArray: Extracted groups
    """

# String splitting and joining
def split_pattern(strings, pattern, max_splits=-1, reverse=False):
    """
    Split strings by pattern.
    
    Parameters:
    - strings: Array, input strings
    - pattern: str, split pattern
    - max_splits: int, maximum splits (-1 for unlimited)
    - reverse: bool, split from right
    
    Returns:
    ListArray: Split components
    """

def split_pattern_regex(strings, pattern, max_splits=-1, reverse=False):
    """Split strings by regex pattern."""

def binary_join(lists, separator):
    """
    Join binary arrays with separator.
    
    Parameters:
    - lists: ListArray, lists of binary values
    - separator: bytes, join separator
    
    Returns:
    Array: Joined binary values
    """

def binary_join_element_wise(left, right, separator):
    """Element-wise binary join."""

Temporal Functions

Date, time, and timestamp manipulation functions for temporal data processing and calendar operations.

# Date/time extraction
def year(timestamps):
    """Extract year from timestamps."""

def month(timestamps):
    """Extract month from timestamps."""

def day(timestamps):
    """Extract day from timestamps."""

def day_of_week(timestamps, count_from_zero=True, week_start=1):
    """
    Extract day of week.
    
    With the defaults, Monday is 0 and Sunday is 6.
    
    Parameters:
    - timestamps: Array, timestamp array
    - count_from_zero: bool, number days from 0 if True, from 1 otherwise
    - week_start: int, first day of week (1=Monday, 7=Sunday)
    
    Returns:
    Int64Array: Day of week values
    """

def day_of_year(timestamps):
    """Extract day of year."""

def iso_week(timestamps):
    """Extract ISO week number."""

def iso_year(timestamps):
    """Extract ISO year."""

def quarter(timestamps):
    """Extract quarter."""

def hour(timestamps):
    """Extract hour from timestamps."""

def minute(timestamps):
    """Extract minute from timestamps."""

def second(timestamps):
    """Extract second from timestamps."""

def millisecond(timestamps):
    """Extract millisecond from timestamps."""

def microsecond(timestamps):
    """Extract microsecond from timestamps."""

def nanosecond(timestamps):
    """Extract nanosecond from timestamps."""

def subsecond(timestamps):
    """Extract fractional seconds."""

# Temporal arithmetic
def years_between(start, end):
    """Calculate years between timestamps."""

def month_interval_between(start, end):
    """Calculate month intervals between timestamps."""

def day_time_interval_between(start, end):
    """Calculate day-time intervals between timestamps."""

def weeks_between(start, end):
    """Calculate weeks between timestamps."""

def days_between(start, end):
    """Calculate days between timestamps."""

def hours_between(start, end):
    """Calculate hours between timestamps."""

def minutes_between(start, end):
    """Calculate minutes between timestamps."""

def seconds_between(start, end):
    """Calculate seconds between timestamps."""

def milliseconds_between(start, end):
    """Calculate milliseconds between timestamps."""

def microseconds_between(start, end):
    """Calculate microseconds between timestamps."""

def nanoseconds_between(start, end):
    """Calculate nanoseconds between timestamps."""

# Temporal rounding and truncation  
def floor_temporal(timestamps, multiple=1, unit='day', week_starts_monday=True, ceil_is_strictly_greater=False, calendar_based_origin=False):
    """
    Floor timestamps to a multiple of a temporal unit.
    
    Parameters:
    - timestamps: Array, timestamp array
    - multiple: int, number of units to round to (e.g. 15 with unit='minute')
    - unit: str, temporal unit ('year', 'quarter', 'month', 'week', 'day',
      'hour', 'minute', 'second', 'millisecond', 'microsecond', 'nanosecond')
    - week_starts_monday: bool, weeks start on Monday if True, Sunday otherwise
    - ceil_is_strictly_greater: bool, ceiling behavior (affects ceil_temporal)
    - calendar_based_origin: bool, use calendar-based origin
    
    Returns:
    Array: Floored timestamps
    """

def ceil_temporal(timestamps, multiple=1, unit='day', week_starts_monday=True, ceil_is_strictly_greater=False, calendar_based_origin=False):
    """Ceil timestamps to a multiple of a temporal unit."""

def round_temporal(timestamps, multiple=1, unit='day', week_starts_monday=True, ceil_is_strictly_greater=False, calendar_based_origin=False):
    """Round timestamps to a multiple of a temporal unit."""

# String parsing and formatting
def strftime(timestamps, format='%Y-%m-%dT%H:%M:%S', locale='C'):
    """
    Format timestamps as strings.
    
    Parameters:
    - timestamps: Array, timestamp array
    - format: str, strftime format string (default is ISO-8601 with a 'T'
      separator)
    - locale: str, locale for formatting
    
    Returns:
    StringArray: Formatted timestamp strings
    """

def strptime(strings, format, unit, error_is_null=False):
    """
    Parse strings as timestamps.
    
    Parameters:
    - strings: Array, string array
    - format: str, strptime format string
    - unit: str, timestamp unit
    - error_is_null: bool, return null on parse errors
    
    Returns:
    TimestampArray: Parsed timestamps
    """

# Timezone operations
def assume_timezone(timestamps, timezone, ambiguous='raise', nonexistent='raise'):
    """
    Assume timezone for naive timestamps.
    
    Parameters:
    - timestamps: Array, naive timestamp array
    - timezone: str, timezone identifier
    - ambiguous: str, how to handle ambiguous times
    - nonexistent: str, how to handle nonexistent times
    
    Returns:
    TimestampArray: Timezone-aware timestamps
    """

def local_timestamp(timestamps):
    """Convert timezone-aware timestamps to timezone-naive local-time timestamps."""

Type Conversion Functions

Functions for casting and converting between different Arrow data types with configurable safety and behavior options.

def cast(array, target_type=None, safe=None, options=None):
    """
    Cast array to different type.
    
    Pass either target_type (optionally with safe) or a CastOptions object
    through options, not both.
    
    Parameters:
    - array: Array, input array
    - target_type: DataType or type string, target type
    - safe: bool, check for overflows and other unsafe conversions
      (casting is safe when left unset)
    - options: CastOptions, casting options (alternative to target_type/safe)
    
    Returns:
    Array: Cast array
    """

def can_cast(from_type, to_type):
    """
    Check if type can be cast.
    
    Parameters:
    - from_type: DataType, source type
    - to_type: DataType, target type
    
    Returns:
    bool: Whether cast is supported
    """

class CastOptions:
    """
    Options for type casting.
    
    Use CastOptions.safe(target_type) or CastOptions.unsafe(target_type) to
    build a fully safe or fully permissive configuration.
    
    Attributes:
    - target_type: Type to cast to
    - allow_int_overflow: Allow integer overflow
    - allow_time_truncate: Allow time truncation
    - allow_time_overflow: Allow time overflow  
    - allow_decimal_truncate: Allow decimal truncation
    - allow_float_truncate: Allow float truncation
    - allow_invalid_utf8: Allow producing invalid UTF-8 string data
    """

Random Number Generation

Functions for generating random numbers and sampling from distributions.

def random(n, *, initializer='system', options=None):
    """
    Generate random numbers.
    
    Values are uniformly distributed doubles in the range [0, 1).
    
    Parameters:
    - n: int, number of random values
    - initializer: int or 'system', seed for the generator; 'system' seeds
      from a system source, an int gives reproducible output
    - options: RandomOptions, generation options (alternative to initializer)
    
    Returns:
    Array: Random values
    """

class RandomOptions:
    """
    Options for random number generation.
    
    Attributes:
    - initializer: Random seed, an int or 'system' (the default)
    """

Usage Examples

Basic Computations

import pyarrow as pa
import pyarrow.compute as pc

# Create sample data
numbers = pa.array([1, 2, 3, 4, 5, None, 7, 8, 9, 10])
strings = pa.array(['apple', 'banana', 'cherry', None, 'date'])

# Arithmetic operations
doubled = pc.multiply(numbers, 2)
sum_result = pc.sum(numbers)
mean_result = pc.mean(numbers)

# String operations
lengths = pc.utf8_length(strings)
upper_strings = pc.utf8_upper(strings)
contains_a = pc.match_substring(strings, 'a')

# Filtering and selection
filtered = pc.filter(numbers, pc.greater(numbers, 5))
# top_k_unstable returns indices, so take() is needed to get the values
top_3_indices = pc.top_k_unstable(numbers, 3)
top_3 = pc.take(numbers, top_3_indices)

Table Operations

import pyarrow as pa
import pyarrow.compute as pc

# Create table
table = pa.table({
    'id': [1, 2, 3, 4, 5],
    'name': ['Alice', 'Bob', 'Charlie', 'Diana', 'Eve'],
    'age': [25, 30, 35, 28, 32],
    'salary': [50000, 60000, 70000, 55000, 65000]
})

# Filter table
adults = table.filter(pc.greater_equal(table['age'], 30))

# Add computed column (append_column adds at the end; add_column needs an
# explicit insertion index as its first argument)
table_with_bonus = table.append_column(
    'bonus',
    pc.multiply(table['salary'], 0.1)
)

# Aggregations
total_salary = pc.sum(table['salary'])
avg_age = pc.mean(table['age'])
age_stats = pc.quantile(table['age'], q=[0.25, 0.5, 0.75])

Complex Expressions

import pyarrow as pa
import pyarrow.compute as pc

# Create table with temporal data
# (timestamps are parsed by casting the strings; pa.array() does not build
# timestamp values directly from str input)
table = pa.table({
    'timestamp': pa.array([
        '2023-01-15 10:30:00',
        '2023-02-20 14:45:00',
        '2023-03-10 09:15:00',
        '2023-04-05 16:20:00'
    ]).cast(pa.timestamp('s')),
    'value': [100, 200, 150, 300]
})

# Extract temporal components
table = table.append_column('year', pc.year(table['timestamp']))
table = table.append_column('month', pc.month(table['timestamp']))
table = table.append_column('day_of_week', pc.day_of_week(table['timestamp']))

# Complex filtering
high_value_weekdays = table.filter(
    pc.and_(
        pc.greater(table['value'], 150),
        pc.less(table['day_of_week'], 5)  # Monday=0 to Friday=4
    )
)

# Conditional expressions
# case_when takes one struct of boolean conditions followed by one value per
# condition, plus an optional trailing "else" value
table = table.append_column(
    'category',
    pc.case_when(
        pc.make_struct(
            pc.less(table['value'], 150),
            pc.less(table['value'], 250)
        ),
        'low',
        'medium',
        'high'
    )
)

User-Defined Functions

import pyarrow as pa
import pyarrow.compute as pc

# Register scalar UDF
# The first argument of a UDF is always the UDF context object
def double_and_add_one(ctx, x):
    return pc.add(pc.multiply(x, 2), 1, memory_pool=ctx.memory_pool)

pc.register_scalar_function(
    double_and_add_one,
    'double_and_add_one',
    # function_doc must be a dict with 'summary' and 'description' keys
    {
        'summary': 'Double input and add one',
        'description': 'Computes 2 * x + 1 element-wise.'
    },
    # Input and output types are required
    in_types={'x': pa.int64()},
    out_type=pa.int64()
)

# Use registered function
result = pc.call_function('double_and_add_one', [pa.array([1, 2, 3, 4, 5])])
print(result)  # [3, 5, 7, 9, 11]

Install with Tessl CLI

npx tessl i tessl/pypi-pyarrow

docs

advanced-features.md

arrow-flight.md

compute-functions.md

core-data-structures.md

data-types.md

dataset-operations.md

file-formats.md

index.md

memory-io.md

tile.json