or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

advanced-features.md · arrow-flight.md · compute-functions.md · core-data-structures.md · data-types.md · dataset-operations.md · file-formats.md · index.md · memory-io.md

docs/compute-functions.md

0

# Compute Functions

1

2

High-performance vectorized compute operations providing 200+ functions optimized for columnar data. The compute engine enables efficient mathematical operations, string processing, temporal calculations, aggregations, and filtering on Arrow arrays and tables.

3

4

## Capabilities

5

6

### Core Compute Infrastructure

7

8

Function registration, execution, and expression system for building complex computational pipelines with lazy evaluation and optimization.

9

10

```python { .api }

11

def call_function(name, args, options=None, memory_pool=None):

12

"""

13

Call compute function by name.

14

15

Parameters:

16

- name: str, function name

17

- args: list, function arguments (arrays, scalars, tables)

18

- options: FunctionOptions, function-specific options

19

- memory_pool: MemoryPool, memory pool for allocations

20

21

Returns:

22

Array, Scalar, or Table: Result of computation

23

"""

24

25

def get_function(name):

26

"""

27

Get registered function by name.

28

29

Parameters:

30

- name: str, function name

31

32

Returns:

33

Function: Registered function object

34

"""

35

36

def list_functions():

37

"""

38

List all available function names.

39

40

Returns:

41

list of str: Available function names

42

"""

43

44

def function_registry():

45

"""

46

Get global function registry.

47

48

Returns:

49

FunctionRegistry: Global function registry

50

"""

51

52

class Expression:

53

"""

54

Compute expression for lazy evaluation and optimization.

55

"""

56

57

def equals(self, other):

58

"""Check equality with another expression."""

59

60

def to_string(self):

61

"""String representation of expression."""

62

63

def field(name):

64

"""

65

Create field reference expression.

66

67

Parameters:

68

- name: str, field name

69

70

Returns:

71

Expression: Field reference expression

72

"""

73

74

def scalar(value):

75

"""

76

Create scalar literal expression.

77

78

Parameters:

79

- value: scalar value

80

81

Returns:

82

Expression: Scalar literal expression

83

"""

84

85

class Function:

86

"""Base class for compute functions."""

87

88

@property

89

def name(self):

90

"""Function name."""

91

92

@property

93

def arity(self):

94

"""Function arity (number of arguments)."""

95

96

@property

97

def doc(self):

98

"""Function documentation."""

99

100

class FunctionOptions:

101

"""Base class for function options."""

102

103

class FunctionRegistry:

104

"""Registry of available compute functions."""

105

106

def get_function(self, name):

107

"""Get function by name."""

108

109

def get_function_names(self):

110

"""Get all function names."""

111

```

112

113

### Mathematical Operations

114

115

Arithmetic operations, mathematical functions, and numeric computations optimized for columnar data processing.

116

117

```python { .api }

118

# Arithmetic operations

119

def add(x, y):

120

"""Element-wise addition."""

121

122

def subtract(x, y):

123

"""Element-wise subtraction."""

124

125

def multiply(x, y):

126

"""Element-wise multiplication."""

127

128

def divide(x, y):

129

"""Element-wise division."""

130

131

def power(base, exponent):

132

"""Element-wise exponentiation."""

133

134

def negate(x):

135

"""Element-wise negation."""

136

137

def abs(x):

138

"""Element-wise absolute value."""

139

140

def sign(x):

141

"""Element-wise sign (-1, 0, 1)."""

142

143

# Mathematical functions

144

def sqrt(x):

145

"""Element-wise square root."""

146

147

def exp(x):

148

"""Element-wise exponential (e^x)."""

149

150

def ln(x):

151

"""Element-wise natural logarithm."""

152

153

def log10(x):

154

"""Element-wise base-10 logarithm."""

155

156

def log2(x):

157

"""Element-wise base-2 logarithm."""

158

159

def log1p(x):

160

"""Element-wise log(1 + x)."""

161

162

def floor(x):

163

"""Element-wise floor."""

164

165

def ceil(x):

166

"""Element-wise ceiling."""

167

168

def trunc(x):

169

"""Element-wise truncation toward zero."""

170

171

def round(x, ndigits=0, round_mode='half_to_even'):

172

"""

173

Element-wise rounding.

174

175

Parameters:

176

- x: Array, input array

177

- ndigits: int, number of decimal places

178

- round_mode: str, rounding mode

179

180

Returns:

181

Array: Rounded array

182

"""

183

184

# Trigonometric functions

185

def sin(x):

186

"""Element-wise sine."""

187

188

def cos(x):

189

"""Element-wise cosine."""

190

191

def tan(x):

192

"""Element-wise tangent."""

193

194

def asin(x):

195

"""Element-wise arcsine."""

196

197

def acos(x):

198

"""Element-wise arccosine."""

199

200

def atan(x):

201

"""Element-wise arctangent."""

202

203

def atan2(y, x):

204

"""Element-wise arctangent of y/x."""

205

206

# Bitwise operations

207

def bit_wise_and(x, y):

208

"""Element-wise bitwise AND."""

209

210

def bit_wise_or(x, y):

211

"""Element-wise bitwise OR."""

212

213

def bit_wise_xor(x, y):

214

"""Element-wise bitwise XOR."""

215

216

def bit_wise_not(x):

217

"""Element-wise bitwise NOT."""

218

219

def shift_left(x, y):

220

"""Element-wise left bit shift."""

221

222

def shift_right(x, y):

223

"""Element-wise right bit shift."""

224

```

225

226

### Comparison and Logical Operations

227

228

Element-wise comparisons, logical operations, and boolean functions for filtering and conditional logic.

229

230

```python { .api }

231

# Comparison operations

232

def equal(x, y):

233

"""Element-wise equality comparison."""

234

235

def not_equal(x, y):

236

"""Element-wise inequality comparison."""

237

238

def less(x, y):

239

"""Element-wise less than comparison."""

240

241

def less_equal(x, y):

242

"""Element-wise less than or equal comparison."""

243

244

def greater(x, y):

245

"""Element-wise greater than comparison."""

246

247

def greater_equal(x, y):

248

"""Element-wise greater than or equal comparison."""

249

250

# Logical operations

251

def and_(x, y):

252

"""Element-wise logical AND."""

253

254

def or_(x, y):

255

"""Element-wise logical OR."""

256

257

def xor(x, y):

258

"""Element-wise logical XOR."""

259

260

def invert(x):

261

"""Element-wise logical NOT."""

262

263

# Null handling

264

def is_null(x):

265

"""Check for null values."""

266

267

def is_valid(x):

268

"""Check for non-null values."""

269

270

def is_nan(x):

271

"""Check for NaN values (floating point)."""

272

273

def is_finite(x):

274

"""Check for finite values."""

275

276

def is_infinite(x):

277

"""Check for infinite values."""

278

279

def fill_null(values, fill_value):

280

"""Fill null values with specified value."""

281

282

def coalesce(*arrays):

283

"""Return first non-null value from arrays."""

284

285

def choose(indices, *arrays):

286

"""Choose values from arrays based on indices."""

287

288

def if_else(condition, left, right):

289

"""Conditional selection (ternary operator)."""

290

291

def case_when(*args):

292

"""

293

Multi-branch conditional selection.

294

295

Parameters:

296

- args: a struct of boolean conditions (e.g. built with make_struct) followed by one value per condition, plus an optional trailing else value

297

298

Returns:

299

Array: Selected values based on conditions

300

"""

301

```

302

303

### Aggregation Functions

304

305

Statistical and aggregation functions for computing summary statistics and reductions over arrays and groups.

306

307

```python { .api }

308

# Basic aggregations

309

def sum(array, skip_nulls=True, min_count=1):

310

"""

311

Sum of array elements.

312

313

Parameters:

314

- array: Array, input array

315

- skip_nulls: bool, ignore null values

316

- min_count: int, minimum non-null values required

317

318

Returns:

319

Scalar: Sum of elements

320

"""

321

322

def mean(array, skip_nulls=True, min_count=1):

323

"""Mean of array elements."""

324

325

def count(array, mode='only_valid'):

326

"""

327

Count array elements.

328

329

Parameters:

330

- array: Array, input array

331

- mode: str, counting mode ('only_valid', 'only_null', 'all')

332

333

Returns:

334

Scalar: Count of elements

335

"""

336

337

def count_distinct(array, mode='only_valid'):

338

"""Count distinct elements."""

339

340

def min(array, skip_nulls=True, min_count=1):

341

"""Minimum value."""

342

343

def max(array, skip_nulls=True, min_count=1):

344

"""Maximum value."""

345

346

def min_max(array, skip_nulls=True, min_count=1):

347

"""

348

Minimum and maximum values.

349

350

Returns:

351

StructScalar: Struct with 'min' and 'max' fields

352

"""

353

354

def any(array, skip_nulls=True, min_count=1):

355

"""Logical OR reduction (any true values)."""

356

357

def all(array, skip_nulls=True, min_count=1):

358

"""Logical AND reduction (all true values)."""

359

360

# Statistical functions

361

def variance(array, ddof=0, skip_nulls=True, min_count=1):

362

"""

363

Variance of array elements.

364

365

Parameters:

366

- array: Array, input array

367

- ddof: int, delta degrees of freedom

368

- skip_nulls: bool, ignore null values

369

- min_count: int, minimum non-null values required

370

371

Returns:

372

Scalar: Variance

373

"""

374

375

def stddev(array, ddof=0, skip_nulls=True, min_count=1):

376

"""Standard deviation."""

377

378

def quantile(array, q=0.5, interpolation='linear', skip_nulls=True, min_count=1):

379

"""

380

Quantile of array elements.

381

382

Parameters:

383

- array: Array, input array

384

- q: float or list, quantile(s) to compute (0.0 to 1.0)

385

- interpolation: str, interpolation method

386

- skip_nulls: bool, ignore null values

387

- min_count: int, minimum non-null values required

388

389

Returns:

390

Scalar or Array: Quantile value(s)

391

"""

392

393

def mode(array, n=1, skip_nulls=True, min_count=1):

394

"""

395

Mode (most frequent values).

396

397

Parameters:

398

- array: Array, input array

399

- n: int, number of modes to return

400

- skip_nulls: bool, ignore null values

401

- min_count: int, minimum non-null values required

402

403

Returns:

404

StructArray: Modes with counts

405

"""

406

407

def tdigest(array, q=None, delta=100, buffer_size=500, skip_nulls=True, min_count=1):

408

"""

409

T-Digest quantile approximation.

410

411

Parameters:

412

- array: Array, input array

413

- q: list of float, quantiles to compute

414

- delta: int, compression parameter

415

- buffer_size: int, buffer size

416

- skip_nulls: bool, ignore null values

417

- min_count: int, minimum non-null values required

418

419

Returns:

420

Array: Approximate quantiles

421

"""

422

423

# Product and cumulative operations

424

def product(array, skip_nulls=True, min_count=1):

425

"""Product of array elements."""

426

427

def cumulative_sum(array, start=None, skip_nulls=True):

428

"""

429

Cumulative sum.

430

431

Parameters:

432

- array: Array, input array

433

- start: scalar, starting value

434

- skip_nulls: bool, ignore null values

435

436

Returns:

437

Array: Cumulative sums

438

"""

439

440

def cumulative_sum_checked(array, start=None, skip_nulls=True):

441

"""Cumulative sum with overflow checking."""

442

443

def cumulative_prod(array, start=None, skip_nulls=True):

444

"""Cumulative product."""

445

446

def cumulative_max(array, skip_nulls=True):

447

"""Cumulative maximum."""

448

449

def cumulative_min(array, skip_nulls=True):

450

"""Cumulative minimum."""

451

```

452

453

### Array Operations

454

455

Functions for array manipulation, filtering, sorting, and selection operations.

456

457

```python { .api }

458

def take(data, indices, boundscheck=True):

459

"""

460

Select elements by indices.

461

462

Parameters:

463

- data: Array, input array

464

- indices: Array, selection indices

465

- boundscheck: bool, check index bounds

466

467

Returns:

468

Array: Selected elements

469

"""

470

471

def filter(data, selection_filter, null_selection_behavior='drop'):

472

"""

473

Filter array by boolean mask.

474

475

Parameters:

476

- data: Array, input array

477

- selection_filter: Array, boolean selection mask

478

- null_selection_behavior: str, how to handle nulls in mask

479

480

Returns:

481

Array: Filtered elements

482

"""

483

484

def slice(array, start, stop=None, step=1):

485

"""

486

Slice array.

487

488

Parameters:

489

- array: Array, input array

490

- start: int, start index

491

- stop: int, stop index (exclusive)

492

- step: int, step size

493

494

Returns:

495

Array: Sliced array

496

"""

497

498

def array_sort_indices(array, order='ascending', null_placement='at_end'):

499

"""

500

Get indices that would sort array.

501

502

Parameters:

503

- array: Array, input array

504

- order: str, sort order ('ascending', 'descending')

505

- null_placement: str, null placement ('at_start', 'at_end')

506

507

Returns:

508

Array: Sort indices

509

"""

510

511

def sort_indices(arrays, orders=None, null_placement=None):

512

"""

513

Get indices for sorting by multiple arrays.

514

515

Parameters:

516

- arrays: list of Array, sort keys

517

- orders: list of str, sort orders for each key

518

- null_placement: list of str, null placement for each key

519

520

Returns:

521

Array: Sort indices

522

"""

523

524

def partition_nth_indices(array, pivot, null_placement='at_end'):

525

"""

526

Partition array around nth element.

527

528

Parameters:

529

- array: Array, input array

530

- pivot: int, pivot index

531

- null_placement: str, null placement

532

533

Returns:

534

Array: Partition indices

535

"""

536

537

def top_k_unstable(array, k, sort_keys=None):

538

"""

539

Select the indices of the top k elements (unstable sort).

540

541

Parameters:

542

- array: Array, input array

543

- k: int, number of elements to select

544

- sort_keys: list, sort keys for selection

545

546

Returns:

547

Array: Indices of the top k elements

548

"""

549

550

def bottom_k_unstable(array, k, sort_keys=None):

551

"""

552

Select the indices of the bottom k elements (unstable sort).

553

554

Parameters:

555

- array: Array, input array

556

- k: int, number of elements to select

557

- sort_keys: list, sort keys for selection

558

559

Returns:

560

Array: Indices of the bottom k elements

561

"""

562

563

def unique(array):

564

"""

565

Get unique values.

566

567

Parameters:

568

- array: Array, input array

569

570

Returns:

571

Array: Unique values

572

"""

573

574

def value_counts(array):

575

"""

576

Count occurrences of each value.

577

578

Parameters:

579

- array: Array, input array

580

581

Returns:

582

StructArray: Values and their counts

583

"""

584

585

def dictionary_encode(array, null_encoding_behavior='mask'):

586

"""

587

Dictionary encode array.

588

589

Parameters:

590

- array: Array, input array

591

- null_encoding_behavior: str, null handling

592

593

Returns:

594

DictionaryArray: Dictionary encoded array

595

"""

596

597

def run_end_encode(array):

598

"""

599

Run-end encode array.

600

601

Parameters:

602

- array: Array, input array

603

604

Returns:

605

RunEndEncodedArray: Run-end encoded array

606

"""

607

```

608

609

### String Functions

610

611

Comprehensive string processing functions for text manipulation, pattern matching, and string transformations.

612

613

```python { .api }

614

# String length and properties

615

def utf8_length(strings):

616

"""UTF-8 character length of strings."""

617

618

def binary_length(strings):

619

"""Byte length of binary/string arrays."""

620

621

def utf8_is_alnum(strings):

622

"""Check if strings are alphanumeric."""

623

624

def utf8_is_alpha(strings):

625

"""Check if strings are alphabetic."""

626

627

def utf8_is_decimal(strings):

628

"""Check if strings are decimal."""

629

630

def utf8_is_digit(strings):

631

"""Check if strings contain only digits."""

632

633

def utf8_is_lower(strings):

634

"""Check if strings are lowercase."""

635

636

def utf8_is_numeric(strings):

637

"""Check if strings are numeric."""

638

639

def utf8_is_printable(strings):

640

"""Check if strings are printable."""

641

642

def utf8_is_space(strings):

643

"""Check if strings are whitespace."""

644

645

def utf8_is_title(strings):

646

"""Check if strings are titlecased."""

647

648

def utf8_is_upper(strings):

649

"""Check if strings are uppercase."""

650

651

# String transformations

652

def utf8_upper(strings):

653

"""Convert strings to uppercase."""

654

655

def utf8_lower(strings):

656

"""Convert strings to lowercase."""

657

658

def utf8_swapcase(strings):

659

"""Swap case of strings."""

660

661

def utf8_capitalize(strings):

662

"""Capitalize first character."""

663

664

def utf8_title(strings):

665

"""Convert to title case."""

666

667

def ascii_upper(strings):

668

"""Convert ASCII strings to uppercase."""

669

670

def ascii_lower(strings):

671

"""Convert ASCII strings to lowercase."""

672

673

def ascii_swapcase(strings):

674

"""Swap case of ASCII strings."""

675

676

def ascii_capitalize(strings):

677

"""Capitalize ASCII strings."""

678

679

# String padding and trimming

680

def utf8_ltrim(strings, characters=' '):

681

"""

682

Left trim strings.

683

684

Parameters:

685

- strings: Array, input strings

686

- characters: str, characters to trim

687

688

Returns:

689

Array: Left-trimmed strings

690

"""

691

692

def utf8_rtrim(strings, characters=' '):

693

"""Right trim strings."""

694

695

def utf8_trim(strings, characters=' '):

696

"""Trim strings from both ends."""

697

698

def utf8_ltrim_whitespace(strings):

699

"""Left trim whitespace."""

700

701

def utf8_rtrim_whitespace(strings):

702

"""Right trim whitespace."""

703

704

def utf8_trim_whitespace(strings):

705

"""Trim whitespace from both ends."""

706

707

def utf8_center(strings, width, padding=' '):

708

"""

709

Center strings with padding.

710

711

Parameters:

712

- strings: Array, input strings

713

- width: int, total width

714

- padding: str, padding character

715

716

Returns:

717

Array: Centered strings

718

"""

719

720

def utf8_lpad(strings, width, padding=' '):

721

"""Left pad strings."""

722

723

def utf8_rpad(strings, width, padding=' '):

724

"""Right pad strings."""

725

726

# String slicing and extraction

727

def utf8_slice_codeunits(strings, start, stop=None, step=1):

728

"""

729

Slice strings by code units.

730

731

Parameters:

732

- strings: Array, input strings

733

- start: int, start position

734

- stop: int, stop position

735

- step: int, step size

736

737

Returns:

738

Array: Sliced strings

739

"""

740

741

def utf8_reverse(strings):

742

"""Reverse strings."""

743

744

def utf8_replace_slice(strings, start, stop, replacement):

745

"""

746

Replace slice of strings.

747

748

Parameters:

749

- strings: Array, input strings

750

- start: int, start position

751

- stop: int, stop position

752

- replacement: str, replacement string

753

754

Returns:

755

Array: Strings with replaced slices

756

"""

757

758

# String searching and matching

759

def match_substring(strings, pattern, ignore_case=False):

760

"""

761

Check if strings contain substring.

762

763

Parameters:

764

- strings: Array, input strings

765

- pattern: str, substring pattern

766

- ignore_case: bool, case insensitive matching

767

768

Returns:

769

BooleanArray: Match results

770

"""

771

772

def match_substring_regex(strings, pattern, ignore_case=False):

773

"""

774

Check if strings match regex pattern.

775

776

Parameters:

777

- strings: Array, input strings

778

- pattern: str, regex pattern

779

- ignore_case: bool, case insensitive matching

780

781

Returns:

782

BooleanArray: Match results

783

"""

784

785

def find_substring(strings, pattern, ignore_case=False):

786

"""

787

Find first occurrence of substring.

788

789

Parameters:

790

- strings: Array, input strings

791

- pattern: str, substring pattern

792

- ignore_case: bool, case insensitive search

793

794

Returns:

795

Int32Array: First occurrence indices (-1 if not found)

796

"""

797

798

def find_substring_regex(strings, pattern, ignore_case=False):

799

"""Find first regex match."""

800

801

def count_substring(strings, pattern, ignore_case=False):

802

"""

803

Count occurrences of substring.

804

805

Parameters:

806

- strings: Array, input strings

807

- pattern: str, substring pattern

808

- ignore_case: bool, case insensitive counting

809

810

Returns:

811

Int32Array: Occurrence counts

812

"""

813

814

def count_substring_regex(strings, pattern, ignore_case=False):

815

"""Count regex matches."""

816

817

# String replacement

818

def replace_substring(strings, pattern, replacement, max_replacements=-1):

819

"""

820

Replace substring occurrences.

821

822

Parameters:

823

- strings: Array, input strings

824

- pattern: str, substring to replace

825

- replacement: str, replacement string

826

- max_replacements: int, maximum replacements (-1 for all)

827

828

Returns:

829

Array: Strings with replacements

830

"""

831

832

def replace_substring_regex(strings, pattern, replacement, max_replacements=-1):

833

"""Replace regex matches."""

834

835

def extract_regex(strings, pattern):

836

"""

837

Extract regex groups.

838

839

Parameters:

840

- strings: Array, input strings

841

- pattern: str, regex pattern with groups

842

843

Returns:

844

StructArray: Extracted groups

845

"""

846

847

# String splitting and joining

848

def split_pattern(strings, pattern, max_splits=-1, reverse=False):

849

"""

850

Split strings by pattern.

851

852

Parameters:

853

- strings: Array, input strings

854

- pattern: str, split pattern

855

- max_splits: int, maximum splits (-1 for unlimited)

856

- reverse: bool, split from right

857

858

Returns:

859

ListArray: Split components

860

"""

861

862

def split_pattern_regex(strings, pattern, max_splits=-1, reverse=False):

863

"""Split strings by regex pattern."""

864

865

def binary_join(lists, separator):

866

"""

867

Join binary arrays with separator.

868

869

Parameters:

870

- lists: ListArray, lists of binary values

871

- separator: bytes, join separator

872

873

Returns:

874

Array: Joined binary values

875

"""

876

877

def binary_join_element_wise(left, right, separator):

878

"""Element-wise binary join."""

879

```

880

881

### Temporal Functions

882

883

Date, time, and timestamp manipulation functions for temporal data processing and calendar operations.

884

885

```python { .api }

886

# Date/time extraction

887

def year(timestamps):

888

"""Extract year from timestamps."""

889

890

def month(timestamps):

891

"""Extract month from timestamps."""

892

893

def day(timestamps):

894

"""Extract day from timestamps."""

895

896

def day_of_week(timestamps, count_from_zero=True, week_start=1):

897

"""

898

Extract day of week.

899

900

Parameters:

901

- timestamps: Array, timestamp array

902

- count_from_zero: bool, whether to count from 0

903

- week_start: int, first day of week (1=Monday, 7=Sunday)

904

905

Returns:

906

Int32Array: Day of week values

907

"""

908

909

def day_of_year(timestamps):

910

"""Extract day of year."""

911

912

def iso_week(timestamps):

913

"""Extract ISO week number."""

914

915

def iso_year(timestamps):

916

"""Extract ISO year."""

917

918

def quarter(timestamps):

919

"""Extract quarter."""

920

921

def hour(timestamps):

922

"""Extract hour from timestamps."""

923

924

def minute(timestamps):

925

"""Extract minute from timestamps."""

926

927

def second(timestamps):

928

"""Extract second from timestamps."""

929

930

def millisecond(timestamps):

931

"""Extract millisecond from timestamps."""

932

933

def microsecond(timestamps):

934

"""Extract microsecond from timestamps."""

935

936

def nanosecond(timestamps):

937

"""Extract nanosecond from timestamps."""

938

939

def subsecond(timestamps):

940

"""Extract fractional seconds."""

941

942

# Temporal arithmetic

943

def years_between(start, end):

944

"""Calculate years between timestamps."""

945

946

def month_interval_between(start, end):

947

"""Calculate month intervals between timestamps."""

948

949

def day_time_interval_between(start, end):

950

"""Calculate day-time intervals between timestamps."""

951

952

def weeks_between(start, end):

953

"""Calculate weeks between timestamps."""

954

955

def days_between(start, end):

956

"""Calculate days between timestamps."""

957

958

def hours_between(start, end):

959

"""Calculate hours between timestamps."""

960

961

def minutes_between(start, end):

962

"""Calculate minutes between timestamps."""

963

964

def seconds_between(start, end):

965

"""Calculate seconds between timestamps."""

966

967

def milliseconds_between(start, end):

968

"""Calculate milliseconds between timestamps."""

969

970

def microseconds_between(start, end):

971

"""Calculate microseconds between timestamps."""

972

973

def nanoseconds_between(start, end):

974

"""Calculate nanoseconds between timestamps."""

975

976

# Temporal rounding and truncation

977

def floor_temporal(timestamps, unit='day', week_starts_monday=True, ceil_is_strictly_greater=False, calendar_based_origin=False):

978

"""

979

Floor timestamps to temporal unit.

980

981

Parameters:

982

- timestamps: Array, timestamp array

983

- unit: str, temporal unit ('year', 'month', 'day', 'hour', etc.)

984

- week_starts_monday: bool, week start day

985

- ceil_is_strictly_greater: bool, ceiling behavior

986

- calendar_based_origin: bool, use calendar-based origin

987

988

Returns:

989

Array: Floored timestamps

990

"""

991

992

def ceil_temporal(timestamps, unit='day', week_starts_monday=True, ceil_is_strictly_greater=False, calendar_based_origin=False):

993

"""Ceil timestamps to temporal unit."""

994

995

def round_temporal(timestamps, unit='day', week_starts_monday=True, ceil_is_strictly_greater=False, calendar_based_origin=False):

996

"""Round timestamps to temporal unit."""

997

998

# String parsing and formatting

999

def strftime(timestamps, format='%Y-%m-%d %H:%M:%S', locale='C'):

1000

"""

1001

Format timestamps as strings.

1002

1003

Parameters:

1004

- timestamps: Array, timestamp array

1005

- format: str, strftime format string

1006

- locale: str, locale for formatting

1007

1008

Returns:

1009

StringArray: Formatted timestamp strings

1010

"""

1011

1012

def strptime(strings, format, unit, error_is_null=False):

1013

"""

1014

Parse strings as timestamps.

1015

1016

Parameters:

1017

- strings: Array, string array

1018

- format: str, strptime format string

1019

- unit: str, timestamp unit

1020

- error_is_null: bool, return null on parse errors

1021

1022

Returns:

1023

TimestampArray: Parsed timestamps

1024

"""

1025

1026

# Timezone operations

1027

def assume_timezone(timestamps, timezone, ambiguous='raise', nonexistent='raise'):

1028

"""

1029

Assume timezone for naive timestamps.

1030

1031

Parameters:

1032

- timestamps: Array, naive timestamp array

1033

- timezone: str, timezone identifier

1034

- ambiguous: str, how to handle ambiguous times

1035

- nonexistent: str, how to handle nonexistent times

1036

1037

Returns:

1038

TimestampArray: Timezone-aware timestamps

1039

"""

1040

1041

def local_timestamp(timestamps):

1042

"""Convert timezone-aware timestamps to timezone-naive local time."""

1043

```

1044

1045

### Type Conversion Functions

1046

1047

Functions for casting and converting between different Arrow data types with configurable safety and behavior options.

1048

1049

```python { .api }

1050

def cast(array, target_type, safe=True, options=None):

1051

"""

1052

Cast array to different type.

1053

1054

Parameters:

1055

- array: Array, input array

1056

- target_type: DataType, target type

1057

- safe: bool, check for data loss

1058

- options: CastOptions, casting options

1059

1060

Returns:

1061

Array: Cast array

1062

"""

1063

1064

def can_cast(from_type, to_type):

1065

"""

1066

Check if type can be cast.

1067

1068

Parameters:

1069

- from_type: DataType, source type

1070

- to_type: DataType, target type

1071

1072

Returns:

1073

bool: Whether cast is supported

1074

"""

1075

1076

class CastOptions:

1077

"""

1078

Options for type casting.

1079

1080

Attributes:

1081

- safe: Whether to check for data loss

1082

- allow_int_overflow: Allow integer overflow

1083

- allow_time_truncate: Allow time truncation

1084

- allow_time_overflow: Allow time overflow

1085

- allow_decimal_truncate: Allow decimal truncation

1086

- allow_float_truncate: Allow float truncation

1087

"""

1088

```

1089

1090

### Random Number Generation

1091

1092

Functions for generating random numbers and sampling from distributions.

1093

1094

```python { .api }

1095

def random(n, initializer=None, options=None):

1096

"""

1097

Generate random numbers.

1098

1099

Parameters:

1100

- n: int, number of random values

1101

- initializer: int, random seed

1102

- options: RandomOptions, generation options

1103

1104

Returns:

1105

Array: Random values

1106

"""

1107

1108

class RandomOptions:

1109

"""

1110

Options for random number generation.

1111

1112

Attributes:

1113

- initializer: Random seed

1114

- distribution: Distribution type

1115

"""

1116

```

1117

1118

## Usage Examples

1119

1120

### Basic Computations

1121

1122

```python

1123

import pyarrow as pa

1124

import pyarrow.compute as pc

1125

1126

# Create sample data

1127

numbers = pa.array([1, 2, 3, 4, 5, None, 7, 8, 9, 10])

1128

strings = pa.array(['apple', 'banana', 'cherry', None, 'date'])

1129

1130

# Arithmetic operations

1131

doubled = pc.multiply(numbers, 2)

1132

sum_result = pc.sum(numbers)

1133

mean_result = pc.mean(numbers)

1134

1135

# String operations

1136

lengths = pc.utf8_length(strings)

1137

upper_strings = pc.utf8_upper(strings)

1138

contains_a = pc.match_substring(strings, 'a')

1139

1140

# Filtering and selection

1141

filtered = pc.filter(numbers, pc.greater(numbers, 5))

1142

top_3 = pc.top_k_unstable(numbers, 3)

1143

```

1144

1145

### Table Operations

1146

1147

```python

1148

import pyarrow as pa

1149

import pyarrow.compute as pc

1150

1151

# Create table

1152

table = pa.table({

1153

'id': [1, 2, 3, 4, 5],

1154

'name': ['Alice', 'Bob', 'Charlie', 'Diana', 'Eve'],

1155

'age': [25, 30, 35, 28, 32],

1156

'salary': [50000, 60000, 70000, 55000, 65000]

1157

})

1158

1159

# Filter table

1160

adults = table.filter(pc.greater_equal(table['age'], 30))

1161

1162

# Add computed column

1163

table_with_bonus = table.add_column(

1164

'bonus',

1165

pc.multiply(table['salary'], 0.1)

1166

)

1167

1168

# Aggregations

1169

total_salary = pc.sum(table['salary'])

1170

avg_age = pc.mean(table['age'])

1171

age_stats = pc.quantile(table['age'], [0.25, 0.5, 0.75])

1172

```

1173

1174

### Complex Expressions

1175

1176

```python

1177

import pyarrow as pa

1178

import pyarrow.compute as pc

1179

1180

# Create table with temporal data

1181

table = pa.table({

1182

'timestamp': pa.array([

1183

'2023-01-15 10:30:00',

1184

'2023-02-20 14:45:00',

1185

'2023-03-10 09:15:00',

1186

'2023-04-05 16:20:00'

1187

], type=pa.timestamp('s')),

1188

'value': [100, 200, 150, 300]

1189

})

1190

1191

# Extract temporal components

1192

table = table.add_column('year', pc.year(table['timestamp']))

1193

table = table.add_column('month', pc.month(table['timestamp']))

1194

table = table.add_column('day_of_week', pc.day_of_week(table['timestamp']))

1195

1196

# Complex filtering

1197

high_value_weekdays = table.filter(

1198

pc.and_(

1199

pc.greater(table['value'], 150),

1200

pc.less(table['day_of_week'], 5) # Monday=0 to Friday=4

1201

)

1202

)

1203

1204

# Conditional expressions

1205

table = table.add_column(

1206

'category',

1207

pc.case_when(

1208

pc.less(table['value'], 150), 'low',

1209

pc.less(table['value'], 250), 'medium',

1210

'high'

1211

)

1212

)

1213

```

1214

1215

### User-Defined Functions

1216

1217

```python

1218

import pyarrow as pa

1219

import pyarrow.compute as pc

1220

1221

# Register scalar UDF

1222

def double_and_add_one(x):

1223

return pc.add(pc.multiply(x, 2), 1)

1224

1225

pc.register_scalar_function(

1226

double_and_add_one,

1227

'double_and_add_one',

1228

doc='Double input and add one'

1229

)

1230

1231

# Use registered function

1232

result = pc.call_function('double_and_add_one', [pa.array([1, 2, 3, 4, 5])])

1233

print(result) # [3, 5, 7, 9, 11]

1234

```