or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

docs

builders.md data-types.md index.md io-operations.md streaming.md tables.md utilities.md vectors.md
tile.json

vectors.mddocs/

Vectors

Vectors are immutable columnar arrays that store sequences of values efficiently in Apache Arrow's columnar memory format. They provide type-safe access to data with optimized operations for analytical workloads.

Capabilities

Vector Class

The core Vector class provides immutable, type-safe columnar data storage with efficient operations.

/**
 * Immutable columnar array of values with type T
 */
class Vector<T extends DataType = any> {
  /** The data type of elements in this vector */
  readonly type: T;
  
  /** Number of elements in the vector */
  readonly length: number;
  
  /** Number of null values in the vector */
  readonly nullCount: number;
  
  /** Underlying data storage */
  readonly data: Data<T>;
  
  /** TypedArray constructor for this vector's type */
  readonly ArrayType: T['ArrayType'];
  
  /** Get value at specified index */
  get(index: number): T['TValue'] | null;
  
  /** Set value at specified index (returns void, not a new vector) */
  set(index: number, value: T['TValue'] | null): void;
  
  /** Find first index of a value */
  indexOf(searchElement: T['TValue'], fromIndex?: number): number;
  
  /** Check if vector includes a value */
  includes(searchElement: T['TValue'], fromIndex?: number): boolean;
  
  /** Create a slice of the vector */
  slice(begin?: number, end?: number): Vector<T>;
  
  /** Concatenate with other vectors of same type */
  concat(...others: Vector<T>[]): Vector<T>;
  
  /** Convert to JavaScript array */
  toArray(): (T['TValue'] | null)[];
  
  /** Serialize to JSON */
  toJSON(): any[];
  
  /** String representation */
  toString(): string;
  
  // Iterator methods: values (default iterator and values()), indices (keys()),
  // and [index, value] pairs (entries())
  [Symbol.iterator](): IterableIterator<T['TValue'] | null>;
  values(): IterableIterator<T['TValue'] | null>;
  keys(): IterableIterator<number>;
  entries(): IterableIterator<[number, T['TValue'] | null]>;
  
  // Array-like indexed element access (vector[i])
  readonly [index: number]: T['TValue'] | null;
}

Vector Creation Functions

Factory functions for creating vectors from various data sources.

/**
 * Create vector from Data object
 */
function makeVector<T extends DataType>(data: Data<T>): Vector<T>;
function makeVector<T extends DataType>(data: Vector<T>): Vector<T>;
function makeVector<T extends DataType>(data: DataProps<T>): Vector<T>;

/**
 * Create vector from JavaScript array with type inference
 */
function vectorFromArray<T = any>(
  data: readonly T[]
): Vector<DataTypeOf<T>>;

/**
 * Create vector from JavaScript array with an explicitly supplied data type
 */
function vectorFromArray<T extends DataType>(
  data: readonly (T['TValue'] | null)[],
  type: T
): Vector<T>;

/**
 * Create vector from iterable with type inference
 * (this overload declares no `type` parameter)
 */
function vectorFromArray<T = any>(
  data: Iterable<T>
): Vector<DataTypeOf<T>>;

Usage Examples:

import { vectorFromArray, Vector, Int32, Utf8, Float64 } from "apache-arrow";

// Type inference from array content
const numbers = vectorFromArray([1, 2, 3, null, 5]);
// Inferred as Vector<Float64> (JavaScript numbers map to Float64)

const strings = vectorFromArray(['hello', 'world', null]);
// Inferred as Vector<Dictionary<Utf8, Int32>>

const booleans = vectorFromArray([true, false, null, true]);
// Inferred as Vector<Bool>

// Explicit type specification
const int32Vector = vectorFromArray([1, 2, 3, 4, 5], new Int32());
const utf8Vector = vectorFromArray(['a', 'b', 'c'], new Utf8());
const float64Vector = vectorFromArray([1.1, 2.2, 3.3], new Float64());

Vector Data Access

Methods for accessing and querying vector data.

/**
 * Get value at index with null checking
 */
get(index: number): T['TValue'] | null;

/**
 * Check if value at index is valid (i.e. not null)
 */
isValid(index: number): boolean;

/**
 * Get value at index (assumes non-null)
 */
getValue(index: number): T['TValue'];

/**
 * Find index of first occurrence of value
 */
indexOf(searchElement: T['TValue'], fromIndex?: number): number;

/**
 * Find index of last occurrence of value  
 */
lastIndexOf(searchElement: T['TValue'], fromIndex?: number): number;

/**
 * Check if vector contains value
 */
includes(searchElement: T['TValue'], fromIndex?: number): boolean;

/**
 * Find element matching predicate
 */
find(predicate: (value: T['TValue'] | null, index: number) => boolean): T['TValue'] | null;

/**
 * Find index of element matching predicate
 */
findIndex(predicate: (value: T['TValue'] | null, index: number) => boolean): number;

Usage Examples:

import { vectorFromArray } from "apache-arrow";

// Sample vector with a null at index 2 to show how nulls are reported
const vector = vectorFromArray([10, 20, null, 30, 40]);

console.log(vector.get(0));        // 10
console.log(vector.get(2));        // null
console.log(vector.isValid(2));    // false
console.log(vector.getValue(1));   // 20 (assumes non-null)

console.log(vector.indexOf(30));   // 3
console.log(vector.includes(20));  // true
console.log(vector.includes(99));  // false

// Compare against null explicitly so the predicate returns a strict boolean
// (`value && ...` would yield `number | null | boolean`)
const found = vector.find(value => value !== null && value > 25); // 30
const foundIndex = vector.findIndex(value => value === 40); // 4

Vector Transformation

Methods for creating transformed copies of vectors.

/**
 * Create slice of vector
 */
slice(begin?: number, end?: number): Vector<T>;

/**
 * Concatenate vectors of same type
 */
concat(...others: Vector<T>[]): Vector<T>;

/**
 * Reverse the vector (creates new vector)
 */
reverse(): Vector<T>;

/**
 * Sort the vector (creates new vector)
 */
sort(compareFn?: (a: T['TValue'] | null, b: T['TValue'] | null) => number): Vector<T>;

/**
 * Filter vector elements (creates new vector)
 */
filter(predicate: (value: T['TValue'] | null, index: number) => boolean): Vector<T>;

/**
 * Map vector to new type (creates new vector)
 */
map<U>(
  callback: (value: T['TValue'] | null, index: number) => U,
  thisArg?: any
): Vector<DataTypeOf<U>>;

Usage Examples:

import { vectorFromArray } from "apache-arrow";

const numbers = vectorFromArray([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);

// Slicing
const slice1 = numbers.slice(2, 5);     // [3, 4, 5]
const slice2 = numbers.slice(5);        // [6, 7, 8, 9, 10]

// Concatenation
const moreNumbers = vectorFromArray([11, 12, 13]);
const combined = numbers.concat(moreNumbers); // [1,2,3,4,5,6,7,8,9,10,11,12,13]

// Filtering
const evenNumbers = numbers.filter(value => value !== null && value % 2 === 0);
// Result: [2, 4, 6, 8, 10]

// Mapping
const doubled = numbers.map(value => value !== null ? value * 2 : null);
// Result: [2, 4, 6, 8, 10, 12, 14, 16, 18, 20]

Vector Iteration

Methods for iterating over vector elements.

/**
 * Iterator returning values
 */
[Symbol.iterator](): IterableIterator<T['TValue'] | null>;

/**
 * Iterator returning values  
 */
values(): IterableIterator<T['TValue'] | null>;

/**
 * Iterator returning indices
 */
keys(): IterableIterator<number>;

/**
 * Iterator returning [index, value] pairs
 */
entries(): IterableIterator<[number, T['TValue'] | null]>;

/**
 * Execute function for each element
 */
forEach(
  callback: (value: T['TValue'] | null, index: number, vector: Vector<T>) => void,
  thisArg?: any
): void;

/**
 * Test if all elements pass predicate
 */
every(
  predicate: (value: T['TValue'] | null, index: number) => boolean,
  thisArg?: any
): boolean;

/**
 * Test if some elements pass predicate
 */
some(
  predicate: (value: T['TValue'] | null, index: number) => boolean,
  thisArg?: any
): boolean;

/**
 * Reduce vector to single value
 */
reduce<U>(
  callback: (accumulator: U, value: T['TValue'] | null, index: number) => U,
  initialValue?: U
): U;

Usage Examples:

import { vectorFromArray } from "apache-arrow";

const vector = vectorFromArray([1, 2, null, 4, 5]);

// For-of iteration
for (const value of vector) {
  console.log(value); // 1, 2, null, 4, 5
}

// Entries iteration
for (const [index, value] of vector.entries()) {
  console.log(`${index}: ${value}`); // 0: 1, 1: 2, 2: null, 3: 4, 4: 5
}

// forEach
vector.forEach((value, index) => {
  console.log(`Index ${index}: ${value}`);
});

// Reduce to sum (excluding nulls)
const sum = vector.reduce((acc, value) => {
  return acc + (value || 0);
}, 0); // Result: 12

// Test predicates
const hasNulls = vector.some(value => value === null); // true
const allPositive = vector.every(value => value === null || value > 0); // true

Vector Conversion

Methods for converting vectors to other formats.

/**
 * Convert to JavaScript array
 */
toArray(): (T['TValue'] | null)[];

/**
 * Convert to JSON serializable array
 */
toJSON(): any[];

/**
 * String representation for debugging
 */
toString(): string;

/**
 * Create Date objects from temporal vectors
 */
toDate(): Date[] | null; // Only available on temporal types

/**
 * Get typed array view of underlying data (when applicable)
 */
toTypedArray(): T['ArrayType'] | null;

Usage Examples:

import { vectorFromArray, TimestampMillisecond } from "apache-arrow";

const numbers = vectorFromArray([1, 2, 3, null, 5]);
const timestamps = vectorFromArray([
  1609459200000, // 2021-01-01
  1640995200000, // 2022-01-01
  null
], new TimestampMillisecond());

// Convert to arrays
console.log(numbers.toArray());    // [1, 2, 3, null, 5]
console.log(numbers.toJSON());     // [1, 2, 3, null, 5]

// String representation
// NOTE(review): `numbers` comes from numeric input, so it is a Float64 vector,
// not Dictionary<Utf8, Int32>; exact output format — TODO confirm against the installed version
console.log(numbers.toString());   // prints the values as a bracketed list

// Temporal values as stored (epoch milliseconds)
console.log(timestamps.toArray()); // [1609459200000, 1640995200000, null]

// Get underlying typed array (for numeric types)
const typedArray = numbers.data.values; // Access to underlying TypedArray

Vector Properties

Key properties providing metadata about the vector.

/**
 * Vector metadata properties
 */
interface VectorProperties<T> {
  /** Data type of the vector elements */
  readonly type: T;
  
  /** Total number of elements including nulls */
  readonly length: number;
  
  /** Number of null values in the vector */
  readonly nullCount: number;
  
  /** Underlying data storage container */
  readonly data: Data<T>;
  
  /** TypedArray constructor for this vector type */
  readonly ArrayType: T['ArrayType'];
  
  /** Array-like indexed access to elements (value or null at each numeric index) */
  readonly [index: number]: T['TValue'] | null;
  
  /** Chunks if the vector spans multiple record batches */
  readonly chunks?: Vector<T>[];
  
  /** Offset into the underlying data buffer */
  readonly offset?: number;
  
  /** Stride for accessing elements (usually 1) */
  readonly stride?: number;
}

Chunked Vectors

Vectors that span multiple record batches with chunked data access.

/**
 * Chunked vector spanning multiple record batches
 */
class ChunkedVector<T extends DataType = any> extends Vector<T> {
  /** Array of vector chunks */
  readonly chunks: Vector<T>[];
  
  /** Number of chunks */
  readonly numChunks: number;
  
  /** Get chunk containing the specified index */
  getChunk(index: number): Vector<T>;
  
  /** Get chunk index for element index */
  getChunkIndex(index: number): number;
  
  /** Search across all chunks */
  search(value: T['TValue'], fromIndex?: number): number;
}

Usage Examples:

import { Table, tableFromArrays, vectorFromArray } from "apache-arrow";

// Create two single-batch tables; concatenating them produces a table whose
// column is backed by multiple chunks
const table1 = tableFromArrays({ values: [1, 2, 3] });
const table2 = tableFromArrays({ values: [4, 5, 6] });
const combined = table1.concat(table2);

const chunkedVector = combined.getColumn('values');
console.log(chunkedVector.numChunks); // 2
console.log(chunkedVector.length);    // 6

// Access across chunks: indices are global, not per-chunk
console.log(chunkedVector.get(0)); // 1 (from first chunk)
console.log(chunkedVector.get(4)); // 5 (from second chunk)

Type-Specific Vector Operations

Numeric Vector Operations

Special operations available on numeric vectors.

/**
 * Numeric vector utilities
 */
interface NumericVectorOperations<T extends DataType> {
  /** Sum all non-null values */
  sum(): number | bigint;
  
  /** Find minimum value (excluding nulls) */
  min(): T['TValue'] | null;
  
  /** Find maximum value (excluding nulls) */
  max(): T['TValue'] | null;
  
  /** Calculate mean of non-null values */
  mean(): number;
  
  /** Count non-null values */
  count(): number;
}

String Vector Operations

Special operations for string vectors.

/**
 * String vector utilities
 */
interface StringVectorOperations {
  /** Join all non-null strings with separator */
  join(separator?: string): string;
  
  /** Test strings against pattern; returns an array of booleans (presumably one per element — TODO confirm) */
  matchesPattern(pattern: RegExp): boolean[];
  
  /** Get string lengths */
  lengths(): Vector<Int32>;
}

Best Practices

Memory Efficiency:

// Prefer vectorFromArray for small datasets
const small = vectorFromArray([1, 2, 3, 4, 5]);

// Use builders for large datasets or streaming construction
const builder = makeBuilder({ type: new Int32() });
for (let i = 0; i < 1000000; i++) {
  builder.append(i);
}
const large = builder.finish().toVector();

Type Safety:

// Explicit typing for better type inference
const typedVector = vectorFromArray([1, 2, 3], new Int32());
typedVector.get(0); // TypeScript knows this returns number | null

// Use type guards for safe operations
if (DataType.isInt(vector.type)) {
  // TypeScript knows vector contains integers
  const sum = vector.reduce((a, b) => (a || 0) + (b || 0), 0);
}

Performance Optimization:

// Avoid repeated array conversions
const array = vector.toArray(); // Convert once
for (let i = 0; i < array.length; i++) {
  // Use array[i] instead of vector.get(i)
}

// Use iterators for memory-efficient processing
for (const value of vector) {
  // Process value without creating intermediate arrays
}

Row Classes

Row classes provide JavaScript object interfaces for working with individual rows from structured data types like Map and Struct vectors.

MapRow

Provides a Map-like interface for working with individual Map vector entries.

/**
 * Row interface for Map vector entries
 */
class MapRow<K = any, V = any> {
  /** Get value by key */
  get(key: K): V | undefined;
  
  /** Set value by key */
  set(key: K, value: V): this;
  
  /** Check if key exists */
  has(key: K): boolean;
  
  /** Delete entry by key */
  delete(key: K): boolean;
  
  /** Clear all entries */
  clear(): void;
  
  /** Get all keys */
  keys(): IterableIterator<K>;
  
  /** Get all values */
  values(): IterableIterator<V>;
  
  /** Get all entries */
  entries(): IterableIterator<[K, V]>;
  
  /** Number of entries */
  readonly size: number;
  
  /** Iterate over entries */
  [Symbol.iterator](): IterableIterator<[K, V]>;
}

Usage:

import { vectorFromArray, Map_, Field, Struct, Utf8, Int32, MapRow } from "apache-arrow";

// Create a map vector: each element is a Map of string keys to Int32 values,
// described as a struct of `key` / `value` fields
const mapVector = vectorFromArray([
  new Map([['a', 1], ['b', 2]]),
  new Map([['x', 10], ['y', 20]]),
], new Map_(
  new Field('entries', new Struct([
    new Field('key', new Utf8()),
    new Field('value', new Int32())
  ]))
));

// Access individual map rows
const firstRow = mapVector.get(0) as MapRow<string, number>;
console.log(firstRow.get('a')); // 1
console.log(firstRow.size);     // 2

// Iterate over map entries
for (const [key, value] of firstRow) {
  console.log(`${key}: ${value}`);
}

StructRow

Provides object-like interface for working with individual Struct vector entries.

/**
 * Row interface for Struct vector entries
 */
class StructRow<T = any> {
  /** Get field value by name */
  get<K extends keyof T>(key: K): T[K] | null;
  
  /** Set field value by name */
  set<K extends keyof T>(key: K, value: T[K] | null): this;
  
  /** Get field names */
  keys(): IterableIterator<keyof T>;
  
  /** Get field values */
  values(): IterableIterator<T[keyof T] | null>;
  
  /** Get field entries */
  entries(): IterableIterator<[keyof T, T[keyof T] | null]>;
  
  /** Convert to plain JavaScript object */
  toJSON(): { [K in keyof T]: T[K] | null };
  
  /** Iterate over fields */
  [Symbol.iterator](): IterableIterator<[keyof T, T[keyof T] | null]>;
}

/**
 * Type for struct row proxy object: every field of T is exposed as a
 * property of the same name whose value may be null.
 *
 * Declared as a type alias because a mapped type (`[K in keyof T]`) is not
 * permitted as a member of an `interface` body.
 */
type StructRowProxy<T = any> = {
  [K in keyof T]: T[K] | null;
};

Usage:

import { vectorFromArray, Struct, Field, Utf8, Int32, Bool, StructRow } from "apache-arrow";

// Create a struct vector
const structVector = vectorFromArray([
  { name: 'Alice', age: 25, active: true },
  { name: 'Bob', age: 30, active: false },
], new Struct([
  new Field('name', new Utf8()),
  new Field('age', new Int32()),
  new Field('active', new Bool())
]));

// Access individual struct rows
const firstRow = structVector.get(0) as StructRow<{name: string, age: number, active: boolean}>;
console.log(firstRow.get('name'));   // 'Alice'
console.log(firstRow.get('age'));    // 25
console.log(firstRow.get('active')); // true

// Convert to plain object
const plainObject = firstRow.toJSON();
console.log(plainObject); // { name: 'Alice', age: 25, active: true }

// Iterate over fields
for (const [fieldName, value] of firstRow) {
  console.log(`${fieldName}: ${value}`);
}