Vectors are immutable columnar arrays that store sequences of values efficiently in Apache Arrow's columnar memory format. They provide type-safe access to data with optimized operations for analytical workloads.
The core Vector class provides immutable, type-safe columnar data storage with efficient operations.
/**
* Immutable columnar array of values with type T.
*
* Members fall into four groups: metadata (`type`, `length`, `nullCount`,
* `data`, `ArrayType`), element access and search, slicing / concatenation /
* conversion, and iteration. Element values are typed `T['TValue'] | null`.
*/
class Vector<T extends DataType = any> {
/** The data type of elements in this vector */
readonly type: T;
/** Number of elements in the vector */
readonly length: number;
/** Number of null values in the vector */
readonly nullCount: number;
/** Underlying data storage */
readonly data: Data<T>;
/** TypedArray constructor for this vector's type */
readonly ArrayType: T['ArrayType'];
/** Get value at specified index (the result type includes null) */
get(index: number): T['TValue'] | null;
/**
* Set value at specified index.
* NOTE(review): declared to return `void`, so no new vector is returned —
* confirm whether the write happens in place.
*/
set(index: number, value: T['TValue'] | null): void;
/** Find first index of a value */
indexOf(searchElement: T['TValue'], fromIndex?: number): number;
/** Check if vector includes a value */
includes(searchElement: T['TValue'], fromIndex?: number): boolean;
/** Create a slice of the vector (returns a Vector of the same type) */
slice(begin?: number, end?: number): Vector<T>;
/** Concatenate with other vectors of same type */
concat(...others: Vector<T>[]): Vector<T>;
/** Convert to JavaScript array whose elements may be null */
toArray(): (T['TValue'] | null)[];
/** Serialize to JSON */
toJSON(): any[];
/** String representation */
toString(): string;
// Iterator methods
[Symbol.iterator](): IterableIterator<T['TValue'] | null>;
values(): IterableIterator<T['TValue'] | null>;
keys(): IterableIterator<number>;
entries(): IterableIterator<[number, T['TValue'] | null]>;
// Array-like properties: numeric indexing yields an element or null
readonly [index: number]: T['TValue'] | null;
}Factory functions for creating vectors from various data sources.
/**
* Create a vector from existing columnar data.
* The overloads accept a Data object, another Vector, or DataProps; each
* returns a Vector with the same element type T as its argument.
*/
function makeVector<T extends DataType>(data: Data<T>): Vector<T>;
function makeVector<T extends DataType>(data: Vector<T>): Vector<T>;
function makeVector<T extends DataType>(data: DataProps<T>): Vector<T>;
/**
* Create vector from JavaScript array.
* Without a `type` argument the element type is inferred (DataTypeOf<T>);
* with one, the result is a Vector of exactly that type and the input
* elements may be null.
*/
function vectorFromArray<T = any>(
data: readonly T[]
): Vector<DataTypeOf<T>>;
function vectorFromArray<T extends DataType>(
data: readonly (T['TValue'] | null)[],
type: T
): Vector<T>;
/**
* Create vector from any iterable. This overload declares no `type`
* parameter, so the element type is inferred (DataTypeOf<T>).
*/
function vectorFromArray<T = any>(
data: Iterable<T>
): Vector<DataTypeOf<T>>;Usage Examples:
import { vectorFromArray, Vector, Int32, Utf8, Float64 } from "apache-arrow";
// Type inference from array content (each "Inferred as" note describes the statement above it)
const numbers = vectorFromArray([1, 2, 3, null, 5]);
// Inferred as Vector<Float64>
const strings = vectorFromArray(['hello', 'world', null]);
// Inferred as Vector<Dictionary<Utf8, Int32>> (dictionary-encoded for efficiency)
const booleans = vectorFromArray([true, false, null, true]);
// Inferred as Vector<Bool>
// Explicit type specification: the second argument fixes the vector's type
const int32Vector = vectorFromArray([1, 2, 3, 4, 5], new Int32());
const utf8Vector = vectorFromArray(['a', 'b', 'c'], new Utf8());
const float64Vector = vectorFromArray([1.1, 2.2, 3.3], new Float64());Methods for accessing and querying vector data.
/**
* Get value at index; the result is null when the slot holds a null
*/
get(index: number): T['TValue'] | null;
/**
* Check if value at index is valid, i.e. non-null
* (returns false for a null slot)
*/
isValid(index: number): boolean;
/**
* Get value at index (assumes non-null; the return type excludes null)
*/
getValue(index: number): T['TValue'];
/**
* Find index of first occurrence of value
*/
indexOf(searchElement: T['TValue'], fromIndex?: number): number;
/**
* Find index of last occurrence of value
*/
lastIndexOf(searchElement: T['TValue'], fromIndex?: number): number;
/**
* Check if vector contains value
*/
includes(searchElement: T['TValue'], fromIndex?: number): boolean;
/**
* Find element matching predicate; the predicate receives each value
* (possibly null) together with its index
*/
find(predicate: (value: T['TValue'] | null, index: number) => boolean): T['TValue'] | null;
/**
* Find index of element matching predicate
*/
findIndex(predicate: (value: T['TValue'] | null, index: number) => boolean): number;Usage Examples:
import { vectorFromArray } from "apache-arrow";

// Five elements; index 2 holds a null.
const vector = vectorFromArray([10, 20, null, 30, 40]);
console.log(vector.get(0)); // 10
console.log(vector.get(2)); // null
console.log(vector.isValid(2)); // false
console.log(vector.getValue(1)); // 20 (assumes non-null)
console.log(vector.indexOf(30)); // 3
console.log(vector.includes(20)); // true
console.log(vector.includes(99)); // false
// An explicit null check keeps the predicate strictly boolean; a bare
// `value && value > 25` evaluates to `null` (or `0`) for falsy elements and
// does not satisfy the `=> boolean` predicate type.
const found = vector.find(value => value !== null && value > 25); // 30
const foundIndex = vector.findIndex(value => value === 40); // 4
// Methods for creating transformed copies of vectors.
/**
* Create slice of vector (same element type as the source)
*/
slice(begin?: number, end?: number): Vector<T>;
/**
* Concatenate vectors of same type
*/
concat(...others: Vector<T>[]): Vector<T>;
/**
* Reverse the vector (creates new vector)
*/
reverse(): Vector<T>;
/**
* Sort the vector (creates new vector).
* The optional comparator receives two values, either of which may be null.
*/
sort(compareFn?: (a: T['TValue'] | null, b: T['TValue'] | null) => number): Vector<T>;
/**
* Filter vector elements (creates new vector of the same type)
*/
filter(predicate: (value: T['TValue'] | null, index: number) => boolean): Vector<T>;
/**
* Map vector to new type (creates new vector).
* The result's element type is derived from the callback's return type U.
*/
map<U>(
callback: (value: T['TValue'] | null, index: number) => U,
thisArg?: any
): Vector<DataTypeOf<U>>;Usage Examples:
import { vectorFromArray } from "apache-arrow";
const numbers = vectorFromArray([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
// Slicing (per the results shown, `end` is exclusive)
const slice1 = numbers.slice(2, 5); // [3, 4, 5]
const slice2 = numbers.slice(5); // [6, 7, 8, 9, 10]
// Concatenation
const moreNumbers = vectorFromArray([11, 12, 13]);
const combined = numbers.concat(moreNumbers); // [1,2,3,4,5,6,7,8,9,10,11,12,13]
// Filtering (the null check is needed because elements are typed as nullable)
const evenNumbers = numbers.filter(value => value !== null && value % 2 === 0);
// Result: [2, 4, 6, 8, 10]
// Mapping (nulls are passed through unchanged)
const doubled = numbers.map(value => value !== null ? value * 2 : null);
// Result: [2, 4, 6, 8, 10, 12, 14, 16, 18, 20]
// Methods for iterating over vector elements.
/**
* Default iterator returning values (this is what `for...of` uses)
*/
[Symbol.iterator](): IterableIterator<T['TValue'] | null>;
/**
* Iterator returning values (same element type as the default iterator)
*/
values(): IterableIterator<T['TValue'] | null>;
/**
* Iterator returning indices
*/
keys(): IterableIterator<number>;
/**
* Iterator returning [index, value] pairs
*/
entries(): IterableIterator<[number, T['TValue'] | null]>;
/**
* Execute function for each element; the callback also receives the index
* and the vector itself
*/
forEach(
callback: (value: T['TValue'] | null, index: number, vector: Vector<T>) => void,
thisArg?: any
): void;
/**
* Test if all elements pass predicate
*/
every(
predicate: (value: T['TValue'] | null, index: number) => boolean,
thisArg?: any
): boolean;
/**
* Test if some elements pass predicate
*/
some(
predicate: (value: T['TValue'] | null, index: number) => boolean,
thisArg?: any
): boolean;
/**
* Reduce vector to single value of type U; `initialValue` is optional
*/
reduce<U>(
callback: (accumulator: U, value: T['TValue'] | null, index: number) => U,
initialValue?: U
): U;Usage Examples:
import { vectorFromArray } from "apache-arrow";
const vector = vectorFromArray([1, 2, null, 4, 5]);
// For-of iteration (nulls are yielded like any other element)
for (const value of vector) {
console.log(value); // 1, 2, null, 4, 5
}
// Entries iteration
for (const [index, value] of vector.entries()) {
console.log(`${index}: ${value}`); // 0: 1, 1: 2, 2: null, 3: 4, 4: 5
}
// forEach
vector.forEach((value, index) => {
console.log(`Index ${index}: ${value}`);
});
// Reduce to sum: `value || 0` makes a null element contribute 0
const sum = vector.reduce((acc, value) => {
return acc + (value || 0);
}, 0); // Result: 12
// Test predicates
const hasNulls = vector.some(value => value === null); // true
const allPositive = vector.every(value => value === null || value > 0); // true
// Methods for converting vectors to other formats.
/**
* Convert to JavaScript array (elements may be null)
*/
toArray(): (T['TValue'] | null)[];
/**
* Convert to JSON serializable array
*/
toJSON(): any[];
/**
* String representation for debugging
*/
toString(): string;
/**
* Create Date objects from temporal vectors (the result type allows null)
*/
toDate(): Date[] | null; // Only available on temporal types
/**
* Get typed array view of underlying data (when applicable; otherwise the
* declared result type allows null)
*/
toTypedArray(): T['ArrayType'] | null;Usage Examples:
import { vectorFromArray, TimestampMillisecond } from "apache-arrow";
const numbers = vectorFromArray([1, 2, 3, null, 5]);
// Epoch-millisecond values with an explicit timestamp type
const timestamps = vectorFromArray([
1609459200000, // 2021-01-01
1640995200000, // 2022-01-01
null
], new TimestampMillisecond());
// Convert to arrays
console.log(numbers.toArray()); // [1, 2, 3, null, 5]
console.log(numbers.toJSON()); // [1, 2, 3, null, 5]
// String representation
console.log(numbers.toString()); // string rendering of the elements, e.g. "[1,2,3,,5]" — not a Dictionary type; exact format is version-dependent, verify
// Temporal conversion
console.log(timestamps.toArray()); // [1609459200000, 1640995200000, null]
// Get underlying typed array (for numeric types)
const typedArray = numbers.data.values; // Access to underlying TypedArray
// Key properties providing metadata about the vector.
/**
 * Vector metadata properties.
 *
 * `T` is constrained to `DataType` because the members index into it
 * (`T['ArrayType']`, `T['TValue']`); an unconstrained `T` cannot be indexed
 * that way. This matches the constraint used by `Vector<T>`.
 */
interface VectorProperties<T extends DataType> {
  /** Data type of the vector elements */
  readonly type: T;
  /** Total number of elements including nulls */
  readonly length: number;
  /** Number of null values in the vector */
  readonly nullCount: number;
  /** Underlying data storage container */
  readonly data: Data<T>;
  /** TypedArray constructor for this vector type */
  readonly ArrayType: T['ArrayType'];
  /** Array-like indexed access to individual elements (element or null) */
  readonly [index: number]: T['TValue'] | null;
  /** Chunks if the vector spans multiple record batches */
  readonly chunks?: Vector<T>[];
  /** Offset into the underlying data buffer */
  readonly offset?: number;
  /** Stride for accessing elements (usually 1) */
  readonly stride?: number;
}
Vectors that span multiple record batches with chunked data access.
/**
* Chunked vector spanning multiple record batches.
* Extends Vector<T>, adding access to the individual chunks.
*/
class ChunkedVector<T extends DataType = any> extends Vector<T> {
/** Array of vector chunks */
readonly chunks: Vector<T>[];
/** Number of chunks */
readonly numChunks: number;
/** Get chunk containing the specified index */
getChunk(index: number): Vector<T>;
/** Get chunk index for element index */
getChunkIndex(index: number): number;
/** Search across all chunks; returns a number (presumably the element index — confirm) */
search(value: T['TValue'], fromIndex?: number): number;
}Usage Examples:
// `tableFromArrays` is called below, so it has to be imported as well.
import { Table, tableFromArrays, vectorFromArray } from "apache-arrow";

// Create table with multiple batches that creates chunked vectors
const table1 = tableFromArrays({ values: [1, 2, 3] });
const table2 = tableFromArrays({ values: [4, 5, 6] });
const combined = table1.concat(table2);
const chunkedVector = combined.getColumn('values');
console.log(chunkedVector.numChunks); // 2
console.log(chunkedVector.length); // 6
// Access across chunks
console.log(chunkedVector.get(0)); // 1 (from first chunk)
console.log(chunkedVector.get(4)); // 5 (from second chunk)
// Special operations available on numeric vectors.
/**
* Numeric vector utilities: aggregate operations over a vector's values
*/
interface NumericVectorOperations<T extends DataType> {
/** Sum all non-null values (the result may be a number or a bigint) */
sum(): number | bigint;
/** Find minimum value (excluding nulls); the result type allows null */
min(): T['TValue'] | null;
/** Find maximum value (excluding nulls); the result type allows null */
max(): T['TValue'] | null;
/** Calculate mean of non-null values */
mean(): number;
/** Count non-null values */
count(): number;
}Special operations for string vectors.
/**
* String vector utilities
*/
interface StringVectorOperations {
/** Join all non-null strings with separator */
join(separator?: string): string;
/** Test strings against pattern; returns an array of booleans (presumably one per element — confirm) */
matchesPattern(pattern: RegExp): boolean[];
/** Get string lengths as an Int32 vector */
lengths(): Vector<Int32>;
}Memory Efficiency:
// Fragment: assumes vectorFromArray, makeBuilder and Int32 are already imported.
// Prefer vectorFromArray for small datasets
const small = vectorFromArray([1, 2, 3, 4, 5]);
// Use builders for large datasets or streaming construction
const builder = makeBuilder({ type: new Int32() });
// Append one value at a time instead of materializing a large JS array first
for (let i = 0; i < 1000000; i++) {
builder.append(i);
}
const large = builder.finish().toVector();Type Safety:
// Fragment: `vector` in the second half is assumed to be defined by surrounding code.
// Explicit typing for better type inference
const typedVector = vectorFromArray([1, 2, 3], new Int32());
typedVector.get(0); // TypeScript knows this returns number | null
// Use type guards for safe operations
if (DataType.isInt(vector.type)) {
// TypeScript knows vector contains integers
// `|| 0` makes a null element (or accumulator) count as 0
const sum = vector.reduce((a, b) => (a || 0) + (b || 0), 0);
}Performance Optimization:
// Fragment: `vector` is assumed to be defined by surrounding code.
// Avoid repeated array conversions
const array = vector.toArray(); // Convert once
for (let i = 0; i < array.length; i++) {
// Use array[i] instead of vector.get(i)
}
// Use iterators for memory-efficient processing
for (const value of vector) {
// Process value without creating intermediate arrays
}Row classes provide JavaScript object interfaces for working with individual rows from structured data types like Map and Struct vectors.
Provides a Map-like interface for working with individual Map vector entries.
/**
* Row interface for Map vector entries.
* The member set mirrors the built-in JavaScript Map (get/set/has/delete/
* clear, key/value/entry iterators, size).
*/
class MapRow<K = any, V = any> {
/** Get value by key; undefined when the key is absent */
get(key: K): V | undefined;
/** Set value by key; returns this row so calls can be chained */
set(key: K, value: V): this;
/** Check if key exists */
has(key: K): boolean;
/** Delete entry by key */
delete(key: K): boolean;
/** Clear all entries */
clear(): void;
/** Get all keys */
keys(): IterableIterator<K>;
/** Get all values */
values(): IterableIterator<V>;
/** Get all entries */
entries(): IterableIterator<[K, V]>;
/** Number of entries */
readonly size: number;
/** Iterate over entries as [key, value] pairs */
[Symbol.iterator](): IterableIterator<[K, V]>;
}Usage:
// `Struct` is constructed below, so it has to be imported as well.
import { vectorFromArray, Map_, Field, Struct, Utf8, Int32, MapRow } from "apache-arrow";

// Create a map vector: the Map_ type wraps an 'entries' struct with a
// string 'key' field and an Int32 'value' field
const mapVector = vectorFromArray([
  new Map([['a', 1], ['b', 2]]),
  new Map([['x', 10], ['y', 20]]),
], new Map_(
  new Field('entries', new Struct([
    new Field('key', new Utf8()),
    new Field('value', new Int32())
  ]))
));
// Access individual map rows
const firstRow = mapVector.get(0) as MapRow<string, number>;
console.log(firstRow.get('a')); // 1
console.log(firstRow.size); // 2
// Iterate over map entries
for (const [key, value] of firstRow) {
  console.log(`${key}: ${value}`);
}
Provides object-like interface for working with individual Struct vector entries.
/**
* Row interface for Struct vector entries.
* T describes the struct's fields: keys are field names, values are the
* field value types.
*/
class StructRow<T = any> {
/** Get field value by name; the result type allows null */
get<K extends keyof T>(key: K): T[K] | null;
/** Set field value by name; returns this row so calls can be chained */
set<K extends keyof T>(key: K, value: T[K] | null): this;
/** Get field names */
keys(): IterableIterator<keyof T>;
/** Get field values */
values(): IterableIterator<T[keyof T] | null>;
/** Get field entries as [name, value] pairs */
entries(): IterableIterator<[keyof T, T[keyof T] | null]>;
/** Convert to plain JavaScript object with one nullable property per field */
toJSON(): { [K in keyof T]: T[K] | null };
/** Iterate over fields as [name, value] pairs */
[Symbol.iterator](): IterableIterator<[keyof T, T[keyof T] | null]>;
}
/**
 * Type for struct row proxy object: one property per key of `T`, each
 * holding that field's value or null.
 *
 * Declared as a type alias because a mapped type (`[K in keyof T]`) is not
 * permitted inside an `interface` body.
 */
type StructRowProxy<T = any> = {
  [K in keyof T]: T[K] | null;
};
Usage:
import { vectorFromArray, Struct, Field, Utf8, Int32, Bool, StructRow } from "apache-arrow";
// Create a struct vector: the Struct type lists one Field per object property
const structVector = vectorFromArray([
{ name: 'Alice', age: 25, active: true },
{ name: 'Bob', age: 30, active: false },
], new Struct([
new Field('name', new Utf8()),
new Field('age', new Int32()),
new Field('active', new Bool())
]));
// Access individual struct rows (the cast supplies the row's field types)
const firstRow = structVector.get(0) as StructRow<{name: string, age: number, active: boolean}>;
console.log(firstRow.get('name')); // 'Alice'
console.log(firstRow.get('age')); // 25
console.log(firstRow.get('active')); // true
// Convert to plain object
const plainObject = firstRow.toJSON();
console.log(plainObject); // { name: 'Alice', age: 25, active: true }
// Iterate over fields as [fieldName, value] pairs
for (const [fieldName, value] of firstRow) {
console.log(`${fieldName}: ${value}`);
}