Apache Arrow JavaScript is a high-performance columnar analytics library that provides efficient in-memory data structures for working with structured data. Built on the Arrow columnar memory format, it enables zero-copy data sharing and high-performance analytics across languages and systems, with support for reading, writing, and manipulating Arrow data in JavaScript and TypeScript environments.
npm install apache-arrow

// ESM imports
import {
Table,
Vector,
RecordBatch,
Schema,
Field,
MapRow,
StructRow,
tableFromArrays,
tableFromJSON,
vectorFromArray
} from "apache-arrow";

For CommonJS:
// CommonJS imports
const {
Table,
Vector,
RecordBatch,
Schema,
Field,
MapRow,
StructRow,
tableFromArrays,
tableFromJSON,
vectorFromArray
} = require("apache-arrow");

Platform-specific imports:
// DOM/Browser-specific features
import * as Arrow from "apache-arrow/Arrow.dom";
// Node.js-specific features
import * as Arrow from "apache-arrow/Arrow.node";

import { tableFromArrays, tableFromJSON, vectorFromArray } from "apache-arrow";
// Create table from arrays
const table = tableFromArrays({
name: ['Alice', 'Bob', 'Charlie'],
age: [25, 30, 35],
active: [true, false, true]
});
// Create table from JSON objects
const jsonTable = tableFromJSON([
{ name: 'Alice', age: 25, active: true },
{ name: 'Bob', age: 30, active: false },
{ name: 'Charlie', age: 35, active: true }
]);
// Access data
const nameColumn = table.getColumn('name');
const firstRow = table.get(0); // { name: 'Alice', age: 25, active: true }
const firstAge = table.getColumn('age').get(0); // 25
// Create individual vectors
const numbers = vectorFromArray([1, 2, 3, null, 5]);
console.log(numbers.get(3)); // null
console.log(numbers.length); // 5
console.log(numbers.nullCount); // 1

Apache Arrow JavaScript is built around several key architectural components:
Complete type system with 47+ Arrow data types including integers, floats, strings, temporal types, and complex nested structures. Supports all Arrow specification data types with full TypeScript integration.
// Primitive types
import { Int32, Float64, Utf8, Bool, Binary } from "apache-arrow";
// Temporal types
import { Date_, Timestamp, Time, Duration, Interval } from "apache-arrow";
// Nested types
import { List, Struct, Map_, Union, Dictionary } from "apache-arrow";

Immutable columnar arrays that store sequences of values efficiently. Vectors provide type-safe access to data with optimized operations for filtering, slicing, and transformation. Includes Row classes for object-like access to structured data.
class Vector<T> {
get(index: number): T['TValue'] | null;
set(index: number, value: T['TValue'] | null): void;
slice(begin?: number, end?: number): Vector<T>;
concat(...others: Vector<T>[]): Vector<T>;
toArray(): (T['TValue'] | null)[];
}
// Row classes for structured data access
class MapRow<K, V> {
get(key: K): V | undefined;
has(key: K): boolean;
readonly size: number;
}
class StructRow<T> {
get<K extends keyof T>(key: K): T[K] | null;
toJSON(): { [K in keyof T]: T[K] | null };
}
function vectorFromArray<T>(data: T[]): Vector<T>;

Tabular data structures representing collections of named columns with schemas. Tables provide DataFrame-like functionality with efficient columnar operations and type safety.
class Table<T> {
schema: Schema<T>;
length: number;
numCols: number;
get(index: number): T['TValue'] | null;
getColumn<P>(name: P): Vector<T[P]> | null;
select<K>(...columnNames: K[]): Table<Pick<T, K>>;
filter(predicate: Predicate): FilteredTable<T>;
}
class Schema<T> {
fields: Field[];
select(names: string[]): Schema;
assign(...fields: Field[]): Schema;
}

Type-specific builder classes for efficiently constructing Arrow vectors from JavaScript data. Builders handle memory allocation and type-specific encoding automatically.
class Builder<T> {
append(value: T['TValue'] | null): this;
set(index: number, value: T['TValue'] | null): this;
finish(): Data<T>;
toVector(): Vector<T>;
}
function makeBuilder<T>(options: BuilderOptions<T>): Builder<T>;

Reading and writing Arrow data in various formats including Arrow IPC (Inter-Process Communication), with support for both file and stream formats, plus JSON serialization.
// Serialization functions
function tableToIPC(table: Table, type?: 'stream' | 'file'): Uint8Array;
function tableFromIPC(buffer: ArrayBufferViewInput): Table;
// Reader classes
class RecordBatchReader {
static from(source: ReadableSource): RecordBatchReader;
readAll(): Table;
}
// Writer classes
class RecordBatchWriter {
static writeAll(sink: WritableSink, table: Table): void;
}

Stream processing capabilities with support for both DOM ReadableStream and Node.js stream APIs. Enables processing of large datasets that don't fit in memory.
// DOM stream support
function toDOMStream<T>(source: Iterable<T>): ReadableStream<T>;
function recordBatchReaderThroughDOMStream(options?: ReadableDOMStreamOptions);
// Node.js stream support
function toNodeStream<T>(source: Iterable<T>): Readable;
function recordBatchReaderThroughNodeStream(options?: NodeStreamOptions);

Comprehensive utility functions for data manipulation, type checking, mathematical operations, and compatibility helpers across different JavaScript environments.
// Utility object with all helper functions
const util = {
// Math utilities
BigInt64Array, BigUint64Array,
// Buffer utilities
toArrayBufferView, toUint8Array, rebaseValueOffsets,
// Bit manipulation
getBit, setBit, truncateBitmap,
// Type comparison
compareSchemas, compareFields, compareTypes
};