Tables are the primary data structure in Apache Arrow JavaScript for representing structured, tabular data. They combine multiple named columns (vectors) with schema information to provide DataFrame-like functionality with type safety and efficient columnar operations.
The core Table class provides immutable, structured data with schema-aware operations.
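For a quick orientation before the full reference, here is a minimal sketch using the tableFromArrays helper and the accessors documented later in this section (method names can differ between Arrow JS releases, so treat it as illustrative):
import { tableFromArrays } from "apache-arrow";
// Build a small table from plain JS arrays
const people = tableFromArrays({
  name: ['Ada', 'Grace'],
  age: [36, 45]
});
console.log(people.numCols); // 2
console.log(people.get(0));  // first row
// Rows are iterable, DataFrame-style
for (const row of people) {
  console.log(row.name, row.age);
}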
/**
* Immutable tabular data structure with typed columns
*/
class Table<T extends TypeMap = any> {
/** Schema defining the table structure */
readonly schema: Schema<T>;
/** Number of rows in the table */
readonly length: number;
/** Number of columns in the table */
readonly numCols: number;
/** Underlying record batches containing the data */
readonly batches: RecordBatch<T>[];
/** Column names */
readonly columnNames: (keyof T)[];
// Data access methods
/** Get row at specified index as object */
get(index: number): T[keyof T] | null;
/** Get column by name */
getColumn<P extends keyof T>(name: P): Vector<T[P]> | null;
/** Get column by index */
getColumnAt(index: number): Vector | null;
/** Get child column by index (for nested schemas) */
getChildAt<P extends keyof T>(index: number): Vector<T[P]> | null;
// Transformation methods
/** Select subset of columns by name */
select<K extends keyof T>(...columnNames: K[]): Table<Pick<T, K>>;
/** Select columns by index */
selectAt(columnIndices: number[]): Table;
/** Merge with another table */
assign<R extends TypeMap>(other: Table<R>): Table<T & R>;
/** Create slice of rows */
slice(begin?: number, end?: number): Table<T>;
/** Concatenate with other tables */
concat(...others: Table<T>[]): Table<T>;
/** Filter rows based on predicate */
filter(predicate: Predicate): FilteredTable<T>;
// Conversion methods
/** Convert to array of row objects */
toArray(): T[keyof T][];
/** Serialize to JSON array */
toJSON(): any[];
/** String representation */
toString(): string;
// Iteration methods
/** Scan rows with custom function */
scan(next: NextFunc<T>, bind?: Partial<T>): IterableIterator<T[keyof T]>;
/** Count occurrences by column values */
countBy(name: keyof T): CountByResult;
/** Iterator over rows */
[Symbol.iterator](): IterableIterator<T[keyof T]>;
/** Iterator over row values */
values(): IterableIterator<T[keyof T]>;
/** Iterator over row indices */
keys(): IterableIterator<number>;
/** Iterator over [index, row] pairs */
entries(): IterableIterator<[number, T[keyof T]]>;
}
// Type definitions for table values
type TypeMap = { [key: string]: DataType };
type Predicate = (row: any, index: number) => boolean;
type NextFunc<T> = (index: number, batch: RecordBatch) => T[keyof T];
type CountByResult = { [key: string]: number };
Multiple ways to create Table instances from different data sources.
/**
* Table constructor overloads
*/
class Table<T extends TypeMap = any> {
/** Empty table */
constructor();
/** From record batches */
constructor(batches: Iterable<RecordBatch<T>>);
constructor(...batches: RecordBatch<T>[]);
/** From schema and vectors */
constructor(schema: Schema<T>, ...columns: Vector<T[keyof T]>[]);
/** From typed columns object */
constructor(columns: { [P in keyof T]: Vector<T[P]> });
/** From arrays object */
constructor(columns: { [P in keyof T]: T[P]['TArray'] });
/** From mixed data */
constructor(
schema: Schema<T>,
length?: number,
children?: (Vector | T[keyof T]['TArray'])[]
);
}
Convenient functions for creating tables from common data sources.
/**
* Create table from object of arrays
*/
function tableFromArrays<T extends Record<string, ArrayLike<any>>>(
columns: T
): Table<{ [P in keyof T]: DataTypeOf<T[P][number]> }>;
/**
* Create table from array of objects
*/
function tableFromJSON<T>(array: T[]): Table<InferredTypes<T>>;
/**
* Create table from record batches
*/
function makeTable<T extends TypeMap>(
batches: RecordBatch<T>[]
): Table<T>;
function makeTable<T extends TypeMap>(
schema: Schema<T>,
batches?: RecordBatch<T>[]
): Table<T>;
function makeTable<T extends TypeMap>(
...columns: Vector<T[keyof T]>[]
): Table<T>;
/**
* Create empty table with schema
*/
function emptyTable<T extends TypeMap>(schema: Schema<T>): Table<T>;
Usage Examples:
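Before the array- and JSON-based examples, a brief sketch of the empty-table case; the factory helpers declared above may not be exported under these exact names in every Arrow JS release, so the plain constructor is used instead:
import { Table, Schema, Field, Int32, Utf8 } from "apache-arrow";
// A zero-row table that still carries a full schema
const emptySchema = new Schema([
  new Field('id', new Int32()),
  new Field('label', new Utf8())
]);
const empty = new Table(emptySchema);
console.log(empty.numCols);      // 2
console.log(empty.schema.names); // ['id', 'label']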
import {
tableFromArrays,
tableFromJSON,
Table,
Schema,
Field,
Int32,
Utf8,
Bool,
vectorFromArray
} from "apache-arrow";
// From arrays object
const table1 = tableFromArrays({
name: ['Alice', 'Bob', 'Charlie'],
age: [25, 30, 35],
active: [true, false, true]
});
// From JSON objects
const table2 = tableFromJSON([
{ name: 'Alice', age: 25, active: true },
{ name: 'Bob', age: 30, active: false },
{ name: 'Charlie', age: 35, active: true }
]);
// From schema and vectors
const schema = new Schema([
new Field('name', new Utf8()),
new Field('age', new Int32()),
new Field('active', new Bool())
]);
const nameVector = vectorFromArray(['Alice', 'Bob', 'Charlie'], new Utf8());
const ageVector = vectorFromArray([25, 30, 35], new Int32());
const activeVector = vectorFromArray([true, false, true], new Bool());
const table3 = new Table(schema, nameVector, ageVector, activeVector);
Defines the structure and metadata for tables and record batches.
/**
* Schema defining table structure with metadata
*/
class Schema<T extends TypeMap = any> {
/** Array of field definitions */
readonly fields: Field<T[keyof T]>[];
/** Number of fields */
readonly length: number;
/** Key-value metadata for the schema */
readonly metadata: Map<string, string>;
/** Dictionary type registry */
readonly dictionaries: Map<number, DataType>;
/** Arrow metadata version */
readonly metadataVersion: MetadataVersion;
/** Field names array */
readonly names: string[];
// Schema manipulation methods
/** Create schema with subset of fields */
select(...names: string[]): Schema;
select(names: string[]): Schema;
/** Create schema with fields at indices */
selectAt(...indices: number[]): Schema;
selectAt(indices: number[]): Schema;
/** Add or replace fields */
assign(...fields: Field[]): Schema;
/** Create schema with new metadata */
withMetadata(metadata: Map<string, string>): Schema;
// Field access methods
/** Get field by name */
field(name: string): Field | null;
/** Get field by index */
fieldAt(index: number): Field | null;
/** Get field index by name */
fieldIndex(name: string): number;
/** Check if field exists */
hasField(name: string): boolean;
// Conversion methods
/** Convert to JSON representation */
toJSON(): object;
/** String representation */
toString(): string;
}
Represents individual columns within a schema.
/**
* Field representing a named column with type and metadata
*/
class Field<T extends DataType = any> {
/** Field name */
readonly name: string;
/** Data type */
readonly type: T;
/** Whether field allows null values */
readonly nullable: boolean;
/** Field-specific metadata */
readonly metadata: Map<string, string>;
/** Constructor */
constructor(
name: string,
type: T,
nullable?: boolean,
metadata?: Map<string, string>
);
// Field manipulation methods
/** Create copy with modifications */
clone(options?: Partial<FieldOptions>): Field<T>;
/** Create field with new name */
withName(name: string): Field<T>;
/** Create field with new type */
withType<U extends DataType>(type: U): Field<U>;
/** Create field with new nullability */
withNullable(nullable: boolean): Field<T>;
/** Create field with new metadata */
withMetadata(metadata: Map<string, string>): Field<T>;
// Conversion methods
/** Convert to JSON representation */
toJSON(): object;
/** String representation */
toString(): string;
}
// Field options interface
interface FieldOptions {
name?: string;
type?: DataType;
nullable?: boolean;
metadata?: Map<string, string>;
}
Usage Examples:
import { Schema, Field, Int32, Utf8, Bool, Float64 } from "apache-arrow";
// Create individual fields
const nameField = new Field('name', new Utf8(), false); // Non-nullable
const ageField = new Field('age', new Int32(), true); // Nullable
const activeField = new Field('active', new Bool(), false);
// Create schema
const schema = new Schema([nameField, ageField, activeField]);
// Schema operations
const subset = schema.select('name', 'age'); // Select specific fields
const reordered = schema.selectAt([2, 0, 1]); // Reorder by index
const withExtra = schema.assign( // Add new field
new Field('score', new Float64())
);
// Field access
console.log(schema.names); // ['name', 'age', 'active']
console.log(schema.field('name')); // Field<Utf8>
console.log(schema.fieldIndex('age')); // 1
console.log(schema.hasField('nonexistent')); // false
// Field modifications
const renamedField = nameField.withName('full_name');
const nonNullableAge = ageField.withNullable(false); // Make the age field non-nullable
Represents a single batch of rows with equal-length columns.
/**
* Collection of equal-length vectors representing a batch of rows
*/
class RecordBatch<T extends TypeMap = any> {
/** Schema defining the batch structure */
readonly schema: Schema<T>;
/** Number of rows in this batch */
readonly length: number;
/** Number of columns */
readonly numCols: number;
/** Underlying data storage */
readonly data: Data<Struct<T>>;
// Data access (similar to Table)
/** Get row at index */
get(index: number): T[keyof T] | null;
/** Get column by name */
getColumn<P extends keyof T>(name: P): Vector<T[P]> | null;
/** Get column by index */
getColumnAt(index: number): Vector | null;
/** Get child column by index */
getChildAt<P extends keyof T>(index: number): Vector<T[P]> | null;
// Transformation methods
/** Select columns */
select<K extends keyof T>(...columnNames: K[]): RecordBatch<Pick<T, K>>;
selectAt(columnIndices: number[]): RecordBatch;
/** Create slice */
slice(begin?: number, end?: number): RecordBatch<T>;
/** Concatenate batches into table */
concat(...others: RecordBatch<T>[]): Table<T>;
// Conversion methods
/** Convert to array */
toArray(): T[keyof T][];
/** Convert to JSON */
toJSON(): any[];
/** String representation */
toString(): string;
// Iteration
[Symbol.iterator](): IterableIterator<T[keyof T]>;
values(): IterableIterator<T[keyof T]>;
keys(): IterableIterator<number>;
entries(): IterableIterator<[number, T[keyof T]]>;
}
Methods for accessing table data in various ways.
/**
* Row access
*/
get(index: number): T[keyof T] | null;
/**
* Column access by name
*/
getColumn<P extends keyof T>(name: P): Vector<T[P]> | null;
/**
* Column access by index
*/
getColumnAt(index: number): Vector | null;
/**
* Batch access for large tables
*/
getBatch(index: number): RecordBatch<T>;
/**
* Iterator access for memory efficiency
*/
scan(next: NextFunc<T>, bind?: Partial<T>): IterableIterator<T[keyof T]>;
Usage Examples:
import { tableFromArrays } from "apache-arrow";
const table = tableFromArrays({
name: ['Alice', 'Bob', 'Charlie', 'Diana'],
age: [25, 30, 35, 28],
department: ['Engineering', 'Sales', 'Engineering', 'Marketing']
});
// Row access
console.log(table.get(0)); // { name: 'Alice', age: 25, department: 'Engineering' }
console.log(table.get(2)); // { name: 'Charlie', age: 35, department: 'Engineering' }
// Column access
const nameColumn = table.getColumn('name');
console.log(nameColumn.toArray()); // ['Alice', 'Bob', 'Charlie', 'Diana']
const ageColumn = table.getColumnAt(1); // Get second column
console.log(ageColumn.toArray()); // [25, 30, 35, 28]
// Scan for memory-efficient iteration
const engineeringRows = [];
for (const row of table.scan((idx, batch) => batch.get(idx))) {
if (row.department === 'Engineering') {
engineeringRows.push(row);
}
}
Methods for selecting subsets of columns or rows.
/**
* Select columns by name
*/
select<K extends keyof T>(...columnNames: K[]): Table<Pick<T, K>>;
/**
* Select columns by index
*/
selectAt(columnIndices: number[]): Table;
/**
* Filter rows based on predicate
*/
filter(predicate: (row: T[keyof T], index: number) => boolean): FilteredTable<T>;
/**
* Create row slice
*/
slice(begin?: number, end?: number): Table<T>;
Usage Examples:
import { tableFromArrays } from "apache-arrow";
const table = tableFromArrays({
id: [1, 2, 3, 4, 5],
name: ['Alice', 'Bob', 'Charlie', 'Diana', 'Eve'],
age: [25, 30, 35, 28, 32],
salary: [75000, 80000, 90000, 72000, 85000]
});
// Select specific columns
const nameAge = table.select('name', 'age');
// Result: Table with only 'name' and 'age' columns
// Select by index
const firstTwoCols = table.selectAt([0, 1]);
// Result: Table with 'id' and 'name' columns
// Filter rows
const highEarners = table.filter(row => row.salary > 80000);
// Result: Charlie and Eve rows
const youngEmployees = table.filter(row => row.age < 30);
// Result: Alice and Diana rows
// Slice rows
const middleThree = table.slice(1, 4);
// Result: Bob, Charlie, Diana rows (indices 1, 2, 3)
Methods for combining multiple tables.
/**
* Concatenate tables vertically (same schema)
*/
concat(...others: Table<T>[]): Table<T>;
/**
* Merge tables horizontally (different columns)
*/
assign<R extends TypeMap>(other: Table<R>): Table<T & R>;
/**
* Join tables on common columns (not built-in, but pattern)
*/
// Note: Joins require custom implementation
Usage Examples:
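As noted above, joins are not built in. The following is a minimal in-memory hash-join sketch; the joinOn helper is hypothetical and assumes both tables fit in memory and share a primitive-valued key column:
import { Table, tableFromJSON } from "apache-arrow";
// Hypothetical helper: inner-join two tables on a shared key column
function joinOn(left: Table, right: Table, key: string): Table {
  // Index the right table's rows by their key value
  const rightByKey = new Map<unknown, Record<string, unknown>>();
  for (const row of right) {
    const obj = row.toJSON();
    rightByKey.set(obj[key], obj);
  }
  // Merge each matching left row with its right-side counterpart
  const joined: Record<string, unknown>[] = [];
  for (const row of left) {
    const obj = row.toJSON();
    const match = rightByKey.get(obj[key]);
    if (match !== undefined) {
      joined.push({ ...obj, ...match });
    }
  }
  // Rebuild an Arrow table from the joined plain objects
  return tableFromJSON(joined);
}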
import { tableFromArrays } from "apache-arrow";
// Vertical concatenation (same columns)
const employees1 = tableFromArrays({
name: ['Alice', 'Bob'],
age: [25, 30]
});
const employees2 = tableFromArrays({
name: ['Charlie', 'Diana'],
age: [35, 28]
});
const allEmployees = employees1.concat(employees2);
// Result: 4 rows with name and age columns
// Horizontal merge (different columns)
const basicInfo = tableFromArrays({
name: ['Alice', 'Bob', 'Charlie'],
age: [25, 30, 35]
});
const jobInfo = tableFromArrays({
department: ['Engineering', 'Sales', 'Engineering'],
salary: [75000, 80000, 90000]
});
const combined = basicInfo.assign(jobInfo);
// Result: Table with name, age, department, salary columns
Methods for analyzing table data.
/**
* Count values by column
*/
countBy(name: keyof T): CountByResult;
/**
* Group by functionality (custom implementation needed)
*/
// Arrow doesn't provide built-in groupBy, but can be implemented
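A minimal sketch of that pattern follows; the groupBy helper is hypothetical, collects plain row values in memory, and leaves aggregation to the caller:
import { Table } from "apache-arrow";
// Hypothetical helper: bucket rows by the value in one column
function groupBy(table: Table, key: string): Map<unknown, Record<string, unknown>[]> {
  const groups = new Map<unknown, Record<string, unknown>[]>();
  for (const row of table) {
    const obj = row.toJSON();
    const bucket = groups.get(obj[key]) ?? [];
    bucket.push(obj);
    groups.set(obj[key], bucket);
  }
  return groups;
}
// e.g. average age per department:
// for (const [dept, rows] of groupBy(table, 'department')) { ... }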
/**
* Statistical operations on columns
*/
// Access individual columns for statistical operations
const meanAge = table.getColumn('age').toArray().reduce((sum, age) => sum + (age || 0), 0) / table.length; // Mean age
Usage Examples:
import { tableFromArrays } from "apache-arrow";
const table = tableFromArrays({
name: ['Alice', 'Bob', 'Charlie', 'Diana', 'Eve'],
department: ['Eng', 'Sales', 'Eng', 'Marketing', 'Eng'],
age: [25, 30, 35, 28, 32]
});
// Count by department
const deptCounts = table.countBy('department');
console.log(deptCounts); // { Eng: 3, Sales: 1, Marketing: 1 }
// Custom aggregations using column operations
const ageColumn = table.getColumn('age');
const totalAge = ageColumn.toArray().reduce((sum, age) => sum + (age || 0), 0);
const avgAge = totalAge / ageColumn.length;
console.log(`Average age: ${avgAge}`); // Average age: 30
// Find min/max
const minAge = ageColumn.toArray().reduce((min, age) =>
  age !== null ? Math.min(min, age) : min, Infinity
);
const maxAge = ageColumn.toArray().reduce((max, age) =>
  age !== null ? Math.max(max, age) : max, -Infinity
);
Efficient methods for processing table data.
/**
* Standard iteration over rows
*/
[Symbol.iterator](): IterableIterator<T[keyof T]>;
/**
* Custom scanning with state
*/
scan(
next: (index: number, batch: RecordBatch<T>) => T[keyof T],
bind?: Partial<T>
): IterableIterator<T[keyof T]>;
/**
* Batch-wise processing for large tables
*/
// Process table in batches for memory efficiency
for (const batch of table.batches) {
// Process batch
}
Usage Examples:
import { tableFromArrays } from "apache-arrow";
const largeTable = tableFromArrays({
id: Array.from({ length: 10000 }, (_, i) => i),
value: Array.from({ length: 10000 }, (_, i) => Math.random())
});
// Memory-efficient iteration
let sum = 0;
let count = 0;
// Process row by row without loading all into memory
for (const row of largeTable) {
if (row.value > 0.5) {
sum += row.value;
count++;
}
}
console.log(`Average of values > 0.5: ${sum / count}`);
// Batch-wise processing for very large tables
let batchSums = [];
for (const batch of largeTable.batches) {
const valueColumn = batch.getColumn('value');
const batchSum = valueColumn.toArray().reduce((sum, val) => sum + (val || 0), 0);
batchSums.push(batchSum);
}
console.log(`Per-batch sums:`, batchSums);
Working with schemas programmatically.
/**
* Create schema from existing table
*/
const newSchema = table.schema.select('name', 'age');
/**
* Add fields to existing schema
*/
const extendedSchema = table.schema.assign(
new Field('new_field', new Float64())
);
/**
* Modify field properties
*/
const modifiedSchema = new Schema(
table.schema.fields.map(field =>
field.name === 'age'
? field.withType(new Float32())
: field
)
);
/**
* Schema validation and compatibility
*/
function isSchemaCompatible(schema1: Schema, schema2: Schema): boolean {
return schema1.names.every(name => schema2.hasField(name));
}
Working with schema and field metadata.
/**
* Add metadata to schema
*/
const metadata = new Map([
['version', '1.0'],
['created_by', 'data_pipeline']
]);
const schemaWithMeta = schema.withMetadata(metadata);
/**
* Add metadata to field
*/
const fieldMetadata = new Map([
['unit', 'years'],
['description', 'Age in years']
]);
const fieldWithMeta = ageField.withMetadata(fieldMetadata);
const updatedSchema = schema.assign(fieldWithMeta);
Usage Examples:
import { Schema, Field, Int32, Utf8, tableFromArrays } from "apache-arrow";
// Create table with metadata
const schema = new Schema(
[
new Field('name', new Utf8()),
new Field('age', new Int32())
],
new Map([
['version', '2.1'],
['source', 'employee_db']
])
);
console.log(schema.metadata.get('version')); // '2.1'
// Add field with metadata
const enrichedField = new Field(
'salary',
new Int32(),
true,
new Map([
['currency', 'USD'],
['confidential', 'true']
])
);
const enrichedSchema = schema.assign(enrichedField);
console.log(enrichedSchema.field('salary').metadata.get('currency')); // 'USD'
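Metadata can be read back directly from the Maps on the schema and its fields; a small sketch continuing the example above:
// Walk every field and print its metadata entries (empty object for fields without any)
for (const field of enrichedSchema.fields) {
  console.log(field.name, Object.fromEntries(field.metadata));
}
// Schema-level metadata is a Map as well
console.log(Object.fromEntries(schema.metadata)); // { version: '2.1', source: 'employee_db' }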