Tables and Schemas

Tables are the primary data structure in Apache Arrow JavaScript for representing structured, tabular data. They combine multiple named columns (vectors) with schema information to provide DataFrame-like functionality with type safety and efficient columnar operations.

Capabilities

Table Class

The core Table class provides immutable, structured data with schema-aware operations.

/**
 * Immutable tabular data structure with typed columns
 */
class Table<T extends TypeMap = any> {
  /** Schema defining the table structure */
  readonly schema: Schema<T>;
  
  /** Number of rows in the table */
  readonly length: number;
  
  /** Number of columns in the table */
  readonly numCols: number;
  
  /** Underlying record batches containing the data */
  readonly batches: RecordBatch<T>[];
  
  /** Column names */
  readonly columnNames: (keyof T)[];
  
  // Data access methods
  
  /** Get row at specified index as object */
  get(index: number): T[keyof T] | null;
  
  /** Get column by name */
  getColumn<P extends keyof T>(name: P): Vector<T[P]> | null;
  
  /** Get column by index */
  getColumnAt(index: number): Vector | null;
  
  /** Get child column by index (for nested schemas) */
  getChildAt<P extends keyof T>(index: number): Vector<T[P]> | null;
  
  // Transformation methods
  
  /** Select subset of columns by name */
  select<K extends keyof T>(...columnNames: K[]): Table<Pick<T, K>>;
  
  /** Select columns by index */
  selectAt(columnIndices: number[]): Table;
  
  /** Merge with another table */
  assign<R extends TypeMap>(other: Table<R>): Table<T & R>;
  
  /** Create slice of rows */
  slice(begin?: number, end?: number): Table<T>;
  
  /** Concatenate with other tables */
  concat(...others: Table<T>[]): Table<T>;
  
  /** Filter rows based on predicate */
  filter(predicate: Predicate): FilteredTable<T>;
  
  // Conversion methods
  
  /** Convert to array of row objects */
  toArray(): T[keyof T][];
  
  /** Serialize to JSON array */
  toJSON(): any[];
  
  /** String representation */
  toString(): string;
  
  // Iteration methods
  
  /** Scan rows with custom function */
  scan(next: NextFunc<T>, bind?: Partial<T>): IterableIterator<T[keyof T]>;
  
  /** Count occurrences by column values */
  countBy(name: keyof T): CountByResult;
  
  /** Iterator over rows */
  [Symbol.iterator](): IterableIterator<T[keyof T]>;
  
  /** Iterator over rows (same as the default iterator) */
  values(): IterableIterator<T[keyof T]>;
  
  /** Iterator over row indices */
  keys(): IterableIterator<number>;
  
  /** Iterator over [index, row] pairs */
  entries(): IterableIterator<[number, T[keyof T]]>;
}

// Type definitions for table values
type TypeMap = { [key: string]: DataType };
type Predicate = (row: any, index: number) => boolean;
type NextFunc<T> = (index: number, batch: RecordBatch) => T[keyof T];
type CountByResult = { [key: string]: number };
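
As a quick orientation, here is a minimal sketch of row and column access using the members declared above. It assumes the getColumn-style accessors shown in this declaration behave as documented:

import { tableFromArrays } from "apache-arrow";

const demo = tableFromArrays({
  city: ['Oslo', 'Lima', 'Seoul'],
  population: [709000, 10000000, 9500000]
});

console.log(demo.numCols);                    // 2
console.log(demo.get(1));                     // row object for 'Lima'
console.log(demo.getColumn('city')?.get(2));  // 'Seoul'

// Tables are iterable over rows
for (const row of demo) {
  console.log(row.city, row.population);
}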

Table Constructor Overloads

Multiple ways to create Table instances from different data sources.

/**
 * Table constructor overloads
 */
class Table<T extends TypeMap = any> {
  /** Empty table */
  constructor();
  
  /** From record batches */
  constructor(batches: Iterable<RecordBatch<T>>);
  constructor(...batches: RecordBatch<T>[]);
  
  /** From schema and vectors */
  constructor(schema: Schema<T>, ...columns: Vector<T[keyof T]>[]);
  
  /** From typed columns object */
  constructor(columns: { [P in keyof T]: Vector<T[P]> });
  
  /** From arrays object */
  constructor(columns: { [P in keyof T]: T[P]['TArray'] });
  
  /** From mixed data */
  constructor(
    schema: Schema<T>, 
    length?: number, 
    children?: (Vector | T[keyof T]['TArray'])[]
  );
}
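
A short sketch of two of these overloads, the empty constructor and the typed-columns object; vectorFromArray is used here only to build the column vectors:

import { Table, Int32, Utf8, vectorFromArray } from "apache-arrow";

// Empty table: no columns, no rows
const empty = new Table();

// From an object of pre-built vectors (typed columns overload)
const fromColumns = new Table({
  id: vectorFromArray([1, 2, 3], new Int32()),
  label: vectorFromArray(['a', 'b', 'c'], new Utf8())
});

console.log(empty.numCols, fromColumns.numCols); // 0 2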

Table Factory Functions

Convenient functions for creating tables from common data sources.

/**
 * Create table from object of arrays
 */
function tableFromArrays<T extends Record<string, ArrayLike<any>>>(
  columns: T
): Table<{ [P in keyof T]: DataTypeOf<T[P][number]> }>;

/**
 * Create table from array of objects
 */
function tableFromJSON<T>(array: T[]): Table<InferredTypes<T>>;

/**
 * Create table from record batches
 */
function makeTable<T extends TypeMap>(
  batches: RecordBatch<T>[]
): Table<T>;

function makeTable<T extends TypeMap>(
  schema: Schema<T>, 
  batches?: RecordBatch<T>[]
): Table<T>;

function makeTable<T extends TypeMap>(
  ...columns: Vector<T[keyof T]>[]
): Table<T>;

/**
 * Create empty table with schema
 */
function emptyTable<T extends TypeMap>(schema: Schema<T>): Table<T>;

Usage Examples:

import { 
  tableFromArrays, 
  tableFromJSON, 
  vectorFromArray, 
  Table, 
  Schema, 
  Field, 
  Int32, 
  Utf8, 
  Bool 
} from "apache-arrow";

// From arrays object
const table1 = tableFromArrays({
  name: ['Alice', 'Bob', 'Charlie'],
  age: [25, 30, 35],
  active: [true, false, true]
});

// From JSON objects
const table2 = tableFromJSON([
  { name: 'Alice', age: 25, active: true },
  { name: 'Bob', age: 30, active: false },
  { name: 'Charlie', age: 35, active: true }
]);

// From schema and vectors
const schema = new Schema([
  new Field('name', new Utf8()),
  new Field('age', new Int32()),
  new Field('active', new Bool())
]);

const nameVector = vectorFromArray(['Alice', 'Bob', 'Charlie'], new Utf8());
const ageVector = vectorFromArray([25, 30, 35], new Int32());
const activeVector = vectorFromArray([true, false, true], new Bool());

const table3 = new Table(schema, nameVector, ageVector, activeVector);

Schema Class

Defines the structure and metadata for tables and record batches.

/**
 * Schema defining table structure with metadata
 */
class Schema<T extends TypeMap = any> {
  /** Array of field definitions */
  readonly fields: Field<T[keyof T]>[];
  
  /** Number of fields */
  readonly length: number;
  
  /** Key-value metadata for the schema */
  readonly metadata: Map<string, string>;
  
  /** Dictionary type registry */
  readonly dictionaries: Map<number, DataType>;
  
  /** Arrow metadata version */
  readonly metadataVersion: MetadataVersion;
  
  /** Field names array */
  readonly names: string[];
  
  // Schema manipulation methods
  
  /** Create schema with subset of fields */
  select(...names: string[]): Schema;
  select(names: string[]): Schema;
  
  /** Create schema with fields at indices */
  selectAt(...indices: number[]): Schema;
  selectAt(indices: number[]): Schema;
  
  /** Add or replace fields */
  assign(...fields: Field[]): Schema;
  
  /** Create schema with new metadata */
  withMetadata(metadata: Map<string, string>): Schema;
  
  // Field access methods
  
  /** Get field by name */
  field(name: string): Field | null;
  
  /** Get field by index */
  fieldAt(index: number): Field | null;
  
  /** Get field index by name */
  fieldIndex(name: string): number;
  
  /** Check if field exists */
  hasField(name: string): boolean;
  
  // Conversion methods
  
  /** Convert to JSON representation */
  toJSON(): object;
  
  /** String representation */
  toString(): string;
}

Field Class

Represents individual columns within a schema.

/**
 * Field representing a named column with type and metadata
 */
class Field<T extends DataType = any> {
  /** Field name */
  readonly name: string;
  
  /** Data type */
  readonly type: T;
  
  /** Whether field allows null values */
  readonly nullable: boolean;
  
  /** Field-specific metadata */
  readonly metadata: Map<string, string>;
  
  /** Constructor */
  constructor(
    name: string,
    type: T,
    nullable?: boolean,
    metadata?: Map<string, string>
  );
  
  // Field manipulation methods
  
  /** Create copy with modifications */
  clone(options?: Partial<FieldOptions>): Field<T>;
  
  /** Create field with new name */
  withName(name: string): Field<T>;
  
  /** Create field with new type */
  withType<U extends DataType>(type: U): Field<U>;
  
  /** Create field with new nullability */
  withNullable(nullable: boolean): Field<T>;
  
  /** Create field with new metadata */
  withMetadata(metadata: Map<string, string>): Field<T>;
  
  // Conversion methods
  
  /** Convert to JSON representation */
  toJSON(): object;
  
  /** String representation */
  toString(): string;
}

// Field options interface
interface FieldOptions {
  name?: string;
  type?: DataType;
  nullable?: boolean;
  metadata?: Map<string, string>;
}

Usage Examples:

import { Schema, Field, Int32, Utf8, Bool, Float64 } from "apache-arrow";

// Create individual fields
const nameField = new Field('name', new Utf8(), false); // Non-nullable
const ageField = new Field('age', new Int32(), true);   // Nullable
const activeField = new Field('active', new Bool(), false);

// Create schema
const schema = new Schema([nameField, ageField, activeField]);

// Schema operations
const subset = schema.select('name', 'age');           // Select specific fields
const reordered = schema.selectAt([2, 0, 1]);          // Reorder by index
const withExtra = schema.assign(                       // Add new field
  new Field('score', new Float64())
);

// Field access
console.log(schema.names);                    // ['name', 'age', 'active']
console.log(schema.field('name'));            // Field<Utf8>
console.log(schema.fieldIndex('age'));        // 1
console.log(schema.hasField('nonexistent'));  // false

// Field modifications
const renamedField = nameField.withName('full_name');
const nonNullableAge = ageField.withNullable(false);

RecordBatch Class

Represents a single batch of rows with equal-length columns.

/**
 * Collection of equal-length vectors representing a batch of rows
 */
class RecordBatch<T extends TypeMap = any> {
  /** Schema defining the batch structure */
  readonly schema: Schema<T>;
  
  /** Number of rows in this batch */
  readonly length: number;
  
  /** Number of columns */
  readonly numCols: number;
  
  /** Underlying data storage */
  readonly data: Data<Struct<T>>;
  
  // Data access (similar to Table)
  
  /** Get row at index */
  get(index: number): T[keyof T] | null;
  
  /** Get column by name */
  getColumn<P extends keyof T>(name: P): Vector<T[P]> | null;
  
  /** Get column by index */
  getColumnAt(index: number): Vector | null;
  
  /** Get child column by index */
  getChildAt<P extends keyof T>(index: number): Vector<T[P]> | null;
  
  // Transformation methods
  
  /** Select columns */
  select<K extends keyof T>(...columnNames: K[]): RecordBatch<Pick<T, K>>;
  selectAt(columnIndices: number[]): RecordBatch;
  
  /** Create slice */
  slice(begin?: number, end?: number): RecordBatch<T>;
  
  /** Concatenate batches into table */
  concat(...others: RecordBatch<T>[]): Table<T>;
  
  // Conversion methods
  
  /** Convert to array */
  toArray(): T[keyof T][];
  
  /** Convert to JSON */
  toJSON(): any[];
  
  /** String representation */
  toString(): string;
  
  // Iteration
  [Symbol.iterator](): IterableIterator<T[keyof T]>;
  values(): IterableIterator<T[keyof T]>;
  keys(): IterableIterator<number>;
  entries(): IterableIterator<[number, T[keyof T]]>;
}
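
Record batches are usually obtained from a table's batches array rather than constructed by hand. Below is a brief sketch of reading them back out, again assuming the accessor names listed above:

import { tableFromArrays } from "apache-arrow";

const table = tableFromArrays({
  id: [1, 2, 3, 4],
  score: [0.5, 0.9, 0.3, 0.7]
});

// Every table is backed by one or more record batches
for (const batch of table.batches) {
  console.log(batch.numCols, batch.length);  // columns and rows in this batch
  console.log(batch.get(0));                 // first row of the batch
}

// Slicing a batch yields another RecordBatch
const head = table.batches[0].slice(0, 2);
console.log(head.toArray());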

Table Operations

Data Access Operations

Methods for accessing table data in various ways.

/**
 * Row access
 */
get(index: number): T[keyof T] | null;

/**
 * Column access by name
 */
getColumn<P extends keyof T>(name: P): Vector<T[P]> | null;

/**
 * Column access by index
 */
getColumnAt(index: number): Vector | null;

/**
 * Batch access for large tables
 */
getBatch(index: number): RecordBatch<T>;

/**
 * Iterator access for memory efficiency
 */
scan(next: NextFunc<T>, bind?: Partial<T>): IterableIterator<T[keyof T]>;

Usage Examples:

import { tableFromArrays } from "apache-arrow";

const table = tableFromArrays({
  name: ['Alice', 'Bob', 'Charlie', 'Diana'],
  age: [25, 30, 35, 28],
  department: ['Engineering', 'Sales', 'Engineering', 'Marketing']
});

// Row access
console.log(table.get(0));  // { name: 'Alice', age: 25, department: 'Engineering' }
console.log(table.get(2));  // { name: 'Charlie', age: 35, department: 'Engineering' }

// Column access
const nameColumn = table.getColumn('name');
console.log(nameColumn.toArray());  // ['Alice', 'Bob', 'Charlie', 'Diana']

const ageColumn = table.getColumnAt(1); // Get second column
console.log(ageColumn.toArray());   // [25, 30, 35, 28]

// Scan for memory-efficient iteration
const engineeringRows = [];
for (const row of table.scan((idx, batch) => batch.get(idx))) {
  if (row.department === 'Engineering') {
    engineeringRows.push(row);
  }
}

Selection and Projection

Methods for selecting subsets of columns or rows.

/**
 * Select columns by name
 */
select<K extends keyof T>(...columnNames: K[]): Table<Pick<T, K>>;

/**
 * Select columns by index
 */
selectAt(columnIndices: number[]): Table;

/**
 * Filter rows based on predicate
 */
filter(predicate: (row: T[keyof T], index: number) => boolean): FilteredTable<T>;

/**
 * Create row slice
 */
slice(begin?: number, end?: number): Table<T>;

Usage Examples:

import { tableFromArrays } from "apache-arrow";

const table = tableFromArrays({
  id: [1, 2, 3, 4, 5],
  name: ['Alice', 'Bob', 'Charlie', 'Diana', 'Eve'],
  age: [25, 30, 35, 28, 32],
  salary: [75000, 80000, 90000, 72000, 85000]
});

// Select specific columns
const nameAge = table.select('name', 'age');
// Result: Table with only 'name' and 'age' columns

// Select by index
const firstTwoCols = table.selectAt([0, 1]); 
// Result: Table with 'id' and 'name' columns

// Filter rows
const highEarners = table.filter(row => row.salary > 80000);
// Result: Charlie and Eve rows

const youngEmployees = table.filter(row => row.age < 30);
// Result: Alice and Diana rows

// Slice rows
const middleThree = table.slice(1, 4);
// Result: Bob, Charlie, Diana rows (indices 1, 2, 3)

Table Combination

Methods for combining multiple tables.

/**
 * Concatenate tables vertically (same schema)
 */
concat(...others: Table<T>[]): Table<T>;

/**
 * Merge tables horizontally (different columns)
 */
assign<R extends TypeMap>(other: Table<R>): Table<T & R>;

/**
 * Join tables on common columns (not built-in, but pattern)
 */
// Note: joins require a custom implementation; see the sketch after the usage examples below

Usage Examples:

import { tableFromArrays } from "apache-arrow";

// Vertical concatenation (same columns)
const employees1 = tableFromArrays({
  name: ['Alice', 'Bob'],
  age: [25, 30]
});

const employees2 = tableFromArrays({
  name: ['Charlie', 'Diana'],
  age: [35, 28]
});

const allEmployees = employees1.concat(employees2);
// Result: 4 rows with name and age columns

// Horizontal merge (different columns)
const basicInfo = tableFromArrays({
  name: ['Alice', 'Bob', 'Charlie'],
  age: [25, 30, 35]
});

const jobInfo = tableFromArrays({
  department: ['Engineering', 'Sales', 'Engineering'],
  salary: [75000, 80000, 90000]
});

const combined = basicInfo.assign(jobInfo);
// Result: Table with name, age, department, salary columns
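
Since joins are not built in, the following is a minimal sketch of an inner join on a shared key column, implemented with the row iterator and tableFromJSON. The function name, the key column, and the inner-join semantics are illustrative assumptions, not part of the Arrow API:

import { Table, tableFromJSON } from "apache-arrow";

// Hash join: index the right table by key, then probe with the left table
function innerJoin(left: Table, right: Table, key: string): Table {
  const index = new Map<any, any[]>();
  for (const row of right) {
    const k = (row as any)[key];
    const bucket = index.get(k) ?? [];
    bucket.push(row);
    index.set(k, bucket);
  }

  const joined: Record<string, any>[] = [];
  for (const row of left) {
    for (const match of index.get((row as any)[key]) ?? []) {
      // toJSON() turns the row proxies into plain objects before merging
      joined.push({ ...row.toJSON(), ...match.toJSON() });
    }
  }
  return tableFromJSON(joined);
}

// Hypothetical usage: two tables sharing an 'id' column
// const enriched = innerJoin(customers, orders, 'id');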

Aggregation and Analysis

Methods for analyzing table data.

/**
 * Count values by column
 */
countBy(name: keyof T): CountByResult;

/**
 * Group by functionality (custom implementation needed)
 */
// Arrow doesn't provide a built-in groupBy, but one can be implemented; see the sketch after the usage examples below

/**
 * Statistical operations on columns
 */
// Access individual columns for statistical operations
table.getColumn('age').toArray().reduce((sum, age) => sum + (age || 0), 0) / table.length; // Mean age (Vectors don't expose reduce(); convert with toArray() first)

Usage Examples:

import { tableFromArrays } from "apache-arrow";

const table = tableFromArrays({
  name: ['Alice', 'Bob', 'Charlie', 'Diana', 'Eve'],
  department: ['Eng', 'Sales', 'Eng', 'Marketing', 'Eng'],
  age: [25, 30, 35, 28, 32]
});

// Count by department
const deptCounts = table.countBy('department');
console.log(deptCounts); // { Eng: 3, Sales: 1, Marketing: 1 }

// Custom aggregations using column operations
const ageColumn = table.getColumn('age');
const ages = ageColumn.toArray();  // Vectors don't expose reduce(); work on a plain array
const totalAge = ages.reduce((sum, age) => sum + (age || 0), 0);
const avgAge = totalAge / ageColumn.length;
console.log(`Average age: ${avgAge}`); // Average age: 30

// Find min/max
const minAge = ages.reduce((min, age) => 
  age !== null ? Math.min(min, age) : min, Infinity
);
const maxAge = ages.reduce((max, age) => 
  age !== null ? Math.max(max, age) : max, -Infinity
);
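
As noted above, groupBy has to be implemented in user code. Here is a minimal sketch that groups on one column and sums another via the row iterator; the function name and reducer are illustrative:

import { Table, tableFromArrays } from "apache-arrow";

// Group rows by one column and sum another
function groupBySum(table: Table, groupCol: string, valueCol: string): Map<any, number> {
  const sums = new Map<any, number>();
  for (const row of table) {
    const key = (row as any)[groupCol];
    sums.set(key, (sums.get(key) ?? 0) + ((row as any)[valueCol] ?? 0));
  }
  return sums;
}

const salaries = tableFromArrays({
  department: ['Eng', 'Sales', 'Eng'],
  salary: [75000, 80000, 90000]
});

console.log(groupBySum(salaries, 'department', 'salary'));
// Map { 'Eng' => 165000, 'Sales' => 80000 }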

Table Iteration and Scanning

Efficient methods for processing table data.

/**
 * Standard iteration over rows
 */
[Symbol.iterator](): IterableIterator<T[keyof T]>;

/**
 * Custom scanning with state
 */
scan(
  next: (index: number, batch: RecordBatch<T>) => T[keyof T],
  bind?: Partial<T>
): IterableIterator<T[keyof T]>;

/**
 * Batch-wise processing for large tables
 */
// Process table in batches for memory efficiency
for (const batch of table.batches) {
  // Process batch
}

Usage Examples:

import { tableFromArrays } from "apache-arrow";

const largeTable = tableFromArrays({
  id: Array.from({ length: 10000 }, (_, i) => i),
  value: Array.from({ length: 10000 }, (_, i) => Math.random())
});

// Memory-efficient iteration
let sum = 0;
let count = 0;

// Process row by row without loading all into memory
for (const row of largeTable) {
  if (row.value > 0.5) {
    sum += row.value;
    count++;
  }
}

console.log(`Average of values > 0.5: ${sum / count}`);

// Batch-wise processing for very large tables
const batchSums: number[] = [];
for (const batch of largeTable.batches) {
  const values = batch.getColumn('value').toArray();  // plain values for reduce()
  const batchSum = values.reduce((sum, val) => sum + (val || 0), 0);
  batchSums.push(batchSum);
}

console.log(`Per-batch sums:`, batchSums);

Schema Management

Dynamic Schema Operations

Working with schemas programmatically.

/**
 * Create schema from existing table
 */
const newSchema = table.schema.select('name', 'age');

/**
 * Add fields to existing schema
 */
const extendedSchema = table.schema.assign(
  new Field('new_field', new Float64())
);

/**
 * Modify field properties
 */
const modifiedSchema = new Schema(
  table.schema.fields.map(field => 
    field.name === 'age' 
      ? field.withType(new Float32()) 
      : field
  )
);

/**
 * Schema validation and compatibility
 */
function isSchemaCompatible(schema1: Schema, schema2: Schema): boolean {
  return schema1.names.every(name => schema2.hasField(name));
}
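
For instance, this check could guard a concat between tables whose schemas may have drifted; currentTable and archiveTable below are hypothetical:

// Hypothetical guard before concatenating two tables
if (isSchemaCompatible(currentTable.schema, archiveTable.schema)) {
  const merged = archiveTable.concat(currentTable);
  console.log(`Merged ${merged.length} rows`);
} else {
  throw new Error('Schemas are not compatible; refusing to concat');
}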

Metadata Management

Working with schema and field metadata.

/**
 * Add metadata to schema
 */
const metadata = new Map([
  ['version', '1.0'],
  ['created_by', 'data_pipeline']
]);

const schemaWithMeta = schema.withMetadata(metadata);

/**
 * Add metadata to field
 */
const fieldMetadata = new Map([
  ['unit', 'years'],
  ['description', 'Age in years']
]);

const fieldWithMeta = ageField.withMetadata(fieldMetadata);
const updatedSchema = schema.assign(fieldWithMeta);

Usage Examples:

import { Schema, Field, Int32, Utf8, tableFromArrays } from "apache-arrow";

// Create table with metadata
const schema = new Schema(
  [
    new Field('name', new Utf8()),
    new Field('age', new Int32())
  ],
  new Map([
    ['version', '2.1'],
    ['source', 'employee_db']
  ])
);

console.log(schema.metadata.get('version')); // '2.1'

// Add field with metadata
const enrichedField = new Field(
  'salary', 
  new Int32(),
  true,
  new Map([
    ['currency', 'USD'],
    ['confidential', 'true']
  ])
);

const enrichedSchema = schema.assign(enrichedField);
console.log(enrichedSchema.field('salary').metadata.get('currency')); // 'USD'