Tables and Schemas

Tables are the primary data structure in Apache Arrow JavaScript for representing structured, tabular data. They combine multiple named columns (vectors) with schema information to provide DataFrame-like functionality with type safety and efficient columnar operations.

Capabilities

Table Class

The core Table class provides immutable, structured data with schema-aware operations.

/**
 * Immutable tabular data structure with typed columns
 */
class Table<T extends TypeMap = any> {
  /** Schema defining the table structure */
  readonly schema: Schema<T>;
  
  /** Number of rows in the table */
  readonly length: number;
  
  /** Number of columns in the table */
  readonly numCols: number;
  
  /** Underlying record batches containing the data */
  readonly batches: RecordBatch<T>[];
  
  /** Column names */
  readonly columnNames: (keyof T)[];
  
  // Data access methods
  
  /** Get row at specified index as object */
  get(index: number): T[keyof T] | null;
  
  /** Get column by name */
  getColumn<P extends keyof T>(name: P): Vector<T[P]> | null;
  
  /** Get column by index */
  getColumnAt(index: number): Vector | null;
  
  /** Get child column by index (for nested schemas) */
  getChildAt<P extends keyof T>(index: number): Vector<T[P]> | null;
  
  // Transformation methods
  
  /** Select subset of columns by name */
  select<K extends keyof T>(...columnNames: K[]): Table<Pick<T, K>>;
  
  /** Select columns by index */
  selectAt(columnIndices: number[]): Table;
  
  /** Merge with another table */
  assign<R extends TypeMap>(other: Table<R>): Table<T & R>;
  
  /** Create slice of rows */
  slice(begin?: number, end?: number): Table<T>;
  
  /** Concatenate with other tables */
  concat(...others: Table<T>[]): Table<T>;
  
  /** Filter rows based on predicate */
  filter(predicate: Predicate): FilteredTable<T>;
  
  // Conversion methods
  
  /** Convert to array of row objects */
  toArray(): T[keyof T][];
  
  /** Serialize to JSON array */
  toJSON(): any[];
  
  /** String representation */
  toString(): string;
  
  // Iteration methods
  
  /** Scan rows with custom function */
  scan(next: NextFunc<T>, bind?: Partial<T>): IterableIterator<T[keyof T]>;
  
  /** Count occurrences by column values */
  countBy(name: keyof T): CountByResult;
  
  /** Iterator over rows */
  [Symbol.iterator](): IterableIterator<T[keyof T]>;
  
  /** Iterator over rows (same as the default iterator) */
  values(): IterableIterator<T[keyof T]>;
  
  /** Iterator over row indices */
  keys(): IterableIterator<number>;
  
  /** Iterator over [index, row] pairs */
  entries(): IterableIterator<[number, T[keyof T]]>;
}

// Type definitions for table values
type TypeMap = { [key: string]: DataType };
type Predicate = (row: any, index: number) => boolean;
type NextFunc<T> = (index: number, batch: RecordBatch) => T[keyof T];
type CountByResult = { [key: string]: number };
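
As a quick orientation, here is a minimal sketch of row and column access using the members declared above. It assumes the getColumn-style accessors shown in this declaration behave as documented:

import { tableFromArrays } from "apache-arrow";

const demo = tableFromArrays({
  city: ['Oslo', 'Lima', 'Seoul'],
  population: [709000, 10000000, 9500000]
});

console.log(demo.numCols);                    // 2
console.log(demo.get(1));                     // row object for 'Lima'
console.log(demo.getColumn('city')?.get(2));  // 'Seoul'

// Tables are iterable over rows
for (const row of demo) {
  console.log(row.city, row.population);
}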

Table Constructor Overloads

Multiple ways to create Table instances from different data sources.

/**
 * Table constructor overloads
 */
class Table<T extends TypeMap = any> {
  /** Empty table */
  constructor();
  
  /** From record batches */
  constructor(batches: Iterable<RecordBatch<T>>);
  constructor(...batches: RecordBatch<T>[]);
  
  /** From schema and vectors */
  constructor(schema: Schema<T>, ...columns: Vector<T[keyof T]>[]);
  
  /** From typed columns object */
  constructor(columns: { [P in keyof T]: Vector<T[P]> });
  
  /** From arrays object */
  constructor(columns: { [P in keyof T]: T[P]['TArray'] });
  
  /** From mixed data */
  constructor(
    schema: Schema<T>, 
    length?: number, 
    children?: (Vector | T[keyof T]['TArray'])[]
  );
}
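
A short sketch of two of these overloads, the empty constructor and the typed-columns object; vectorFromArray is used here only to build the column vectors:

import { Table, Int32, Utf8, vectorFromArray } from "apache-arrow";

// Empty table: no columns, no rows
const empty = new Table();

// From an object of pre-built vectors (typed columns overload)
const fromColumns = new Table({
  id: vectorFromArray([1, 2, 3], new Int32()),
  label: vectorFromArray(['a', 'b', 'c'], new Utf8())
});

console.log(empty.numCols, fromColumns.numCols); // 0 2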

Table Factory Functions

Convenient functions for creating tables from common data sources.

/**
 * Create table from object of arrays
 */
function tableFromArrays<T extends Record<string, ArrayLike<any>>>(
  columns: T
): Table<{ [P in keyof T]: DataTypeOf<T[P][number]> }>;

/**
 * Create table from array of objects
 */
function tableFromJSON<T>(array: T[]): Table<InferredTypes<T>>;

/**
 * Create table from record batches
 */
function makeTable<T extends TypeMap>(
  batches: RecordBatch<T>[]
): Table<T>;

function makeTable<T extends TypeMap>(
  schema: Schema<T>, 
  batches?: RecordBatch<T>[]
): Table<T>;

function makeTable<T extends TypeMap>(
  ...columns: Vector<T[keyof T]>[]
): Table<T>;

/**
 * Create empty table with schema
 */
function emptyTable<T extends TypeMap>(schema: Schema<T>): Table<T>;

Usage Examples:

import { 
  tableFromArrays, 
  tableFromJSON, 
  vectorFromArray, 
  Table, 
  Schema, 
  Field, 
  Int32, 
  Utf8, 
  Bool 
} from "apache-arrow";

// From arrays object
const table1 = tableFromArrays({
  name: ['Alice', 'Bob', 'Charlie'],
  age: [25, 30, 35],
  active: [true, false, true]
});

// From JSON objects
const table2 = tableFromJSON([
  { name: 'Alice', age: 25, active: true },
  { name: 'Bob', age: 30, active: false },
  { name: 'Charlie', age: 35, active: true }
]);

// From schema and vectors
const schema = new Schema([
  new Field('name', new Utf8()),
  new Field('age', new Int32()),
  new Field('active', new Bool())
]);

const nameVector = vectorFromArray(['Alice', 'Bob', 'Charlie'], new Utf8());
const ageVector = vectorFromArray([25, 30, 35], new Int32());
const activeVector = vectorFromArray([true, false, true], new Bool());

const table3 = new Table(schema, nameVector, ageVector, activeVector);

Schema Class

Defines the structure and metadata for tables and record batches.

/**
 * Schema defining table structure with metadata
 */
class Schema<T extends TypeMap = any> {
  /** Array of field definitions */
  readonly fields: Field<T[keyof T]>[];
  
  /** Number of fields */
  readonly length: number;
  
  /** Key-value metadata for the schema */
  readonly metadata: Map<string, string>;
  
  /** Dictionary type registry */
  readonly dictionaries: Map<number, DataType>;
  
  /** Arrow metadata version */
  readonly metadataVersion: MetadataVersion;
  
  /** Field names array */
  readonly names: string[];
  
  // Schema manipulation methods
  
  /** Create schema with subset of fields */
  select(...names: string[]): Schema;
  select(names: string[]): Schema;
  
  /** Create schema with fields at indices */
  selectAt(...indices: number[]): Schema;
  selectAt(indices: number[]): Schema;
  
  /** Add or replace fields */
  assign(...fields: Field[]): Schema;
  
  /** Create schema with new metadata */
  withMetadata(metadata: Map<string, string>): Schema;
  
  // Field access methods
  
  /** Get field by name */
  field(name: string): Field | null;
  
  /** Get field by index */
  fieldAt(index: number): Field | null;
  
  /** Get field index by name */
  fieldIndex(name: string): number;
  
  /** Check if field exists */
  hasField(name: string): boolean;
  
  // Conversion methods
  
  /** Convert to JSON representation */
  toJSON(): object;
  
  /** String representation */
  toString(): string;
}

Field Class

Represents individual columns within a schema.

/**
 * Field representing a named column with type and metadata
 */
class Field<T extends DataType = any> {
  /** Field name */
  readonly name: string;
  
  /** Data type */
  readonly type: T;
  
  /** Whether field allows null values */
  readonly nullable: boolean;
  
  /** Field-specific metadata */
  readonly metadata: Map<string, string>;
  
  /** Constructor */
  constructor(
    name: string,
    type: T,
    nullable?: boolean,
    metadata?: Map<string, string>
  );
  
  // Field manipulation methods
  
  /** Create copy with modifications */
  clone(options?: Partial<FieldOptions>): Field<T>;
  
  /** Create field with new name */
  withName(name: string): Field<T>;
  
  /** Create field with new type */
  withType<U extends DataType>(type: U): Field<U>;
  
  /** Create field with new nullability */
  withNullable(nullable: boolean): Field<T>;
  
  /** Create field with new metadata */
  withMetadata(metadata: Map<string, string>): Field<T>;
  
  // Conversion methods
  
  /** Convert to JSON representation */
  toJSON(): object;
  
  /** String representation */
  toString(): string;
}

// Field options interface
interface FieldOptions {
  name?: string;
  type?: DataType;
  nullable?: boolean;
  metadata?: Map<string, string>;
}

Usage Examples:

import { Schema, Field, Int32, Utf8, Bool, Float64 } from "apache-arrow";

// Create individual fields
const nameField = new Field('name', new Utf8(), false); // Non-nullable
const ageField = new Field('age', new Int32(), true);   // Nullable
const activeField = new Field('active', new Bool(), false);

// Create schema
const schema = new Schema([nameField, ageField, activeField]);

// Schema operations
const subset = schema.select('name', 'age');           // Select specific fields
const reordered = schema.selectAt([2, 0, 1]);          // Reorder by index
const withExtra = schema.assign(                       // Add new field
  new Field('score', new Float64())
);

// Field access
console.log(schema.names);                    // ['name', 'age', 'active']
console.log(schema.field('name'));            // Field<Utf8>
console.log(schema.fieldIndex('age'));        // 1
console.log(schema.hasField('nonexistent'));  // false

// Field modifications
const renamedField = nameField.withName('full_name');
const nonNullableAge = ageField.withNullable(false);

RecordBatch Class

Represents a single batch of rows with equal-length columns.

/**
 * Collection of equal-length vectors representing a batch of rows
 */
class RecordBatch<T extends TypeMap = any> {
  /** Schema defining the batch structure */
  readonly schema: Schema<T>;
  
  /** Number of rows in this batch */
  readonly length: number;
  
  /** Number of columns */
  readonly numCols: number;
  
  /** Underlying data storage */
  readonly data: Data<Struct<T>>;
  
  // Data access (similar to Table)
  
  /** Get row at index */
  get(index: number): T[keyof T] | null;
  
  /** Get column by name */
  getColumn<P extends keyof T>(name: P): Vector<T[P]> | null;
  
  /** Get column by index */
  getColumnAt(index: number): Vector | null;
  
  /** Get child column by index */
  getChildAt<P extends keyof T>(index: number): Vector<T[P]> | null;
  
  // Transformation methods
  
  /** Select columns */
  select<K extends keyof T>(...columnNames: K[]): RecordBatch<Pick<T, K>>;
  selectAt(columnIndices: number[]): RecordBatch;
  
  /** Create slice */
  slice(begin?: number, end?: number): RecordBatch<T>;
  
  /** Concatenate batches into table */
  concat(...others: RecordBatch<T>[]): Table<T>;
  
  // Conversion methods
  
  /** Convert to array */
  toArray(): T[keyof T][];
  
  /** Convert to JSON */
  toJSON(): any[];
  
  /** String representation */
  toString(): string;
  
  // Iteration
  [Symbol.iterator](): IterableIterator<T[keyof T]>;
  values(): IterableIterator<T[keyof T]>;
  keys(): IterableIterator<number>;
  entries(): IterableIterator<[number, T[keyof T]]>;
}
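
Record batches are usually obtained from a table's batches array rather than constructed by hand. Below is a brief sketch of reading them back out, again assuming the accessor names listed above:

import { tableFromArrays } from "apache-arrow";

const table = tableFromArrays({
  id: [1, 2, 3, 4],
  score: [0.5, 0.9, 0.3, 0.7]
});

// Every table is backed by one or more record batches
for (const batch of table.batches) {
  console.log(batch.numCols, batch.length);  // columns and rows in this batch
  console.log(batch.get(0));                 // first row of the batch
}

// Slicing a batch yields another RecordBatch
const head = table.batches[0].slice(0, 2);
console.log(head.toArray());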

Table Operations

Data Access Operations

Methods for accessing table data in various ways.

/**
 * Row access
 */
get(index: number): T[keyof T] | null;

/**
 * Column access by name
 */
getColumn<P extends keyof T>(name: P): Vector<T[P]> | null;

/**
 * Column access by index
 */
getColumnAt(index: number): Vector | null;

/**
 * Batch access for large tables
 */
getBatch(index: number): RecordBatch<T>;

/**
 * Iterator access for memory efficiency
 */
scan(next: NextFunc<T>, bind?: Partial<T>): IterableIterator<T[keyof T]>;

Usage Examples:

import { tableFromArrays } from "apache-arrow";

const table = tableFromArrays({
  name: ['Alice', 'Bob', 'Charlie', 'Diana'],
  age: [25, 30, 35, 28],
  department: ['Engineering', 'Sales', 'Engineering', 'Marketing']
});

// Row access
console.log(table.get(0));  // { name: 'Alice', age: 25, department: 'Engineering' }
console.log(table.get(2));  // { name: 'Charlie', age: 35, department: 'Engineering' }

// Column access
const nameColumn = table.getColumn('name');
console.log(nameColumn.toArray());  // ['Alice', 'Bob', 'Charlie', 'Diana']

const ageColumn = table.getColumnAt(1); // Get second column
console.log(ageColumn.toArray());   // [25, 30, 35, 28]

// Scan for memory-efficient iteration
const engineeringRows = [];
for (const row of table.scan((idx, batch) => batch.get(idx))) {
  if (row.department === 'Engineering') {
    engineeringRows.push(row);
  }
}

Selection and Projection

Methods for selecting subsets of columns or rows.

/**
 * Select columns by name
 */
select<K extends keyof T>(...columnNames: K[]): Table<Pick<T, K>>;

/**
 * Select columns by index
 */
selectAt(columnIndices: number[]): Table;

/**
 * Filter rows based on predicate
 */
filter(predicate: (row: T[keyof T], index: number) => boolean): FilteredTable<T>;

/**
 * Create row slice
 */
slice(begin?: number, end?: number): Table<T>;

Usage Examples:

import { tableFromArrays } from "apache-arrow";

const table = tableFromArrays({
  id: [1, 2, 3, 4, 5],
  name: ['Alice', 'Bob', 'Charlie', 'Diana', 'Eve'],
  age: [25, 30, 35, 28, 32],
  salary: [75000, 80000, 90000, 72000, 85000]
});

// Select specific columns
const nameAge = table.select('name', 'age');
// Result: Table with only 'name' and 'age' columns

// Select by index
const firstTwoCols = table.selectAt([0, 1]); 
// Result: Table with 'id' and 'name' columns

// Filter rows
const highEarners = table.filter(row => row.salary > 80000);
// Result: Charlie and Eve rows

const youngEmployees = table.filter(row => row.age < 30);
// Result: Alice and Diana rows

// Slice rows
const middleThree = table.slice(1, 4);
// Result: Bob, Charlie, Diana rows (indices 1, 2, 3)

Table Combination

Methods for combining multiple tables.

/**
 * Concatenate tables vertically (same schema)
 */
concat(...others: Table<T>[]): Table<T>;

/**
 * Merge tables horizontally (different columns)
 */
assign<R extends TypeMap>(other: Table<R>): Table<T & R>;

/**
 * Join tables on common columns (not built-in, but pattern)
 */
// Note: joins require a custom implementation; see the sketch after the usage examples below

Usage Examples:

import { tableFromArrays } from "apache-arrow";

// Vertical concatenation (same columns)
const employees1 = tableFromArrays({
  name: ['Alice', 'Bob'],
  age: [25, 30]
});

const employees2 = tableFromArrays({
  name: ['Charlie', 'Diana'],
  age: [35, 28]
});

const allEmployees = employees1.concat(employees2);
// Result: 4 rows with name and age columns

// Horizontal merge (different columns)
const basicInfo = tableFromArrays({
  name: ['Alice', 'Bob', 'Charlie'],
  age: [25, 30, 35]
});

const jobInfo = tableFromArrays({
  department: ['Engineering', 'Sales', 'Engineering'],
  salary: [75000, 80000, 90000]
});

const combined = basicInfo.assign(jobInfo);
// Result: Table with name, age, department, salary columns
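
Since joins are not built in, the following is a minimal sketch of an inner join on a shared key column, implemented with the row iterator and tableFromJSON. The function name, the key column, and the inner-join semantics are illustrative assumptions, not part of the Arrow API:

import { Table, tableFromJSON } from "apache-arrow";

// Hash join: index the right table by key, then probe with the left table
function innerJoin(left: Table, right: Table, key: string): Table {
  const index = new Map<any, any[]>();
  for (const row of right) {
    const k = (row as any)[key];
    const bucket = index.get(k) ?? [];
    bucket.push(row);
    index.set(k, bucket);
  }

  const joined: Record<string, any>[] = [];
  for (const row of left) {
    for (const match of index.get((row as any)[key]) ?? []) {
      // toJSON() turns the row proxies into plain objects before merging
      joined.push({ ...row.toJSON(), ...match.toJSON() });
    }
  }
  return tableFromJSON(joined);
}

// Hypothetical usage: two tables sharing an 'id' column
// const enriched = innerJoin(customers, orders, 'id');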

Aggregation and Analysis

Methods for analyzing table data.

/**
 * Count values by column
 */
countBy(name: keyof T): CountByResult;

/**
 * Group by functionality (custom implementation needed)
 */
// Arrow doesn't provide a built-in groupBy, but one can be implemented; see the sketch after the usage examples below

/**
 * Statistical operations on columns
 */
// Access individual columns for statistical operations
table.getColumn('age').toArray().reduce((sum, age) => sum + (age || 0), 0) / table.length; // Mean age (Vectors don't expose reduce(); convert with toArray() first)

Usage Examples:

import { tableFromArrays } from "apache-arrow";

const table = tableFromArrays({
  name: ['Alice', 'Bob', 'Charlie', 'Diana', 'Eve'],
  department: ['Eng', 'Sales', 'Eng', 'Marketing', 'Eng'],
  age: [25, 30, 35, 28, 32]
});

// Count by department
const deptCounts = table.countBy('department');
console.log(deptCounts); // { Eng: 3, Sales: 1, Marketing: 1 }

// Custom aggregations using column operations
const ageColumn = table.getColumn('age');
const ages = ageColumn.toArray();  // Vectors don't expose reduce(); work on a plain array
const totalAge = ages.reduce((sum, age) => sum + (age || 0), 0);
const avgAge = totalAge / ageColumn.length;
console.log(`Average age: ${avgAge}`); // Average age: 30

// Find min/max
const minAge = ages.reduce((min, age) => 
  age !== null ? Math.min(min, age) : min, Infinity
);
const maxAge = ages.reduce((max, age) => 
  age !== null ? Math.max(max, age) : max, -Infinity
);
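
As noted above, groupBy has to be implemented in user code. Here is a minimal sketch that groups on one column and sums another via the row iterator; the function name and reducer are illustrative:

import { Table, tableFromArrays } from "apache-arrow";

// Group rows by one column and sum another
function groupBySum(table: Table, groupCol: string, valueCol: string): Map<any, number> {
  const sums = new Map<any, number>();
  for (const row of table) {
    const key = (row as any)[groupCol];
    sums.set(key, (sums.get(key) ?? 0) + ((row as any)[valueCol] ?? 0));
  }
  return sums;
}

const salaries = tableFromArrays({
  department: ['Eng', 'Sales', 'Eng'],
  salary: [75000, 80000, 90000]
});

console.log(groupBySum(salaries, 'department', 'salary'));
// Map { 'Eng' => 165000, 'Sales' => 80000 }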

Table Iteration and Scanning

Efficient methods for processing table data.

/**
 * Standard iteration over rows
 */
[Symbol.iterator](): IterableIterator<T[keyof T]>;

/**
 * Custom scanning with state
 */
scan(
  next: (index: number, batch: RecordBatch<T>) => T[keyof T],
  bind?: Partial<T>
): IterableIterator<T[keyof T]>;

/**
 * Batch-wise processing for large tables
 */
// Process table in batches for memory efficiency
for (const batch of table.batches) {
  // Process batch
}

Usage Examples:

import { tableFromArrays } from "apache-arrow";

const largeTable = tableFromArrays({
  id: Array.from({ length: 10000 }, (_, i) => i),
  value: Array.from({ length: 10000 }, (_, i) => Math.random())
});

// Memory-efficient iteration
let sum = 0;
let count = 0;

// Process row by row without loading all into memory
for (const row of largeTable) {
  if (row.value > 0.5) {
    sum += row.value;
    count++;
  }
}

console.log(`Average of values > 0.5: ${sum / count}`);

// Batch-wise processing for very large tables
const batchSums: number[] = [];
for (const batch of largeTable.batches) {
  const values = batch.getColumn('value').toArray();  // plain values for reduce()
  const batchSum = values.reduce((sum, val) => sum + (val || 0), 0);
  batchSums.push(batchSum);
}

console.log(`Per-batch sums:`, batchSums);

Schema Management

Dynamic Schema Operations

Working with schemas programmatically.

/**
 * Create schema from existing table
 */
const newSchema = table.schema.select('name', 'age');

/**
 * Add fields to existing schema
 */
const extendedSchema = table.schema.assign(
  new Field('new_field', new Float64())
);

/**
 * Modify field properties
 */
const modifiedSchema = new Schema(
  table.schema.fields.map(field => 
    field.name === 'age' 
      ? field.withType(new Float32()) 
      : field
  )
);

/**
 * Schema validation and compatibility
 */
function isSchemaCompatible(schema1: Schema, schema2: Schema): boolean {
  return schema1.names.every(name => schema2.hasField(name));
}
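
For instance, this check could guard a concat between tables whose schemas may have drifted; currentTable and archiveTable below are hypothetical:

// Hypothetical guard before concatenating two tables
if (isSchemaCompatible(currentTable.schema, archiveTable.schema)) {
  const merged = archiveTable.concat(currentTable);
  console.log(`Merged ${merged.length} rows`);
} else {
  throw new Error('Schemas are not compatible; refusing to concat');
}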

Metadata Management

Working with schema and field metadata.

/**
 * Add metadata to schema
 */
const metadata = new Map([
  ['version', '1.0'],
  ['created_by', 'data_pipeline']
]);

const schemaWithMeta = schema.withMetadata(metadata);

/**
 * Add metadata to field
 */
const fieldMetadata = new Map([
  ['unit', 'years'],
  ['description', 'Age in years']
]);

const fieldWithMeta = ageField.withMetadata(fieldMetadata);
const updatedSchema = schema.assign(fieldWithMeta);

Usage Examples:

import { Schema, Field, Int32, Utf8, tableFromArrays } from "apache-arrow";

// Create table with metadata
const schema = new Schema(
  [
    new Field('name', new Utf8()),
    new Field('age', new Int32())
  ],
  new Map([
    ['version', '2.1'],
    ['source', 'employee_db']
  ])
);

console.log(schema.metadata.get('version')); // '2.1'

// Add field with metadata
const enrichedField = new Field(
  'salary', 
  new Int32(),
  true,
  new Map([
    ['currency', 'USD'],
    ['confidential', 'true']
  ])
);

const enrichedSchema = schema.assign(enrichedField);
console.log(enrichedSchema.field('salary').metadata.get('currency')); // 'USD'