or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

docs

builders.md data-types.md index.md io-operations.md streaming.md tables.md utilities.md vectors.md
tile.json

docs/data-types.md

Data Types

Apache Arrow JavaScript provides a comprehensive type system with 47+ data types that correspond to the Arrow columnar specification. These types enable efficient storage and processing of structured data with full type safety in TypeScript.

Capabilities

Base DataType Class

All Arrow data types inherit from the abstract DataType class, which provides common functionality and type checking methods.

/**
 * Abstract base class for all Arrow data types.
 *
 * Holds the metadata common to every type (type id, backing array
 * constructors, child fields) and exposes static type guards that narrow an
 * arbitrary value to a concrete data type class.
 */
abstract class DataType<T = any> {
  /** Unique identifier for this data type */
  readonly typeId: Type;

  /**
   * TypedArray constructor for underlying storage.
   * NOTE(review): concrete types narrow this to typed-array constructors such
   * as Uint8ArrayConstructor, which are not assignable to the built-in
   * ArrayConstructor - confirm the intended declared type.
   */
  readonly ArrayType: ArrayConstructor;

  /** TypedArray constructor for offset arrays (variable-length types) */
  readonly OffsetArrayType: ArrayConstructor;

  /** Child types for nested data types */
  readonly children: Field[];

  /**
   * Type of a single element value. Each concrete type narrows it (for
   * example `string` for Utf8); generic code reads it as `T['TValue']`.
   */
  readonly TValue: any;

  // Type checking static methods: each guard narrows `x` to one type family.
  static isNull(x: any): x is Null;
  static isInt(x: any): x is Int;
  static isFloat(x: any): x is Float;
  static isBinary(x: any): x is Binary;
  static isLargeBinary(x: any): x is LargeBinary;
  static isUtf8(x: any): x is Utf8;
  static isLargeUtf8(x: any): x is LargeUtf8;
  static isBool(x: any): x is Bool;
  static isDecimal(x: any): x is Decimal;
  static isDate(x: any): x is Date_;
  static isTime(x: any): x is Time;
  static isTimestamp(x: any): x is Timestamp;
  static isInterval(x: any): x is Interval;
  static isDuration(x: any): x is Duration;
  static isList(x: any): x is List;
  static isStruct(x: any): x is Struct;
  static isUnion(x: any): x is Union;
  static isDictionary(x: any): x is Dictionary;
  static isFixedSizeBinary(x: any): x is FixedSizeBinary;
  static isFixedSizeList(x: any): x is FixedSizeList;
  static isMap(x: any): x is Map_;
  static isDenseUnion(x: any): x is DenseUnion;
  static isSparseUnion(x: any): x is SparseUnion;
}

Type Enumeration

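Core enumeration defining all Arrow data types. Non-negative values are the serializable Arrow types; negative values are TypeScript-specific identifiers for the Dictionary type and for concrete subtypes such as Int8 or TimestampSecond.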

/**
 * Arrow Type enumeration.
 *
 * Values 0 through 20 are the serializable types. The negative values
 * identify the Dictionary type and the concrete subtypes declared in this
 * document (for example Int8 extends Int, DateDay extends Date_).
 */
enum Type {
  // Serializable types (non-negative values; NONE is 0)
  NONE = 0,
  Null = 1,
  Int = 2,
  Float = 3,
  Binary = 4,
  Utf8 = 5,
  Bool = 6,
  Decimal = 7,
  Date = 8,
  Time = 9,
  Timestamp = 10,
  Interval = 11,
  List = 12,
  Struct = 13,
  Union = 14,
  FixedSizeBinary = 15,
  FixedSizeList = 16,
  Map = 17,
  Duration = 18,
  LargeBinary = 19,
  LargeUtf8 = 20,
  
  // TypeScript-specific types (negative values)
  Dictionary = -1,
  // Integer subtypes
  Int8 = -2,
  Int16 = -3,
  Int32 = -4,
  Int64 = -5,
  Uint8 = -6,
  Uint16 = -7,
  Uint32 = -8,
  Uint64 = -9,
  // Floating-point subtypes
  Float16 = -10,
  Float32 = -11,
  Float64 = -12,
  // Date subtypes
  DateDay = -13,
  DateMillisecond = -14,
  // Timestamp subtypes
  TimestampSecond = -15,
  TimestampMillisecond = -16,
  TimestampMicrosecond = -17,
  TimestampNanosecond = -18,
  // Time subtypes
  TimeSecond = -19,
  TimeMillisecond = -20,
  TimeMicrosecond = -21,
  TimeNanosecond = -22,
  // Union subtypes
  DenseUnion = -23,
  SparseUnion = -24,
  // Interval subtypes
  IntervalDayTime = -25,
  IntervalYearMonth = -26,
  // Duration subtypes
  DurationSecond = -27,
  DurationMillisecond = -28,
  DurationMicrosecond = -29,
  DurationNanosecond = -30,
  // Third interval subtype, numbered after the duration subtypes
  IntervalMonthDayNano = -31
}

Primitive Types

Null Type

Represents null values with no physical storage requirements.

/**
 * NULL type with no physical storage.
 * Used for columns that are entirely null.
 */
class Null extends DataType {
  readonly typeId: Type.Null;
  /** Declared as `never` because there is no backing array */
  readonly ArrayType: never;
  /** Every element reads as null */
  readonly TValue: null;
}

Usage:

import { Null } from "apache-arrow";

const nullType = new Null();
// Used for columns that are entirely null

Integer Types

Signed and unsigned integer types with various bit widths.

/**
 * Base integer type, parameterized by bit width.
 * Signed and unsigned variants both derive from this class and are
 * distinguished by `isSigned`.
 */
class Int<T extends IntBitWidth = IntBitWidth> extends DataType {
  /** Whether values are signed */
  readonly isSigned: boolean;
  /** Width of each value in bits (8, 16, 32 or 64) */
  readonly bitWidth: T;
  /** Array constructor type, which depends on the bit width T */
  readonly ArrayType: IntArrayConstructor<T>;
  /** Element value type, taken from the array type for this bit width */
  readonly TValue: IntArray<T>['TValue'];
}

// Specific integer types.
// A signed type and its unsigned counterpart share the same Int<N> base, so
// the declarations below differ only in their typeId.
class Int8 extends Int<8> { readonly typeId: Type.Int8; }
class Int16 extends Int<16> { readonly typeId: Type.Int16; }
class Int32 extends Int<32> { readonly typeId: Type.Int32; }
class Int64 extends Int<64> { readonly typeId: Type.Int64; }
class Uint8 extends Int<8> { readonly typeId: Type.Uint8; }
class Uint16 extends Int<16> { readonly typeId: Type.Uint16; }
class Uint32 extends Int<32> { readonly typeId: Type.Uint32; }
class Uint64 extends Int<64> { readonly typeId: Type.Uint64; }

// Bit width types
type IntBitWidth = 8 | 16 | 32 | 64;

// Constructor signatures
new Int8(): Int8;
new Int16(): Int16; 
new Int32(): Int32;
new Int64(): Int64;
new Uint8(): Uint8;
new Uint16(): Uint16;
new Uint32(): Uint32;
new Uint64(): Uint64;

Usage:

import { Int32, Uint8, Int64 } from "apache-arrow";

const int32Type = new Int32();     // 32-bit signed integers
const uint8Type = new Uint8();     // 8-bit unsigned integers  
const int64Type = new Int64();     // 64-bit signed integers (BigInt values)

Float Types

Floating-point numeric types with different precisions.

/**
 * Base floating-point type
 */
class Float<T extends Precision = Precision> extends DataType {
  readonly precision: T;
  readonly ArrayType: FloatArrayConstructor<T>;
  readonly TValue: number;
}

// Specific float types
class Float16 extends Float<Precision.HALF> { 
  readonly typeId: Type.Float16;
  readonly ArrayType: Uint16ArrayConstructor;
}

class Float32 extends Float<Precision.SINGLE> { 
  readonly typeId: Type.Float32;
  readonly ArrayType: Float32ArrayConstructor;
}

class Float64 extends Float<Precision.DOUBLE> { 
  readonly typeId: Type.Float64;
  readonly ArrayType: Float64ArrayConstructor;
}

// Precision enumeration
enum Precision {
  HALF = 0,    // 16-bit half precision
  SINGLE = 1,  // 32-bit single precision  
  DOUBLE = 2   // 64-bit double precision
}

Usage:

import { Float32, Float64, Float16 } from "apache-arrow";

const float32Type = new Float32(); // 32-bit floats
const float64Type = new Float64(); // 64-bit floats (standard JavaScript numbers)
const float16Type = new Float16(); // 16-bit half-precision floats

Boolean Type

Boolean values stored as bit-packed arrays.

/**
 * Boolean type with bit-packed storage
 */
class Bool extends DataType {
  readonly typeId: Type.Bool;
  /** Values are stored as bits inside a Uint8Array */
  readonly ArrayType: Uint8ArrayConstructor;
  /** Elements read as JavaScript booleans */
  readonly TValue: boolean;
}

Usage:

import { Bool } from "apache-arrow";

const boolType = new Bool();
// Stores boolean values efficiently as bits in Uint8Array

String Types

UTF-8 encoded string types with variable-length storage.

/**
 * UTF-8 string type (up to 2^31-1 bytes)
 */
class Utf8 extends DataType {
  readonly typeId: Type.Utf8;
  /** Byte storage for the UTF-8 encoded data */
  readonly ArrayType: Uint8ArrayConstructor;
  /** 32-bit signed offsets, consistent with the 2^31-1 byte limit */
  readonly OffsetArrayType: Int32ArrayConstructor;
  /** Elements read as JavaScript strings */
  readonly TValue: string;
}

/**
 * Large UTF-8 string type (up to 2^63-1 bytes).
 * Declared like Utf8 except for the wider offset array.
 */
class LargeUtf8 extends DataType {
  readonly typeId: Type.LargeUtf8;
  /** Byte storage for the UTF-8 encoded data */
  readonly ArrayType: Uint8ArrayConstructor;
  /** 64-bit signed offsets, consistent with the 2^63-1 byte limit */
  readonly OffsetArrayType: BigInt64ArrayConstructor;
  /** Elements read as JavaScript strings */
  readonly TValue: string;
}

Usage:

import { Utf8, LargeUtf8 } from "apache-arrow";

const utf8Type = new Utf8();           // Standard UTF-8 strings
const largeUtf8Type = new LargeUtf8(); // Large UTF-8 strings for very long text

Binary Types

Raw binary data types with variable-length storage.

/**
 * Variable-length binary type (up to 2^31-1 bytes)
 */
class Binary extends DataType {
  readonly typeId: Type.Binary;
  /** Byte storage for the raw data */
  readonly ArrayType: Uint8ArrayConstructor;
  /** 32-bit signed offsets, consistent with the 2^31-1 byte limit */
  readonly OffsetArrayType: Int32ArrayConstructor;
  /** Elements read as Uint8Array values */
  readonly TValue: Uint8Array;
}

/**
 * Large variable-length binary type (up to 2^63-1 bytes).
 * Declared like Binary except for the wider offset array.
 */
class LargeBinary extends DataType {
  readonly typeId: Type.LargeBinary;
  /** Byte storage for the raw data */
  readonly ArrayType: Uint8ArrayConstructor;
  /** 64-bit signed offsets, consistent with the 2^63-1 byte limit */
  readonly OffsetArrayType: BigInt64ArrayConstructor;
  /** Elements read as Uint8Array values */
  readonly TValue: Uint8Array;
}

/**
 * Fixed-size binary type.
 * Unlike Binary and LargeBinary, no OffsetArrayType is declared here.
 */
class FixedSizeBinary extends DataType {
  readonly typeId: Type.FixedSizeBinary;
  /** Size of every value in bytes, as passed to the constructor (e.g. 16 for UUIDs) */
  readonly byteWidth: number;
  /** Byte storage for the raw data */
  readonly ArrayType: Uint8ArrayConstructor;
  /** Elements read as Uint8Array values */
  readonly TValue: Uint8Array;
}

// Constructor
new FixedSizeBinary(byteWidth: number): FixedSizeBinary;

Usage:

import { Binary, LargeBinary, FixedSizeBinary } from "apache-arrow";

const binaryType = new Binary();                    // Variable-length binary data
const largeBinaryType = new LargeBinary();          // Large binary data
const fixedBinaryType = new FixedSizeBinary(16);    // Fixed 16-byte binary (e.g., UUIDs)

Decimal Type

High-precision decimal numbers with configurable scale and precision.

/**
 * Decimal type with configurable precision and scale.
 * For example, scale 2 with precision 10 describes values such as 12345678.90.
 */
class Decimal extends DataType {
  readonly typeId: Type.Decimal;
  /** Number of digits after the decimal point */
  readonly scale: number;
  /** Total number of digits */
  readonly precision: number;
  /** Storage width in bits; optional in the constructor */
  readonly bitWidth: number;
  readonly ArrayType: Uint32ArrayConstructor;
  /** Elements read as Uint32Array values rather than JavaScript numbers */
  readonly TValue: Uint32Array;
}

// Constructor
new Decimal(scale: number, precision: number, bitWidth?: number): Decimal;

Usage:

import { Decimal } from "apache-arrow";

const decimalType = new Decimal(2, 10); // 10 digits total, 2 after decimal point
// Example: 12345678.90

Temporal Types

Date Types

Date values stored as days or milliseconds since Unix epoch.

/**
 * Base date type, parameterized by the unit in which dates are counted.
 */
class Date_<T extends DateUnit = DateUnit> extends DataType {
  /** DateUnit.DAY or DateUnit.MILLISECOND, both counted from the Unix epoch */
  readonly unit: T;
  /** Array constructor type, which depends on the unit T */
  readonly ArrayType: DateArrayType<T>;
  /** Elements read as JavaScript numbers */
  readonly TValue: number;
}

// Specific date types
class DateDay extends Date_<DateUnit.DAY> {
  readonly typeId: Type.DateDay;
  readonly ArrayType: Int32ArrayConstructor;
}

class DateMillisecond extends Date_<DateUnit.MILLISECOND> {
  readonly typeId: Type.DateMillisecond;
  readonly ArrayType: BigInt64ArrayConstructor;
}

// Date units
enum DateUnit {
  DAY = 0,         // Days since Unix epoch
  MILLISECOND = 1  // Milliseconds since Unix epoch
}

Usage:

import { DateDay, DateMillisecond } from "apache-arrow";

const dayType = new DateDay();              // Dates as day count
const millisecondType = new DateMillisecond(); // Dates as millisecond timestamps

Time Types

Time-of-day values with various precision levels.

/**
 * Base time type for time-of-day values, parameterized by unit.
 */
class Time_<T extends TimeUnit = TimeUnit> extends DataType {
  /** Resolution of the stored value (second through nanosecond) */
  readonly unit: T;
  /** Storage width; the subtypes declare 32 for second/millisecond and 64 for microsecond/nanosecond */
  readonly bitWidth: TimeBitWidth;
  /** Array constructor type, which depends on the unit T */
  readonly ArrayType: TimeArrayType<T>;
  /** Element value type; the subtypes declare number for 32-bit units and bigint for 64-bit units */
  readonly TValue: TimeValue<T>;
}

// Specific time types
class TimeSecond extends Time_<TimeUnit.SECOND> {
  readonly typeId: Type.TimeSecond;
  readonly bitWidth: 32;
  readonly ArrayType: Int32ArrayConstructor;
  readonly TValue: number;
}

class TimeMillisecond extends Time_<TimeUnit.MILLISECOND> {
  readonly typeId: Type.TimeMillisecond;
  readonly bitWidth: 32;
  readonly ArrayType: Int32ArrayConstructor;
  readonly TValue: number;
}

class TimeMicrosecond extends Time_<TimeUnit.MICROSECOND> {
  readonly typeId: Type.TimeMicrosecond;
  readonly bitWidth: 64;
  readonly ArrayType: BigInt64ArrayConstructor;
  readonly TValue: bigint;
}

class TimeNanosecond extends Time_<TimeUnit.NANOSECOND> {
  readonly typeId: Type.TimeNanosecond;
  readonly bitWidth: 64;
  readonly ArrayType: BigInt64ArrayConstructor;
  readonly TValue: bigint;
}

// Time units
enum TimeUnit {
  SECOND = 0,
  MILLISECOND = 1,
  MICROSECOND = 2,
  NANOSECOND = 3
}

Usage:

import { TimeSecond, TimeMillisecond, TimeMicrosecond, TimeNanosecond } from "apache-arrow";

const secondType = new TimeSecond();           // Time in seconds
const millisecondType = new TimeMillisecond(); // Time in milliseconds
const microsecondType = new TimeMicrosecond(); // Time in microseconds (BigInt)
const nanosecondType = new TimeNanosecond();   // Time in nanoseconds (BigInt)

Timestamp Types

Absolute timestamps with timezone support.

/**
 * Base timestamp type, parameterized by unit and carrying an optional timezone.
 */
class Timestamp_<T extends TimeUnit = TimeUnit> extends DataType {
  /** Resolution of the stored value (second through nanosecond) */
  readonly unit: T;
  /** Timezone string such as 'UTC', or null when no timezone is set */
  readonly timezone: string | null;
  /** Array constructor type, which depends on the unit T */
  readonly ArrayType: TimestampArrayType<T>;
  /** Element value type, which depends on the unit T */
  readonly TValue: TimestampValue<T>;
}

// Specific timestamp types
// NOTE(review): the Arrow columnar format defines Timestamp as a 64-bit
// integer for every unit; confirm that Int32Array is the correct backing array
// for the second and millisecond variants below.
class TimestampSecond extends Timestamp_<TimeUnit.SECOND> {
  readonly typeId: Type.TimestampSecond;
  readonly ArrayType: Int32ArrayConstructor;
  readonly TValue: number;
}

class TimestampMillisecond extends Timestamp_<TimeUnit.MILLISECOND> {
  readonly typeId: Type.TimestampMillisecond;
  readonly ArrayType: Int32ArrayConstructor;
  readonly TValue: number;
}

class TimestampMicrosecond extends Timestamp_<TimeUnit.MICROSECOND> {
  readonly typeId: Type.TimestampMicrosecond;
  readonly ArrayType: BigInt64ArrayConstructor;
  readonly TValue: bigint;
}

class TimestampNanosecond extends Timestamp_<TimeUnit.NANOSECOND> {
  readonly typeId: Type.TimestampNanosecond;
  readonly ArrayType: BigInt64ArrayConstructor;
  readonly TValue: bigint;
}

// Constructors with optional timezone
new TimestampSecond(timezone?: string | null): TimestampSecond;
new TimestampMillisecond(timezone?: string | null): TimestampMillisecond;
new TimestampMicrosecond(timezone?: string | null): TimestampMicrosecond;
new TimestampNanosecond(timezone?: string | null): TimestampNanosecond;

Usage:

import { TimestampSecond, TimestampMillisecond, TimestampMicrosecond } from "apache-arrow";

const timestampType = new TimestampMillisecond();           // Millisecond timestamps, timezone argument omitted
const utcTimestampType = new TimestampSecond('UTC');        // With timezone
const localTimestampType = new TimestampMicrosecond(null);  // Explicit null: the timezone property is null (no timezone recorded)

Duration Types

Time duration values representing elapsed time.

/**
 * Duration type representing elapsed time, parameterized by unit.
 *
 * NOTE(review): this base class declares typeId as Type.Duration while each
 * subtype redeclares it with its own Type member, and the other base classes
 * in this document leave typeId undeclared - confirm which form is intended.
 */
class Duration<T extends TimeUnit = TimeUnit> extends DataType {
  readonly typeId: Type.Duration;
  /** Resolution of the stored value (second through nanosecond) */
  readonly unit: T;
  /** Array constructor type, which depends on the unit T */
  readonly ArrayType: DurationArrayType<T>;
  /** Element value type, which depends on the unit T */
  readonly TValue: DurationValue<T>;
}

// Specific duration types
// NOTE(review): the Arrow columnar format defines Duration as a 64-bit
// integer for every unit; confirm that Int32Array / number is correct for the
// second and millisecond variants below.
class DurationSecond extends Duration<TimeUnit.SECOND> {
  readonly typeId: Type.DurationSecond;
  readonly ArrayType: Int32ArrayConstructor;
  readonly TValue: number;
}

class DurationMillisecond extends Duration<TimeUnit.MILLISECOND> {
  readonly typeId: Type.DurationMillisecond;
  readonly ArrayType: Int32ArrayConstructor;
  readonly TValue: number;
}

class DurationMicrosecond extends Duration<TimeUnit.MICROSECOND> {
  readonly typeId: Type.DurationMicrosecond;
  readonly ArrayType: BigInt64ArrayConstructor;
  readonly TValue: bigint;
}

class DurationNanosecond extends Duration<TimeUnit.NANOSECOND> {
  readonly typeId: Type.DurationNanosecond;
  readonly ArrayType: BigInt64ArrayConstructor;
  readonly TValue: bigint;
}

Usage:

import { DurationSecond, DurationMillisecond, DurationMicrosecond } from "apache-arrow";

const secondDurationType = new DurationSecond();        // Duration in seconds
const millisecondDurationType = new DurationMillisecond(); // Duration in milliseconds
const microsecondDurationType = new DurationMicrosecond(); // Duration in microseconds

Interval Types

Calendar interval types for date/time arithmetic.

/**
 * Base interval type
 */
class Interval_<T extends IntervalUnit = IntervalUnit> extends DataType {
  readonly unit: T;
  readonly ArrayType: IntervalArrayType<T>;
}

// Specific interval types
class IntervalDayTime extends Interval_<IntervalUnit.DAY_TIME> {
  readonly typeId: Type.IntervalDayTime;
  readonly TValue: { days: number; milliseconds: number };
}

class IntervalYearMonth extends Interval_<IntervalUnit.YEAR_MONTH> {
  readonly typeId: Type.IntervalYearMonth;
  readonly TValue: { years: number; months: number };
}

class IntervalMonthDayNano extends Interval_<IntervalUnit.MONTH_DAY_NANO> {
  readonly typeId: Type.IntervalMonthDayNano;
  readonly TValue: { months: number; days: number; nanoseconds: bigint };
}

// Interval units; each member is the unit argument of one interval subtype
enum IntervalUnit {
  YEAR_MONTH = 0,     // used by IntervalYearMonth
  DAY_TIME = 1,       // used by IntervalDayTime
  MONTH_DAY_NANO = 2  // used by IntervalMonthDayNano
}

Usage:

import { IntervalDayTime, IntervalYearMonth, IntervalMonthDayNano } from "apache-arrow";

const dayTimeType = new IntervalDayTime();         // Days and milliseconds
const yearMonthType = new IntervalYearMonth();     // Years and months
const monthDayNanoType = new IntervalMonthDayNano(); // Months, days, and nanoseconds

Nested Types

List Type

Variable-length lists of homogeneous elements.

/**
 * Variable-length list of elements of type T
 */
class List<T extends DataType = any> extends DataType {
  readonly typeId: Type.List;
  /** Element field(s); presumably just the single child passed to the constructor - confirm */
  readonly children: Field<T>[];
  readonly ArrayType: Int32ArrayConstructor; // Offset array
  /** 32-bit signed offsets */
  readonly OffsetArrayType: Int32ArrayConstructor;
  /** Elements read as arrays whose items are element values or null */
  readonly TValue: (T['TValue'] | null)[];
}

/**
 * Fixed-size list of elements of type T.
 * Unlike List, no offset array type is declared here.
 */
class FixedSizeList<T extends DataType = any> extends DataType {
  readonly typeId: Type.FixedSizeList;
  /** Number of elements in every list, as passed to the constructor */
  readonly listSize: number;
  /** Element field(s); presumably just the single child passed to the constructor - confirm */
  readonly children: Field<T>[];
  /** Elements read as arrays whose items are element values or null */
  readonly TValue: (T['TValue'] | null)[];
}

// Constructors
new List<T>(child: Field<T>): List<T>;
new FixedSizeList<T>(listSize: number, child: Field<T>): FixedSizeList<T>;

Usage:

import { List, FixedSizeList, Field, Int32, Utf8 } from "apache-arrow";

const stringListType = new List(new Field('item', new Utf8()));
const fixedIntListType = new FixedSizeList(5, new Field('item', new Int32()));

Struct Type

Record-like structures with named fields.

/**
 * Struct type representing records with named fields.
 * T maps each field name to the data type of that field.
 */
class Struct<T extends TypeMap = any> extends DataType {
  readonly typeId: Type.Struct;
  /** The named fields supplied to the constructor */
  readonly children: Field[];
  /** Elements read as row objects keyed by field name */
  readonly TValue: StructRowProxy<T>;
}

// Constructor
new Struct<T>(children: Field[]): Struct<T>;

/**
 * Helper type for struct values.
 *
 * Maps every field name in T to the element value type of that field, or
 * null. Declared as a type alias because mapped types (`[K in keyof T]`) are
 * not permitted inside an interface body. The constraint guarantees that each
 * field type exposes a `TValue` member to index.
 */
type StructRowProxy<T extends Record<string, { TValue: unknown }>> = {
  [K in keyof T]: T[K]['TValue'] | null;
};

Usage:

import { Struct, Field, Utf8, Int32, Bool } from "apache-arrow";

const personType = new Struct([
  new Field('name', new Utf8()),
  new Field('age', new Int32()),
  new Field('active', new Bool())
]);
// Creates struct type: { name: string, age: number, active: boolean }

Map Type

Key-value pair collections.

/**
 * Map type representing key-value pairs.
 * TKey and TValue are the data types of the keys and of the values.
 */
class Map_<TKey extends DataType = any, TValue extends DataType = any> extends DataType {
  readonly typeId: Type.Map;
  /** Presumably holds the entries field passed to the constructor (a Struct with key and value fields) - confirm */
  readonly children: Field[];
  /** Flag supplied as the optional second constructor argument */
  readonly keysSorted: boolean;
  /** Elements read as a Map from key element values to value element values */
  readonly TValue: Map<TKey['TValue'], TValue['TValue']>;
}

// Constructor
new Map_<TKey, TValue>(
  child: Field<Struct<{ key: TKey; value: TValue }>>,
  keysSorted?: boolean
): Map_<TKey, TValue>;

Usage:

import { Map_, Field, Struct, Utf8, Int32 } from "apache-arrow";

const stringIntMapType = new Map_(
  new Field('entries', new Struct([
    new Field('key', new Utf8()),
    new Field('value', new Int32())
  ]))
);

Union Types

Tagged unions supporting multiple data types in a single column.

/**
 * Base union type shared by DenseUnion and SparseUnion.
 */
abstract class Union_<T extends DataType = any> extends DataType {
  /** UnionMode.Sparse or UnionMode.Dense; each subclass narrows it to one member */
  readonly mode: UnionMode;
  /** Type ids of the union members; presumably one per entry in children - confirm */
  readonly typeIds: Int8Array;
  /** One field per data type the union can hold */
  readonly children: Field[];
  /** Element value type, taken from T */
  readonly TValue: T['TValue'];
}

/**
 * Dense union - type buffer + offset buffer
 */
class DenseUnion<T extends DataType = any> extends Union_<T> {
  readonly typeId: Type.DenseUnion;
  readonly mode: UnionMode.Dense;
}

/**
 * Sparse union - type buffer only
 */
class SparseUnion<T extends DataType = any> extends Union_<T> {
  readonly typeId: Type.SparseUnion;
  readonly mode: UnionMode.Sparse;
}

// Union modes; each member is the fixed mode of one union subtype
enum UnionMode {
  Sparse = 0,  // mode of SparseUnion (type buffer only)
  Dense = 1    // mode of DenseUnion (type buffer + offset buffer)
}

// Constructors
// The union mode is fixed by the subclass (DenseUnion is always
// UnionMode.Dense, SparseUnion always UnionMode.Sparse), so it is not a
// constructor argument: pass the type ids first, then the child fields.
new DenseUnion<T>(typeIds: Int8Array | ArrayLike<number>, children: Field[]): DenseUnion<T>;
new SparseUnion<T>(typeIds: Int8Array | ArrayLike<number>, children: Field[]): SparseUnion<T>;

Usage:

import { DenseUnion, SparseUnion, Field, Utf8, Int32, UnionMode } from "apache-arrow";

// Type id 0 selects the Utf8 child and type id 1 selects the Int32 child.
const denseUnionType = new DenseUnion(
  [0, 1],
  [
    new Field('0', new Utf8()),
    new Field('1', new Int32())
  ]
);

Dictionary Type

Dictionary-encoded data with index and value arrays.

/**
 * Dictionary type for efficient storage of repeated values.
 * T is the data type of the dictionary values and TKey the data type of the
 * indices that refer to them.
 */
class Dictionary<T extends DataType = any, TKey extends DataType = any> extends DataType {
  readonly typeId: Type.Dictionary;
  /** Dictionary ID; optional in the constructor */
  readonly id: number;
  /** Data type of the actual values (for example Utf8 for strings) */
  readonly valueType: T;
  /** Data type of the indices into the dictionary (for example Int32) */
  readonly indexType: TKey;
  /** Whether the dictionary is ordered; optional in the constructor */
  readonly isOrdered: boolean;
  /** Elements read as values of the value type, not as indices */
  readonly TValue: T['TValue'];
}

// Constructor
new Dictionary<T, TKey>(
  valueType: T,
  indexType: TKey,
  id?: number,
  isOrdered?: boolean
): Dictionary<T, TKey>;

Usage:

import { Dictionary, Utf8, Int32 } from "apache-arrow";

const stringDictionaryType = new Dictionary(
  new Utf8(),     // Value type (the actual strings)
  new Int32(),    // Index type (indices into dictionary)
  1,              // Dictionary ID
  false           // Not ordered
);

Type Creation Examples

Creating and using data types:

import { 
  Int32, Float64, Utf8, Bool, 
  List, Struct, Field, 
  TimestampMillisecond,
  vectorFromArray 
} from "apache-arrow";

// Simple types: build vectors from plain arrays with an explicit data type
const numbers = vectorFromArray([1, 2, 3], new Int32());
const strings = vectorFromArray(['a', 'b', 'c'], new Utf8());
const bools = vectorFromArray([true, false, true], new Bool());

// Complex nested type: a struct with scalar, list and timestamp fields
const complexType = new Struct([
  new Field('id', new Int32()),
  new Field('name', new Utf8()),
  new Field('scores', new List(new Field('item', new Float64()))),
  new Field('created_at', new TimestampMillisecond('UTC'))
]);

// The struct type represents: {
//   id: number,
//   name: string,
//   scores: number[],
//   created_at: number   (TimestampMillisecond declares TValue as number, not Date)
// }