Apache Arrow JavaScript provides a comprehensive set of utility functions for data manipulation, type checking, mathematical operations, and compatibility helpers across different JavaScript environments. These utilities are essential for efficient Arrow data processing and cross-platform compatibility.
The main utility object containing all helper functions organized by category.
/**
* Complete utility object with all helper functions
*/
const util = {
// BigNumber utilities (from util/bn.ts)
...util_bn_,
// Integer utilities (from util/int.ts)
...util_int_,
// Bit manipulation utilities (from util/bit.ts)
...util_bit_,
// Math utilities (from util/math.ts)
...util_math_,
// Buffer utilities (from util/buffer.ts)
...util_buffer_,
// Vector utilities (from util/vector.ts)
...util_vector_,
// Pretty printing utilities (from util/pretty.ts)
...util_pretty_,
// Interval utilities (from util/interval.ts)
...util_interval_,
// Type comparison utilities (from visitor/typecomparator.ts)
compareSchemas,
compareFields,
compareTypes,
};
Mathematical operations and constants for numerical computations.
/**
* BigInt array compatibility helpers
*/
const BigInt64Array: BigInt64ArrayConstructor;
const BigUint64Array: BigUint64ArrayConstructor;
/**
* Mathematical constants and functions
*/
const kInt32DecimalDigits: number;
const kMaxInt32DecimalDigits: number;
/**
* Type checking for numeric types
*/
function isTypedArray(value: any): value is TypedArray;
function isBigIntArray(value: any): value is BigIntArray;
function isArrayLike(value: any): value is ArrayLike<any>;
/**
* Numeric precision utilities
*/
function getPrecision<T extends number | bigint>(value: T): number;
function getScale<T extends number | bigint>(value: T): number;
/**
* Safe arithmetic operations
*/
function addSafe(a: number, b: number): number;
function subtractSafe(a: number, b: number): number;
function multiplySafe(a: number, b: number): number;
function divideSafe(a: number, b: number): number;
Usage Examples:
import { util } from "apache-arrow";
// Check array types
const int32Array = new Int32Array([1, 2, 3]);
const bigintArray = new BigInt64Array([1n, 2n, 3n]);
console.log(util.isTypedArray(int32Array)); // true
console.log(util.isBigIntArray(bigintArray)); // true
console.log(util.isArrayLike([1, 2, 3])); // true
// Safe arithmetic
const result1 = util.addSafe(Number.MAX_SAFE_INTEGER, 1);
const result2 = util.divideSafe(10, 3);
// Precision handling
const precision = util.getPrecision(123.456); // 6
const scale = util.getScale(123.456); // 3
Functions for working with ArrayBuffers and typed arrays.
/**
* Convert various inputs to ArrayBuffer views
*/
function toArrayBufferView<T extends ArrayBufferView>(
input: ArrayBufferViewInput
): T;
function toArrayBufferView(
ArrayBufferViewCtor: ArrayBufferViewConstructor,
input: ArrayBufferViewInput
): ArrayBufferView;
/**
* Convert to Uint8Array
*/
function toUint8Array(input: ArrayBufferViewInput): Uint8Array;
/**
* Convert to specific typed array
*/
function toInt8Array(input: ArrayBufferViewInput): Int8Array;
function toInt16Array(input: ArrayBufferViewInput): Int16Array;
function toInt32Array(input: ArrayBufferViewInput): Int32Array;
function toBigInt64Array(input: ArrayBufferViewInput): BigInt64Array;
function toFloat32Array(input: ArrayBufferViewInput): Float32Array;
function toFloat64Array(input: ArrayBufferViewInput): Float64Array;
/**
* Buffer manipulation
*/
function rebaseValueOffsets(
offset: number,
length: number,
valueOffsets: Int32Array | BigInt64Array
): Int32Array | BigInt64Array;
function compareArrayLike<T extends ArrayLike<any>>(
a: T,
b: T
): number;
/**
* Memory allocation utilities
*/
function memcpy<T extends ArrayBufferView>(
target: T,
source: ArrayBufferView,
targetOffset?: number,
sourceOffset?: number,
length?: number
): T;
// Input type definitions
type ArrayBufferViewInput =
| ArrayBuffer
| ArrayBufferView
| Iterable<number>
| string;
type ArrayBufferViewConstructor =
| Int8ArrayConstructor
| Uint8ArrayConstructor
| Int16ArrayConstructor
| Uint16ArrayConstructor
| Int32ArrayConstructor
| Uint32ArrayConstructor
| Float32ArrayConstructor
| Float64ArrayConstructor
| BigInt64ArrayConstructor
| BigUint64ArrayConstructor;
Usage Examples:
import { util } from "apache-arrow";
// Convert various inputs to typed arrays
const buffer = new ArrayBuffer(16);
const uint8View = util.toUint8Array(buffer);
const int32View = util.toInt32Array(buffer);
const float64View = util.toFloat64Array(buffer);
// Convert from different sources
const fromArray = util.toInt32Array([1, 2, 3, 4]);
const fromString = util.toUint8Array("hello"); // UTF-8 encoded
const fromIterable = util.toFloat32Array([1.1, 2.2, 3.3]);
// Buffer operations
const sourceBuffer = new Int32Array([10, 20, 30, 40, 50]);
const targetBuffer = new Int32Array(10);
// Copy data between buffers
util.memcpy(targetBuffer, sourceBuffer, 2, 1, 3);
// Copies sourceBuffer[1:4] to targetBuffer[2:5]
// Offset operations for variable-length data
const offsets = new Int32Array([0, 5, 12, 18]);
const rebasedOffsets = util.rebaseValueOffsets(10, 3, offsets);
// Adjusts offsets for slicing operations
// Compare arrays
const array1 = new Int32Array([1, 2, 3]);
const array2 = new Int32Array([1, 2, 4]);
const comparison = util.compareArrayLike(array1, array2); // -1 (array1 < array2)
Functions for efficient bit-level operations, particularly for boolean vectors.
/**
* Bit manipulation functions
*/
function getBit(bitmap: Uint8Array, index: number): 0 | 1;
function setBit(bitmap: Uint8Array, index: number): Uint8Array;
function clearBit(bitmap: Uint8Array, index: number): Uint8Array;
/**
* Bitmap operations
*/
function truncateBitmap(
bitmap: Uint8Array,
offset: number,
length: number
): Uint8Array;
function packBooleans(booleans: boolean[]): Uint8Array;
function unpackBooleans(
bitmap: Uint8Array,
length: number,
offset?: number
): boolean[];
/**
* Bit counting operations
*/
function popcount32(value: number): number;
function popcount64(high: number, low: number): number;
/**
* Bitmap validation
*/
function validateBitmap(
bitmap: Uint8Array,
offset: number,
length: number
): boolean;
/**
* Null bitmap utilities
*/
function createNullBitmap(length: number, fillValue?: boolean): Uint8Array;
function invertBitmap(bitmap: Uint8Array, length: number): Uint8Array;
Usage Examples:
import { util } from "apache-arrow";
// Create and manipulate bitmaps
const bitmap = new Uint8Array(4); // 32 bits
// Set individual bits
util.setBit(bitmap, 0); // Set bit 0
util.setBit(bitmap, 7); // Set bit 7
util.setBit(bitmap, 15); // Set bit 15
// Check bits
console.log(util.getBit(bitmap, 0)); // 1
console.log(util.getBit(bitmap, 1)); // 0
// Clear bits
util.clearBit(bitmap, 7);
console.log(util.getBit(bitmap, 7)); // 0
// Pack boolean array into bitmap
const booleans = [true, false, true, true, false, true];
const packed = util.packBooleans(booleans);
// Unpack bitmap to boolean array
const unpacked = util.unpackBooleans(packed, booleans.length);
console.log(unpacked); // [true, false, true, true, false, true]
// Count set bits
const byte = 0b11010100; // 4 bits set
console.log(util.popcount32(byte)); // 4
// Null bitmap operations
const nullBitmap = util.createNullBitmap(100, true); // All valid
const invertedBitmap = util.invertBitmap(nullBitmap, 100); // All null
// Truncate bitmap for slicing
const originalBitmap = util.packBooleans([true, true, false, true, false, false, true, true]);
const truncated = util.truncateBitmap(originalBitmap, 2, 4); // Extract bits 2-5
Functions for working with 64-bit integers and large number operations.
/**
* Base class for 64-bit integer values; `Uint64` and `Int64` both extend it.
*
* Declaration only. The value is exposed as two 32-bit halves (`high` and
* `low`) together with conversion and arithmetic methods.
*/
class BaseInt64 {
/**
* @param buffer - Buffer that backs the value.
* @param byteOffset - Optional offset into `buffer` (presumably the byte
*   position where the 8-byte value starts — TODO confirm).
* @param signed - Optional signedness flag (presumably selects signed vs
*   unsigned interpretation; default not visible here — TODO confirm).
*/
constructor(buffer: ArrayBufferLike, byteOffset?: number, signed?: boolean);
/** High 32 bits of the value (read-only). */
readonly high: number;
/** Low 32 bits of the value (read-only). */
readonly low: number;
/** Convert the value to a native `bigint`. */
toBigInt(): bigint;
/** Convert the value to a `number`; may lose precision for large values. */
toNumber(): number;
/** Convert the value to a string. */
toString(): string;
/**
* Arithmetic operations. Each takes another `BaseInt64` operand and returns a
* `BaseInt64`.
* NOTE(review): whether the receiver is mutated or a new instance is returned
* is not visible from this declaration — confirm against the implementation.
*/
plus(other: BaseInt64): BaseInt64;
minus(other: BaseInt64): BaseInt64;
times(other: BaseInt64): BaseInt64;
div(other: BaseInt64): BaseInt64;
}
/**
* 64-bit unsigned integer. Inherits the high/low accessors, conversions and
* arithmetic methods from `BaseInt64`.
*/
class Uint64 extends BaseInt64 {
/** Largest `Uint64` value (presumably 2^64 - 1 — TODO confirm). */
static readonly MAX_VALUE: Uint64;
/** Smallest `Uint64` value (presumably 0 — TODO confirm). */
static readonly MIN_VALUE: Uint64;
/** Create a `Uint64` from a number, a bigint, or a string. */
static from(value: number | bigint | string): Uint64;
}
/**
* 64-bit signed integer. Inherits the high/low accessors, conversions and
* arithmetic methods from `BaseInt64`.
*/
class Int64 extends BaseInt64 {
/** Largest `Int64` value (presumably 2^63 - 1 — TODO confirm). */
static readonly MAX_VALUE: Int64;
/** Smallest `Int64` value (presumably -2^63 — TODO confirm). */
static readonly MIN_VALUE: Int64;
/** Create an `Int64` from a number, a bigint, or a string. */
static from(value: number | bigint | string): Int64;
}
/**
* Utility functions for integer operations
*/
function int64ToNumber(int64: BaseInt64): number;
function numberToInt64(value: number): Int64;
function bigintToInt64(value: bigint): Int64;
function stringToInt64(value: string): Int64;
/**
* Integer arithmetic helpers
*/
function addInt64(a: BaseInt64, b: BaseInt64): BaseInt64;
function subtractInt64(a: BaseInt64, b: BaseInt64): BaseInt64;
function multiplyInt64(a: BaseInt64, b: BaseInt64): BaseInt64;
function divideInt64(a: BaseInt64, b: BaseInt64): BaseInt64;
Usage Examples:
import { util } from "apache-arrow";
// Create 64-bit integers
const bigNumber1 = util.Uint64.from(9223372036854775807n); // Int64 maximum (2^63 - 1), far above Number.MAX_SAFE_INTEGER
const bigNumber2 = util.Int64.from("-9223372036854775808"); // From string
// Arithmetic operations
const sum = bigNumber1.plus(util.Int64.from(100));
const difference = bigNumber1.minus(util.Int64.from(50));
// Conversions
console.log(bigNumber1.toBigInt()); // 9223372036854775807n
console.log(bigNumber1.toString()); // "9223372036854775807"
console.log(bigNumber1.toNumber()); // May lose precision for large values
// Utility functions
const converted = util.numberToInt64(123456789);
const backToNumber = util.int64ToNumber(converted);
// Working with high/low components
console.log(bigNumber1.high); // High 32 bits
console.log(bigNumber1.low); // Low 32 bits
Functions for arbitrary precision arithmetic when BigInt is not available.
/**
* BigNumber implementation for environments without BigInt.
*
* Declaration only. Binary operations take another `BN`; arithmetic, bitwise
* and shift operations return a `BN`, comparisons return a `boolean`.
* NOTE(review): whether operations mutate the receiver or return a new
* instance is not visible from this declaration — confirm.
*/
class BN {
/**
* @param value - Optional initial value: a number, a string, or another `BN`.
*   The value used when it is omitted is not shown here (presumably zero —
*   TODO confirm).
*/
constructor(value?: number | string | BN);
/** Arithmetic operations: addition, subtraction, multiplication, division, modulo. */
add(other: BN): BN;
sub(other: BN): BN;
mul(other: BN): BN;
div(other: BN): BN;
mod(other: BN): BN;
/** Comparison operations: equal, less/greater than, and their inclusive forms. */
eq(other: BN): boolean;
lt(other: BN): boolean;
gt(other: BN): boolean;
lte(other: BN): boolean;
gte(other: BN): boolean;
/** Bitwise operations; `not` is unary and takes no operand. */
and(other: BN): BN;
or(other: BN): BN;
xor(other: BN): BN;
not(): BN;
/** Shift operations; `bits` is the shift distance. */
shiftLeft(bits: number): BN;
shiftRight(bits: number): BN;
/**
* Conversion. `toString` accepts an optional `base` (presumably the radix,
* default not shown — TODO confirm); `toNumber` returns a `number`;
* `toArray` returns a `number[]` (element layout not visible here).
*/
toString(base?: number): string;
toNumber(): number;
toArray(): number[];
}
/**
* Utility functions for BigNumber operations
*/
function createBN(value: number | string): BN;
function isBN(value: any): value is BN;
function bnToNumber(bn: BN): number;
function numberToBN(value: number): BN;
Functions for UTF-8 encoding and decoding operations.
/**
* UTF-8 encoding/decoding functions
*/
function encodeUtf8(input: string): Uint8Array;
function decodeUtf8(input: Uint8Array): string;
/**
* UTF-8 length calculations
*/
function utf8ByteLength(input: string): number;
function utf8CharLength(input: Uint8Array): number;
/**
* UTF-8 validation
*/
function validateUtf8(input: Uint8Array): boolean;
function isValidUtf8(input: Uint8Array, offset?: number, length?: number): boolean;
/**
* UTF-8 string utilities
*/
function truncateUtf8(input: string, maxBytes: number): string;
function splitUtf8(input: string, chunkSize: number): string[];
Usage Examples:
import { util } from "apache-arrow";
// Encode/decode UTF-8
const text = "Hello, 世界! 🌍";
const encoded = util.encodeUtf8(text);
const decoded = util.decodeUtf8(encoded);
console.log(text === decoded); // true
// Length calculations
const byteLength = util.utf8ByteLength(text); // Byte length in UTF-8
const charLength = util.utf8CharLength(encoded); // Character count
console.log(`"${text}" has ${charLength} characters and ${byteLength} bytes`);
// Validation
const validUtf8 = util.validateUtf8(encoded);
console.log(`Is valid UTF-8: ${validUtf8}`); // true
// Truncate to fit byte limit
const truncated = util.truncateUtf8("Very long string...", 10);
console.log(`Truncated: "${truncated}"`);
// Split into chunks
const chunks = util.splitUtf8("abcdefghijk", 3);
console.log(chunks); // ["abc", "def", "ghi", "jk"]
Functions for working with Arrow vectors and chunked data.
/**
* Range and index utilities
*/
function clampRange(
source: Vector | RecordBatch | Table,
begin?: number,
end?: number
): [number, number];
function wrapIndex(length: number, index: number): number;
/**
* Chunking utilities
*/
function computeChunkOffsets(chunks: Vector[]): Uint32Array;
function sliceChunks<T extends Vector>(
chunks: T[],
begin: number,
end: number
): T[];
function wrapChunkedCall<T extends Vector, R>(
chunks: T[],
begin: number,
end: number,
fn: (chunk: T, offset: number, length: number) => R
): R[];
/**
* Vector comparison and search
*/
function vectorIndexOf<T>(
vector: Vector<T>,
searchElement: T,
fromIndex?: number
): number;
function vectorIncludes<T>(
vector: Vector<T>,
searchElement: T,
fromIndex?: number
): boolean;
/**
* Vector statistics
*/
function vectorMin<T extends DataType>(vector: Vector<T>): T['TValue'] | null;
function vectorMax<T extends DataType>(vector: Vector<T>): T['TValue'] | null;
function vectorSum<T extends DataType>(vector: Vector<T>): number | bigint | null;
Usage Examples:
import { util, vectorFromArray } from "apache-arrow";
// Create sample vectors
const numbers = vectorFromArray([10, 20, 30, 40, 50]);
const strings = vectorFromArray(['apple', 'banana', 'cherry', 'date']);
// Range clamping for safe slicing
const [begin, end] = util.clampRange(numbers, -2, 100);
console.log(`Safe range: ${begin} to ${end}`); // Clamped to valid indices
// Index wrapping (negative indices)
const wrappedIndex = util.wrapIndex(numbers.length, -1);
console.log(`Last element index: ${wrappedIndex}`); // 4
// Search operations
const foundIndex = util.vectorIndexOf(strings, 'cherry');
console.log(`'cherry' found at index: ${foundIndex}`); // 2
const hasElement = util.vectorIncludes(numbers, 30);
console.log(`Contains 30: ${hasElement}`); // true
// Statistics
const minValue = util.vectorMin(numbers);
const maxValue = util.vectorMax(numbers);
const sum = util.vectorSum(numbers);
console.log(`Min: ${minValue}, Max: ${maxValue}, Sum: ${sum}`);
// Min: 10, Max: 50, Sum: 150
// Chunked operations
const chunk1 = vectorFromArray([1, 2, 3]);
const chunk2 = vectorFromArray([4, 5, 6]);
const chunk3 = vectorFromArray([7, 8, 9]);
const chunks = [chunk1, chunk2, chunk3];
const offsets = util.computeChunkOffsets(chunks);
console.log(offsets); // Uint32Array [0, 3, 6, 9]
// Slice across chunks
const slicedChunks = util.sliceChunks(chunks, 2, 7); // Elements 2-6
Functions for formatting Arrow data for human-readable output.
/**
* Pretty printing configuration.
*
* Optional settings accepted by `formatVector`, `formatTable`, `formatSchema`
* and `formatRecordBatch`. Every field is optional; defaults are not visible
* in this declaration.
*/
interface PrettyOptions {
// Presumably the maximum output width in characters — TODO confirm.
maxWidth?: number;
// Presumably the string used for each indentation level — TODO confirm.
indent?: string;
// When true, the formatTable example prints a type row under the column names.
showTypes?: boolean;
// Presumably toggles printing of schema/field metadata — TODO confirm.
showMetadata?: boolean;
// Row limit: the formatTable example with maxRows: 3 prints three rows, then a "..." row.
maxRows?: number;
}
/**
* Pretty printing functions
*/
function formatVector<T extends DataType>(
vector: Vector<T>,
options?: PrettyOptions
): string;
function formatTable<T extends TypeMap>(
table: Table<T>,
options?: PrettyOptions
): string;
function formatSchema<T extends TypeMap>(
schema: Schema<T>,
options?: PrettyOptions
): string;
function formatRecordBatch<T extends TypeMap>(
batch: RecordBatch<T>,
options?: PrettyOptions
): string;
/**
* Value formatting
*/
function formatValue(value: any, type: DataType): string;
function formatNull(): string;
function formatNumber(value: number, precision?: number): string;
function formatString(value: string, maxLength?: number): string;
Usage Examples:
// Int32, Float64 and Utf8 are imported because the formatValue calls at the
// end of this example construct them; without the import the snippet fails
// with unresolved names.
import { util, tableFromArrays, Int32, Float64, Utf8 } from "apache-arrow";
// Create sample data: one array per column, keyed by column name.
const table = tableFromArrays({
  id: [1, 2, 3, 4, 5],
  name: ['Alice', 'Bob', 'Charlie', 'Diana', 'Eve'],
  score: [95.5, 87.2, 92.1, 88.8, 91.3],
  active: [true, false, true, true, false]
});
// Format table for display
const formatted = util.formatTable(table, {
maxRows: 3,
showTypes: true,
maxWidth: 80
});
console.log(formatted);
/*
┌─────┬─────────┬───────┬────────┐
│ id  │ name    │ score │ active │
│ i32 │ utf8    │ f64   │ bool   │
├─────┼─────────┼───────┼────────┤
│ 1   │ Alice   │ 95.5  │ true   │
│ 2   │ Bob     │ 87.2  │ false  │
│ 3   │ Charlie │ 92.1  │ true   │
│ ... │ ...     │ ...   │ ...    │
└─────┴─────────┴───────┴────────┘
*/
// Format individual vectors
const nameVector = table.getColumn('name');
const vectorOutput = util.formatVector(nameVector, {
maxWidth: 50,
showTypes: true
});
// Format schema
const schemaOutput = util.formatSchema(table.schema, {
showMetadata: true,
indent: ' '
});
console.log(schemaOutput);
/*
Schema {
id: Int32 (nullable)
name: Utf8 (nullable)
score: Float64 (nullable)
active: Bool (nullable)
}
*/
// Custom value formatting
console.log(util.formatValue(null, new Int32())); // "null"
console.log(util.formatValue(123.456789, new Float64())); // "123.457"
console.log(util.formatValue("very long string...", new Utf8())); // "very long st..."
Functions for comparing Arrow schemas, fields, and types.
/**
* Schema comparison
*/
function compareSchemas(
left: Schema,
right: Schema,
checkMetadata?: boolean
): boolean;
/**
* Field comparison
*/
function compareFields(
left: Field,
right: Field,
checkMetadata?: boolean
): boolean;
/**
* DataType comparison
*/
function compareTypes(left: DataType, right: DataType): boolean;
/**
* Detailed comparison with differences.
*
* Result shape returned by `compareWithDetails`.
*/
interface ComparisonResult {
// Overall verdict; the usage example prints false for schemas whose third field name differs.
equal: boolean;
// Human-readable mismatch descriptions, e.g. "Field 2: name mismatch: 'score' vs 'value'".
// Presumably empty when `equal` is true — TODO confirm.
differences: string[];
}
function compareWithDetails(
left: Schema | Field | DataType,
right: Schema | Field | DataType
): ComparisonResult;
/**
* Type compatibility checking
*/
function isTypeCompatible(from: DataType, to: DataType): boolean;
function isSchemaCompatible(from: Schema, to: Schema): boolean;
Usage Examples:
// Int64 is imported because the compareTypes / isTypeCompatible calls later in
// this example construct it; without the import the snippet fails with an
// unresolved name.
import { util, Schema, Field, Int32, Int64, Utf8, Float64 } from "apache-arrow";
// Create schemas: schema1 and schema2 are field-for-field identical,
// schema3 differs only in the name of its third field.
const schema1 = new Schema([
  new Field('id', new Int32()),
  new Field('name', new Utf8()),
  new Field('score', new Float64())
]);
const schema2 = new Schema([
  new Field('id', new Int32()),
  new Field('name', new Utf8()),
  new Field('score', new Float64())
]);
const schema3 = new Schema([
  new Field('id', new Int32()),
  new Field('name', new Utf8()),
  new Field('value', new Float64()) // Different field name
]);
// Compare schemas
console.log(util.compareSchemas(schema1, schema2)); // true
console.log(util.compareSchemas(schema1, schema3)); // false
// Compare individual fields
const field1 = new Field('age', new Int32());
const field2 = new Field('age', new Int32());
const field3 = new Field('age', new Float64()); // Different type
console.log(util.compareFields(field1, field2)); // true
console.log(util.compareFields(field1, field3)); // false
// Compare data types
console.log(util.compareTypes(new Int32(), new Int32())); // true
console.log(util.compareTypes(new Int32(), new Int64())); // false
// Detailed comparison
const detailed = util.compareWithDetails(schema1, schema3);
console.log(detailed.equal); // false
console.log(detailed.differences); // ["Field 2: name mismatch: 'score' vs 'value'"]
// Type compatibility
console.log(util.isTypeCompatible(new Int32(), new Int64())); // true (can upcast)
console.log(util.isTypeCompatible(new Int64(), new Int32())); // false (would lose precision)
Functions for detecting the JavaScript environment and capabilities.
/**
* Environment detection
*/
function isNode(): boolean;
function isBrowser(): boolean;
function isWebWorker(): boolean;
function isElectron(): boolean;
/**
* Feature detection
*/
function hasBigInt(): boolean;
function hasArrayBuffer(): boolean;
function hasTypedArrays(): boolean;
function hasSharedArrayBuffer(): boolean;
/**
* Value type detection
*/
function isPromise(value: any): value is Promise<any>;
function isIterable<T>(value: any): value is Iterable<T>;
function isAsyncIterable<T>(value: any): value is AsyncIterable<T>;
function isArrayLike<T>(value: any): value is ArrayLike<T>;
/**
* Platform-specific optimizations
*/
function getOptimalChunkSize(): number;
function getMaxSafeMemorySize(): number;
function supportsWorkers(): boolean;
Usage Examples:
import { util } from "apache-arrow";
// Environment detection
if (util.isNode()) {
console.log('Running in Node.js');
// Use Node.js specific features
const fs = require('fs');
} else if (util.isBrowser()) {
console.log('Running in browser');
// Use browser specific features
const url = window.location.href;
}
// Feature detection
if (util.hasBigInt()) {
// Use BigInt for 64-bit integers
const bigNumber = 9223372036854775807n;
} else {
// Fallback to BN.js or similar
console.log('BigInt not supported, using fallback');
}
// Value type checking
/**
 * Logs every item of an iterable or async-iterable value, unwrapping
 * promises first.
 *
 * - A promise is awaited and its resolved value is processed recursively.
 * - An iterable has each item logged with the prefix 'Processing item:'.
 * - An async iterable has each item logged with 'Processing async item:'.
 * - Any other value is ignored.
 *
 * The explicit `Promise<void>` return type is required: the function calls
 * itself, so under `strict` the compiler cannot infer the return type and
 * reports an implicit `any`.
 *
 * @param value - Value of unknown shape to inspect and process.
 * @returns A promise that resolves (with no value) once all items are logged.
 */
async function processValue(value: unknown): Promise<void> {
  if (util.isPromise(value)) {
    // Unwrap first, then run the same checks on the resolved value.
    await processValue(await value);
    return;
  }
  if (util.isIterable(value)) {
    for (const item of value) {
      console.log('Processing item:', item);
    }
  }
  // Checked independently of the branch above, not as an `else`.
  if (util.isAsyncIterable(value)) {
    for await (const item of value) {
      console.log('Processing async item:', item);
    }
  }
}
// Platform optimizations
const chunkSize = util.getOptimalChunkSize();
const maxMemory = util.getMaxSafeMemorySize();
if (util.supportsWorkers()) {
// Use web workers for parallel processing
const worker = new Worker('worker.js');
}
console.log(`Optimal chunk size: ${chunkSize}`);
console.log(`Max safe memory: ${(maxMemory / 1024 / 1024).toFixed(1)}MB`);
Functions for monitoring and optimizing memory usage.
/**
* Memory profiling utilities
*/
function getMemoryUsage(): MemoryInfo;
function trackMemoryUsage(fn: () => void): MemoryDelta;
function suggestGarbageCollection(): void;
/**
* Memory snapshot returned by `getMemoryUsage`.
*
* Units are presumably bytes: the usage example divides `used` by 1024 twice
* and labels the result "MB" — TODO confirm.
*/
interface MemoryInfo {
// Amount of memory currently in use.
used: number;
// Total memory (presumably the size of the pool `used` is measured against — TODO confirm).
total: number;
// Memory still available (presumably `total - used` — TODO confirm).
available: number;
}
/**
* Before/after memory report returned by `trackMemoryUsage`.
*/
interface MemoryDelta {
// Snapshot presumably taken before the tracked callback runs — TODO confirm.
before: MemoryInfo;
// Snapshot presumably taken after the tracked callback returns — TODO confirm.
after: MemoryInfo;
// Change in memory use; the usage example divides it by 1024 twice and labels it "MB",
// so it is presumably in bytes (likely after.used - before.used — TODO confirm).
delta: number;
}
/**
* Performance measurement
*/
function measureTime<T>(fn: () => T): [T, number];
function measureTimeAsync<T>(fn: () => Promise<T>): Promise<[T, number]>;
Usage Examples:
// Memory monitoring
const memBefore = util.getMemoryUsage();
console.log(`Memory before: ${memBefore.used / 1024 / 1024}MB`);
// Track memory usage of operation
const memoryDelta = util.trackMemoryUsage(() => {
const largeArray = new Array(1000000).fill(0);
// Process large array
});
console.log(`Memory delta: ${memoryDelta.delta / 1024 / 1024}MB`);
// Performance measurement
const [result, duration] = util.measureTime(() => {
return expensiveOperation();
});
console.log(`Operation took ${duration}ms`);
// Async performance measurement
const [asyncResult, asyncDuration] = await util.measureTimeAsync(async () => {
return await expensiveAsyncOperation();
});