Builders are type-specific classes for efficiently constructing Arrow vectors from JavaScript data. They handle memory allocation, type-specific encoding, and null value tracking automatically while providing a fluent API for data construction.
All builder classes inherit from the abstract `Builder` class, which provides the common functionality.
/**
 * Abstract base class for all Arrow builders.
 *
 * `T` is the Arrow data type being built; `TNull` is the type of the
 * sentinel values that are accepted in place of a real value.
 */
abstract class Builder<T extends DataType = any, TNull = any> {
  /** Target data type for this builder */
  readonly type: T;
  /** Current number of elements */
  readonly length: number;
  /** Number of null values written */
  readonly nullCount: number;
  /** Whether builder has been finished */
  readonly finished: boolean;
  /** Reserve capacity for specified number of elements */
  reserve(length: number): this;
  /** Append value to end of builder */
  append(value: T['TValue'] | TNull): this;
  /** Set value at specific index */
  set(index: number, value: T['TValue'] | TNull): this;
  /** Set validity bit at index */
  setValid(index: number, valid: boolean): this;
  /** Append null value */
  appendNull(): this;
  /** Flush current data to Data object */
  flush(): Data<T>;
  /** Finish builder and create Vector */
  finish(): this;
  /** Convert to Vector (calls finish if needed) */
  toVector(): Vector<T>;
  /** Clear all data and reset builder */
  clear(): this;
}

Factory function to create appropriate builders for data types.
/**
 * Create builder for specified data type
 */
function makeBuilder<T extends DataType>(
  options: BuilderOptions<T>
): Builder<T>;

/** Options accepted by `makeBuilder` and by the builder constructors. */
interface BuilderOptions<T extends DataType> {
  /** Target data type */
  type: T;
  /** Values to treat as null */
  nullValues?: any[];
  /** Child builder options for nested types */
  children?: BuilderOptions<any>[];
}

Usage Examples:
import { makeBuilder, Int32, Utf8, Bool, List, Field } from "apache-arrow";

// Create builders for different types.
// `nullValues` names the inputs that are treated as null (see BuilderOptions).
const intBuilder = makeBuilder({ type: new Int32(), nullValues: [null] });
const stringBuilder = makeBuilder({ type: new Utf8(), nullValues: [null] });
const boolBuilder = makeBuilder({ type: new Bool() });

// Build data
intBuilder.append(1).append(2).append(null).append(4);
stringBuilder.append('hello').append('world').appendNull();
boolBuilder.append(true).append(false).append(true);

// Create vectors
const intVector = intBuilder.finish().toVector();
const stringVector = stringBuilder.finish().toVector();
const boolVector = boolBuilder.finish().toVector();

Builder for Null type vectors (tracks only length, no data storage).
/**
 * Builder for Null vectors
 */
class NullBuilder extends Builder<Null> {
  readonly type: Null;
  /** Append null (only valid operation) */
  append(value: null): this;
  /** Append null value */
  appendNull(): this;
}

Builder for Boolean vectors with bit-packed storage.
/**
 * Builder for Boolean vectors with efficient bit packing
 */
class BoolBuilder extends Builder<Bool> {
  readonly type: Bool;
  /** Append boolean value */
  append(value: boolean | null): this;
  /** Set boolean value at index */
  set(index: number, value: boolean | null): this;
}

Builders for various integer types with corresponding bit widths.
/**
 * Base integer builder
 */
abstract class IntBuilder<T extends Int = Int> extends Builder<T> {
  readonly type: T;
  /** Append integer value */
  append(value: T['TValue'] | null): this;
  /** Set integer value at index */
  set(index: number, value: T['TValue'] | null): this;
}

// Specific integer builders.
// The 8/16/32-bit builders take `number`; the 64-bit builders take `bigint`.
class Int8Builder extends IntBuilder<Int8> {
  append(value: number | null): this;
  set(index: number, value: number | null): this;
}
class Int16Builder extends IntBuilder<Int16> {
  append(value: number | null): this;
  set(index: number, value: number | null): this;
}
class Int32Builder extends IntBuilder<Int32> {
  append(value: number | null): this;
  set(index: number, value: number | null): this;
}
class Int64Builder extends IntBuilder<Int64> {
  append(value: bigint | null): this;
  set(index: number, value: bigint | null): this;
}
class Uint8Builder extends IntBuilder<Uint8> {
  append(value: number | null): this;
  set(index: number, value: number | null): this;
}
class Uint16Builder extends IntBuilder<Uint16> {
  append(value: number | null): this;
  set(index: number, value: number | null): this;
}
class Uint32Builder extends IntBuilder<Uint32> {
  append(value: number | null): this;
  set(index: number, value: number | null): this;
}
class Uint64Builder extends IntBuilder<Uint64> {
  append(value: bigint | null): this;
  set(index: number, value: bigint | null): this;
}

Usage Examples:
import {
  Int32, Int64, Uint8,
  Int8Builder, Int32Builder, Int64Builder,
  Uint8Builder, Uint32Builder, Uint64Builder
} from "apache-arrow";

// Builders are constructed from BuilderOptions: `type` is required, and
// `nullValues` names the inputs that are treated as null.

// 32-bit signed integers
const int32Builder = new Int32Builder({ type: new Int32(), nullValues: [null] });
int32Builder
  .append(42)
  .append(-17)
  .append(null)
  .append(2147483647);
const int32Vector = int32Builder.finish().toVector();

// 64-bit integers (use BigInt)
const int64Builder = new Int64Builder({ type: new Int64(), nullValues: [null] });
int64Builder
  .append(9223372036854775807n) // Max int64
  .append(-1n)
  .append(null)
  .append(0n);
const int64Vector = int64Builder.finish().toVector();

// Unsigned 8-bit integers
const uint8Builder = new Uint8Builder({ type: new Uint8() });
uint8Builder.append(255).append(0).append(128);
const uint8Vector = uint8Builder.finish().toVector();

Builders for floating-point numbers with different precision levels.
/**
 * Base float builder
 */
abstract class FloatBuilder<T extends Float = Float> extends Builder<T> {
  readonly type: T;
  /** Append float value */
  append(value: number | null): this;
  /** Set float value at index */
  set(index: number, value: number | null): this;
}

// Specific float builders
class Float16Builder extends FloatBuilder<Float16> {
  readonly type: Float16;
}
class Float32Builder extends FloatBuilder<Float32> {
  readonly type: Float32;
}
class Float64Builder extends FloatBuilder<Float64> {
  readonly type: Float64;
}

Usage Examples:
import { Float32, Float64, Float32Builder, Float64Builder } from "apache-arrow";

// Builders are constructed from BuilderOptions: `type` is required, and
// `nullValues` names the inputs that are treated as null.

// 32-bit floats
const float32Builder = new Float32Builder({ type: new Float32(), nullValues: [null] });
float32Builder
  .append(3.14159)
  .append(-2.718)
  .append(null)
  .append(Infinity);

// 64-bit floats (standard JavaScript numbers)
const float64Builder = new Float64Builder({ type: new Float64(), nullValues: [null] });
float64Builder
  .append(Math.PI)
  .append(Math.E)
  .append(Number.MAX_SAFE_INTEGER)
  .append(null);

Builders for UTF-8 encoded strings with variable-length storage.
/**
 * UTF-8 string builder
 */
class Utf8Builder extends Builder<Utf8> {
  readonly type: Utf8;
  /** Append string value */
  append(value: string | null): this;
  /** Set string value at index */
  set(index: number, value: string | null): this;
}

/**
 * Large UTF-8 string builder (for very long strings)
 */
class LargeUtf8Builder extends Builder<LargeUtf8> {
  readonly type: LargeUtf8;
  /** Append string value */
  append(value: string | null): this;
  /** Set string value at index */
  set(index: number, value: string | null): this;
}

Usage Examples:
import { Utf8, LargeUtf8, Utf8Builder, LargeUtf8Builder } from "apache-arrow";

// Builders are constructed from BuilderOptions: `type` is required, and
// `nullValues` names the inputs that are treated as null.

// Standard UTF-8 strings
const utf8Builder = new Utf8Builder({ type: new Utf8(), nullValues: [null] });
utf8Builder
  .append('Hello')
  .append('世界') // Unicode support
  .append(null)
  .append('Arrow! 🏹')
  .append(''); // Empty string
const stringVector = utf8Builder.finish().toVector();

// Large strings (for very long text)
const largeUtf8Builder = new LargeUtf8Builder({ type: new LargeUtf8(), nullValues: [null] });
const longText = 'A'.repeat(100000); // 100K character string
largeUtf8Builder.append(longText).append(null);

Builders for raw binary data with variable-length storage.
/**
 * Binary data builder
 */
class BinaryBuilder extends Builder<Binary> {
  readonly type: Binary;
  /** Append binary data */
  append(value: Uint8Array | ArrayBufferLike | null): this;
  /** Set binary data at index */
  set(index: number, value: Uint8Array | ArrayBufferLike | null): this;
}

/**
 * Large binary data builder
 */
class LargeBinaryBuilder extends Builder<LargeBinary> {
  readonly type: LargeBinary;
  /** Append binary data */
  append(value: Uint8Array | ArrayBufferLike | null): this;
  /** Set binary data at index */
  set(index: number, value: Uint8Array | ArrayBufferLike | null): this;
}

/**
 * Fixed-size binary data builder
 */
class FixedSizeBinaryBuilder extends Builder<FixedSizeBinary> {
  readonly type: FixedSizeBinary;
  readonly byteWidth: number;
  /** Append fixed-size binary data */
  append(value: Uint8Array | ArrayBufferLike | null): this;
  /** Set fixed-size binary data at index */
  set(index: number, value: Uint8Array | ArrayBufferLike | null): this;
}

Usage Examples:
import {
  Binary, FixedSizeBinary,
  BinaryBuilder, FixedSizeBinaryBuilder
} from "apache-arrow";

// Builders are constructed from BuilderOptions: `type` is required, and
// `nullValues` names the inputs that are treated as null.

// Variable-length binary data
const binaryBuilder = new BinaryBuilder({ type: new Binary(), nullValues: [null] });
const data1 = new Uint8Array([1, 2, 3, 4]);
const data2 = new Uint8Array([255, 254, 253]);
binaryBuilder
  .append(data1)
  .append(data2)
  .append(null)
  .append(new Uint8Array([])); // Empty binary

// Fixed-size binary (e.g., for UUIDs, checksums)
const fixedBinaryBuilder = new FixedSizeBinaryBuilder({
  type: new FixedSizeBinary(16), // 16 bytes for UUID
  nullValues: [null]
});
const uuid1 = new Uint8Array(16).fill(1); // Mock UUID
const uuid2 = new Uint8Array(16).fill(2);
fixedBinaryBuilder.append(uuid1).append(uuid2).append(null);

Builder for high-precision decimal numbers.
/**
 * Decimal number builder with configurable precision and scale
 */
class DecimalBuilder extends Builder<Decimal> {
  readonly type: Decimal;
  readonly scale: number;
  readonly precision: number;
  /** Append decimal value */
  append(value: Uint32Array | string | number | null): this;
  /** Set decimal value at index */
  set(index: number, value: Uint32Array | string | number | null): this;
}

Usage Examples:
import { DecimalBuilder, Decimal } from "apache-arrow";

// Decimal with 2 decimal places, 10 total digits
const decimalType = new Decimal(2, 10); // scale=2, precision=10
// `nullValues` names the inputs that are treated as null (see BuilderOptions).
const decimalBuilder = new DecimalBuilder({ type: decimalType, nullValues: [null] });
decimalBuilder
  .append("12345678.90") // String input
  .append(1234.56) // Number input
  .append(null);
const decimalVector = decimalBuilder.finish().toVector();

Builders for date values stored as days or milliseconds since Unix epoch.
/**
 * Base date builder
 */
abstract class DateBuilder<T extends Date_ = Date_> extends Builder<T> {
  readonly type: T;
  /** Append date value */
  append(value: Date | number | null): this;
  /** Set date value at index */
  set(index: number, value: Date | number | null): this;
}

// Specific date builders
class DateDayBuilder extends DateBuilder<DateDay> {
  readonly type: DateDay;
  /** Append date (converted to days since epoch) */
  append(value: Date | number | null): this;
}
class DateMillisecondBuilder extends DateBuilder<DateMillisecond> {
  readonly type: DateMillisecond;
  /** Append date (converted to milliseconds since epoch) */
  append(value: Date | number | null): this;
}

Builders for time-of-day values with various precision levels.
/**
 * Base time builder
 */
abstract class TimeBuilder<T extends Time_ = Time_> extends Builder<T> {
  readonly type: T;
  /** Append time value */
  append(value: Date | number | bigint | null): this;
  /** Set time value at index */
  set(index: number, value: Date | number | bigint | null): this;
}

// Specific time builders.
// Only the microsecond and nanosecond builders accept `bigint`.
class TimeSecondBuilder extends TimeBuilder<TimeSecond> {
  append(value: Date | number | null): this;
}
class TimeMillisecondBuilder extends TimeBuilder<TimeMillisecond> {
  append(value: Date | number | null): this;
}
class TimeMicrosecondBuilder extends TimeBuilder<TimeMicrosecond> {
  append(value: Date | number | bigint | null): this;
}
class TimeNanosecondBuilder extends TimeBuilder<TimeNanosecond> {
  append(value: Date | number | bigint | null): this;
}

Builders for absolute timestamps with timezone support.
/**
 * Base timestamp builder
 */
abstract class TimestampBuilder<T extends Timestamp_ = Timestamp_> extends Builder<T> {
  readonly type: T;
  /** Append timestamp value */
  append(value: Date | string | number | bigint | null): this;
  /** Set timestamp value at index */
  set(index: number, value: Date | string | number | bigint | null): this;
}

// Specific timestamp builders.
// Only the microsecond and nanosecond builders accept `bigint`.
class TimestampSecondBuilder extends TimestampBuilder<TimestampSecond> {
  append(value: Date | string | number | null): this;
}
class TimestampMillisecondBuilder extends TimestampBuilder<TimestampMillisecond> {
  append(value: Date | string | number | null): this;
}
class TimestampMicrosecondBuilder extends TimestampBuilder<TimestampMicrosecond> {
  append(value: Date | string | number | bigint | null): this;
}
class TimestampNanosecondBuilder extends TimestampBuilder<TimestampNanosecond> {
  append(value: Date | string | number | bigint | null): this;
}

Usage Examples:
import {
  DateDay, TimestampMillisecond,
  DateDayBuilder, DateMillisecondBuilder,
  TimestampMillisecondBuilder, TimestampSecondBuilder
} from "apache-arrow";

// Builders are constructed from BuilderOptions: `type` is required, and
// `nullValues` names the inputs that are treated as null.

// Date builders
const dayBuilder = new DateDayBuilder({ type: new DateDay(), nullValues: [null] });
const now = new Date();
const yesterday = new Date(Date.now() - 24 * 60 * 60 * 1000);
dayBuilder
  .append(now) // Date object
  .append(yesterday)
  .append(null);

// Timestamp builder with timezone
const timestampBuilder = new TimestampMillisecondBuilder({
  type: new TimestampMillisecond('UTC'),
  nullValues: [null]
});
timestampBuilder
  .append(new Date()) // Date object
  .append(Date.now()) // Milliseconds since epoch
  .append('2023-01-01T00:00:00.000Z') // ISO string
  .append(null);

Builders for time duration and calendar interval values.
/**
 * Duration builder for elapsed time
 */
abstract class DurationBuilder<T extends Duration = Duration> extends Builder<T> {
  readonly type: T;
  /** Append duration value */
  append(value: number | bigint | null): this;
  /** Set duration value at index */
  set(index: number, value: number | bigint | null): this;
}

// Specific duration builders
class DurationSecondBuilder extends DurationBuilder<DurationSecond> {}
class DurationMillisecondBuilder extends DurationBuilder<DurationMillisecond> {}
class DurationMicrosecondBuilder extends DurationBuilder<DurationMicrosecond> {}
class DurationNanosecondBuilder extends DurationBuilder<DurationNanosecond> {}

/**
 * Interval builders for calendar intervals
 */
abstract class IntervalBuilder<T extends Interval_ = Interval_> extends Builder<T> {
  readonly type: T;
  /** Append interval value */
  append(value: T['TValue'] | null): this;
  /** Set interval value at index */
  set(index: number, value: T['TValue'] | null): this;
}
class IntervalDayTimeBuilder extends IntervalBuilder<IntervalDayTime> {
  append(value: { days: number; milliseconds: number } | null): this;
}
class IntervalYearMonthBuilder extends IntervalBuilder<IntervalYearMonth> {
  append(value: { years: number; months: number } | null): this;
}
class IntervalMonthDayNanoBuilder extends IntervalBuilder<IntervalMonthDayNano> {
  append(value: { months: number; days: number; nanoseconds: bigint } | null): this;
}

Builder for variable-length lists of homogeneous elements.
/**
 * List builder for arrays of homogeneous elements
 */
class ListBuilder<T extends DataType = any> extends Builder<List<T>> {
  readonly type: List<T>;
  readonly valueBuilder: Builder<T>;
  /** Append list of values */
  append(value: (T['TValue'] | null)[] | null): this;
  /** Set list at index */
  set(index: number, value: (T['TValue'] | null)[] | null): this;
  /** Start building new list item */
  appendValue(): Builder<T>;
  /** Finish current list item */
  finishValue(): this;
}

/**
 * Fixed-size list builder
 */
class FixedSizeListBuilder<T extends DataType = any> extends Builder<FixedSizeList<T>> {
  readonly type: FixedSizeList<T>;
  readonly listSize: number;
  readonly valueBuilder: Builder<T>;
  /** Append fixed-size list */
  append(value: (T['TValue'] | null)[] | null): this;
  /** Set fixed-size list at index */
  set(index: number, value: (T['TValue'] | null)[] | null): this;
}

Usage Examples:
import {
  ListBuilder, FixedSizeListBuilder,
  List, FixedSizeList, Field, Int32, Utf8
} from "apache-arrow";

// `nullValues` names the inputs that are treated as null (see BuilderOptions).

// Variable-length list of integers
const listBuilder = new ListBuilder({
  type: new List(new Field('item', new Int32())),
  nullValues: [null]
});
listBuilder
  .append([1, 2, 3]) // List with 3 elements
  .append([]) // Empty list
  .append(null) // Null list
  .append([42, -17, 0]); // Another list
const listVector = listBuilder.finish().toVector();

// Fixed-size list of strings (always 3 elements)
const fixedListBuilder = new FixedSizeListBuilder({
  type: new FixedSizeList(3, new Field('item', new Utf8())),
  nullValues: [null]
});
fixedListBuilder
  .append(['a', 'b', 'c']) // Exactly 3 strings
  .append([null, 'x', 'y']) // With null element
  .append(null); // Null list

// Using fluent API for complex construction
const complexListBuilder = new ListBuilder({
  type: new List(new Field('item', new Int32()))
});
// Build lists dynamically
const lists = [[1, 2], [3, 4, 5], [], [6]];
lists.forEach(list => complexListBuilder.append(list));

Builder for record-like structures with named fields.
/**
 * Struct builder for records with named fields
 */
class StructBuilder<T extends StructRow = any> extends Builder<Struct<T>> {
  readonly type: Struct<T>;
  readonly children: Builder[];
  /** Append struct value */
  append(value: { [K in keyof T]: T[K] } | null): this;
  /** Set struct value at index */
  set(index: number, value: { [K in keyof T]: T[K] } | null): this;
  /** Get child builder by field name */
  getChildBuilder<K extends keyof T>(name: K): Builder<T[K]>;
  /** Get child builder by field index */
  getChildBuilderAt(index: number): Builder;
}

Usage Examples:
import { StructBuilder, Struct, Field, Int32, Utf8, Bool } from "apache-arrow";

// Create struct type (person record)
const personType = new Struct([
  new Field('name', new Utf8()),
  new Field('age', new Int32()),
  new Field('active', new Bool())
]);
// `nullValues` names the inputs that are treated as null (see BuilderOptions).
const structBuilder = new StructBuilder({ type: personType, nullValues: [null] });

// Append complete structs
structBuilder
  .append({ name: 'Alice', age: 25, active: true })
  .append({ name: 'Bob', age: 30, active: false })
  .append(null)
  .append({ name: 'Charlie', age: 35, active: true });

// Alternative: Build fields individually
const nameBuilder = structBuilder.getChildBuilder('name');
const ageBuilder = structBuilder.getChildBuilder('age');
const activeBuilder = structBuilder.getChildBuilder('active');
nameBuilder.append('Diana');
ageBuilder.append(28);
activeBuilder.append(true);
const structVector = structBuilder.finish().toVector();

Builder for key-value pair collections.
/**
 * Map builder for key-value pairs
 */
class MapBuilder<TKey extends DataType = any, TValue extends DataType = any>
  extends Builder<Map_<TKey, TValue>> {
  readonly type: Map_<TKey, TValue>;
  readonly entryBuilder: StructBuilder<{ key: TKey; value: TValue }>;
  /** Append map entries */
  append(value: Map<TKey['TValue'], TValue['TValue']> |
    { [key: string]: TValue['TValue'] } |
    [TKey['TValue'], TValue['TValue']][] |
    null): this;
  /** Set map entries at index */
  set(index: number, value: Map<TKey['TValue'], TValue['TValue']> |
    { [key: string]: TValue['TValue'] } |
    [TKey['TValue'], TValue['TValue']][] |
    null): this;
}

Usage Examples:
import { MapBuilder, Map_, Field, Struct, Utf8, Int32 } from "apache-arrow";

// Create map type (string -> integer)
const mapType = new Map_(
  new Field('entries', new Struct([
    new Field('key', new Utf8()),
    new Field('value', new Int32())
  ]))
);
// `nullValues` names the inputs that are treated as null (see BuilderOptions).
const mapBuilder = new MapBuilder({ type: mapType, nullValues: [null] });

// Append maps in different formats
mapBuilder
  .append(new Map([['a', 1], ['b', 2]])) // Map object
  .append({ x: 10, y: 20 }) // Plain object
  .append([['key1', 100], ['key2', 200]]) // Array of pairs
  .append(null); // Null map
const mapVector = mapBuilder.finish().toVector();

Builder for tagged unions supporting multiple data types in a single column.
/**
 * Base union builder
 */
abstract class UnionBuilder<T extends Union_ = any> extends Builder<T> {
  readonly type: T;
  readonly children: Builder[];
  readonly typeIds: Int8Array;
  /** Append value with automatic type detection */
  append(value: any): this;
  /** Append value with explicit type ID */
  appendValue(typeId: number, value: any): this;
  /** Get child builder by type ID */
  getChildBuilder(typeId: number): Builder;
}

/**
 * Dense union builder
 */
class DenseUnionBuilder<T extends DenseUnion = any> extends UnionBuilder<T> {
  readonly mode: UnionMode.Dense;
}

/**
 * Sparse union builder
 */
class SparseUnionBuilder<T extends SparseUnion = any> extends UnionBuilder<T> {
  readonly mode: UnionMode.Sparse;
}

Builder for dictionary-encoded data with index and value arrays.
/**
 * Dictionary builder for efficient storage of repeated values
 */
class DictionaryBuilder<T extends DataType = any, TKey extends DataType = any>
  extends Builder<Dictionary<T, TKey>> {
  readonly type: Dictionary<T, TKey>;
  readonly indexBuilder: Builder<TKey>;
  readonly valueBuilder: Builder<T>;
  /** Append value (automatically adds to dictionary if new) */
  append(value: T['TValue'] | null): this;
  /** Set value at index */
  set(index: number, value: T['TValue'] | null): this;
  /** Get current dictionary values */
  getDictionary(): Vector<T>;
  /** Get current indices */
  getIndices(): Vector<TKey>;
}

Usage Examples:
import { DictionaryBuilder, Dictionary, Utf8, Int32 } from "apache-arrow";

// Create dictionary type (string values with int32 indices)
const dictType = new Dictionary(
  new Utf8(), // Value type
  new Int32(), // Index type
  1 // Dictionary ID
);
// `nullValues` names the inputs that are treated as null (see BuilderOptions).
const dictBuilder = new DictionaryBuilder({ type: dictType, nullValues: [null] });

// Append repeated values - dictionary builder automatically deduplicates
dictBuilder
  .append('apple') // Index 0 -> 'apple'
  .append('banana') // Index 1 -> 'banana'
  .append('apple') // Index 0 (reuses existing)
  .append('cherry') // Index 2 -> 'cherry'
  .append('banana') // Index 1 (reuses existing)
  .append(null);
const dictVector = dictBuilder.finish().toVector();
// Result: indices=[0,1,0,2,1,null], dictionary=['apple','banana','cherry']

Functions for creating builders that process streaming data.
/**
 * Create builder that processes iterable input.
 *
 * The source is synchronous, so the resulting vectors are produced
 * synchronously as well (iterate with a plain `for...of`).
 */
function builderThroughIterable<T extends DataType>(
  options: IterableBuilderOptions<T>
): (source: Iterable<T['TValue']>) => Iterable<Vector<T>>;

/**
 * Create builder that processes async iterable input
 */
function builderThroughAsyncIterable<T extends DataType>(
  options: IterableBuilderOptions<T>
): (source: AsyncIterable<T['TValue']>) => AsyncIterable<Vector<T>>;

// DOM-specific (when using Arrow.dom.js)
/**
 * Create builder that processes DOM ReadableStream
 */
function builderThroughDOMStream<T extends DataType>(
  options: BuilderDuplexOptions<T>
): BuilderTransform<T>;

// Node.js-specific (when using Arrow.node.js)
/**
 * Create builder that processes Node.js streams
 */
function builderThroughNodeStream<T extends DataType>(
  options: BuilderDuplexOptions<T>
): NodeJS.ReadWriteStream;

/** Options for the iterable-based streaming builders. */
interface IterableBuilderOptions<T extends DataType> {
  type: T;
  nullValues?: any[];
  highWaterMark?: number;
  queueingStrategy?: 'bytes' | 'count';
}

Efficiently building large datasets in batches.
/**
 * Build data in batches for memory efficiency
 */
class BatchBuilder<T extends TypeMap> {
  private builders: { [K in keyof T]: Builder<T[K]> };
  private batchSize: number;
  /** Add row to current batch */
  append(row: { [K in keyof T]: T[K]['TValue'] | null }): this;
  /** Flush current batch to RecordBatch */
  flush(): RecordBatch<T>;
  /** Get completed batches */
  getBatches(): RecordBatch<T>[];
  /** Create final table from all batches */
  toTable(): Table<T>;
}

Usage Examples:
import {
  makeBuilder,
  builderThroughAsyncIterable,
  Int32,
  Utf8,
  RecordBatch,
  Table
} from "apache-arrow";

// Streaming construction
const streamingIntBuilder = builderThroughAsyncIterable({
  type: new Int32(),
  highWaterMark: 1000
});
async function* generateNumbers() {
  for (let i = 0; i < 10000; i++) {
    yield i;
  }
}
// Process stream and get vectors in chunks
for await (const vector of streamingIntBuilder(generateNumbers())) {
  console.log(`Received vector with ${vector.length} elements`);
}

// Batch construction for large datasets
const batchSize = 1000;
const intBuilder = makeBuilder({ type: new Int32() });
const stringBuilder = makeBuilder({ type: new Utf8() });
const batches: RecordBatch[] = [];
for (let i = 0; i < 10000; i++) {
  intBuilder.append(i);
  stringBuilder.append(`value_${i}`);
  // Flush every 1000 rows
  if ((i + 1) % batchSize === 0) {
    // RecordBatch columns are Data objects, which is what flush() returns
    // (a Vector is not accepted here).
    const batch = new RecordBatch({
      id: intBuilder.flush(),
      name: stringBuilder.flush()
    });
    batches.push(batch);
    // Clear builders for next batch
    intBuilder.clear();
    stringBuilder.clear();
  }
}
// Create final table
const largeTable = new Table(batches);

Best practices for efficient memory usage with builders.
import { makeBuilder, Int32 } from "apache-arrow";

// `data` (the input values) and `totalRows` (how many of them to process)
// are supplied by the surrounding application.

// Pre-allocate capacity when size is known
const builder = makeBuilder({ type: new Int32() });
builder.reserve(10000); // Reserve space for 10K elements

// Clear builders for reuse
builder.clear(); // More efficient than creating new builder

// Batch operations for large datasets
const batchSize = 1000;
const batches = [];
for (let start = 0; start < totalRows; start += batchSize) {
  // Clamp the final batch so a partial batch never reads past the input
  const end = Math.min(start + batchSize, totalRows);
  // Build batch
  for (let i = start; i < end; i++) {
    builder.append(data[i]);
  }
  // toVector() lives on the builder; flush() returns a Data, not a Vector
  batches.push(builder.toVector());
  builder.clear(); // Clear for next batch
}

Optimizations for different data types.
import {
  makeBuilder, DictionaryBuilder, FixedSizeBinaryBuilder,
  Dictionary, FixedSizeBinary, Int8, Int16, Int32, Utf8
} from "apache-arrow";

// Use appropriate types for data ranges
const smallInts = makeBuilder({ type: new Int8() }); // -128 to 127
const mediumInts = makeBuilder({ type: new Int16() }); // -32K to 32K
const largeInts = makeBuilder({ type: new Int32() }); // -2B to 2B

// Dictionary encoding for repeated strings
const dictBuilder = new DictionaryBuilder({
  type: new Dictionary(new Utf8(), new Int32())
});
// Automatically deduplicates repeated values

// Fixed-size types when possible
const uuidBuilder = new FixedSizeBinaryBuilder({
  type: new FixedSizeBinary(16)
}); // More efficient than variable-length binary