Experimental low-level parsing utilities for advanced use cases requiring direct BSON element access and high-performance parsing scenarios.
⚠️ Warning: These APIs are experimental and not intended for production use. They may change or be removed in future versions without notice.
Low-level BSON parsing utilities that provide direct access to BSON elements without full deserialization, enabling streaming and selective parsing scenarios.
/**
 * Experimental namespace exposing low-level, on-demand BSON parsing
 * primitives. ⚠️ Unstable API — may change or be removed without notice.
 */
namespace onDemand {
/**
 * Lazily walk the top-level elements of a serialized BSON document
 * without materializing JavaScript values.
 * @param bytes - serialized BSON document
 * @param startOffset - optional byte offset at which parsing begins
 * @returns an iterable of positional {@link BSONElement} descriptors
 */
function parseToElements(bytes: Uint8Array, startOffset?: number): Iterable<BSONElement>;
/** Byte-level helpers (e.g. UTF-8 decoding) used alongside the parser */
const ByteUtils: ByteUtils;
/** Numeric helpers for reading little-endian int32/float64 from buffers */
const NumberUtils: NumberUtils;
}
/**
 * Describes a single element (field) of a BSON document by its position
 * in the source buffer rather than by a decoded value.
 */
interface BSONElement {
/** BSON type code (the element's leading type byte) */
type: number;
/** Field name (the element's key) */
name: string;
/** Byte offset of the element within the source buffer */
offset: number;
/** Total length of element including type, name, and value */
length: number;
/** Raw value bytes, when provided, for advanced processing */
value?: Uint8Array;
}

Usage Examples:
import { serialize, onDemand, BSONType } from "bson";
// Create a sample document to serialize and then inspect element-by-element
const document = {
name: "Alice",
age: 30,
active: true,
tags: ["user", "admin"],
metadata: { created: new Date() }
};
const bytes = serialize(document);
// Iterate top-level elements without full deserialization; each element
// reports only its type code, name, and position in the buffer
for (const element of onDemand.parseToElements(bytes)) {
console.log(`Field: ${element.name}`);
console.log(`Type: ${element.type} (${getTypeName(element.type)})`);
console.log(`Offset: ${element.offset}`);
console.log(`Length: ${element.length}`);
// Decode a value manually only for the specific field we care about
if (element.type === BSONType.string && element.name === 'name') {
// Extract string value directly from the raw bytes
const nameValue = extractStringValue(bytes, element);
console.log(`Name value: ${nameValue}`);
}
console.log('---');
}
/**
 * Map a BSON type code to a human-readable name.
 * @param typeCode - numeric BSON type tag (an element's leading type byte)
 * @returns the type's name, or 'unknown' for codes not in the table
 */
function getTypeName(typeCode: number): string {
  // Explicit Record<number, string> makes indexing by an arbitrary
  // number type-safe under strict mode (the untyped literal would
  // produce an implicit-any index error).
  const typeNames: Record<number, string> = {
    [BSONType.double]: 'double',
    [BSONType.string]: 'string',
    [BSONType.object]: 'object',
    [BSONType.array]: 'array',
    [BSONType.bool]: 'boolean',
    [BSONType.int]: 'int32'
  };
  // `??` only falls back on undefined, unlike `||` which would also
  // clobber any falsy value.
  return typeNames[typeCode] ?? 'unknown';
}
function extractStringValue(bytes: Uint8Array, element: BSONElement): string {
// Advanced: manually extract string value from BSON bytes
// This is just an example - actual implementation would be more complex
const stringStart = element.offset + element.name.length + 2; // Skip type + name + null terminator
const stringLength = onDemand.NumberUtils.getInt32LE(bytes, stringStart - 4);
const stringBytes = bytes.slice(stringStart, stringStart + stringLength - 1); // Exclude null terminator
return onDemand.ByteUtils.toUTF8(stringBytes);
}

Use the experimental parser for selective field extraction without deserializing entire documents:
import { serialize, onDemand, BSONType } from "bson";
/**
 * Pull only the requested top-level fields out of a serialized BSON
 * document, skipping deserialization of everything else.
 * @param bytes - serialized BSON document
 * @param fieldNames - names of the top-level fields to extract
 * @returns a map from field name to decoded value, for fields found
 */
function extractFields(bytes: Uint8Array, fieldNames: string[]): Map<string, any> {
  const result = new Map<string, any>();
  const targetFields = new Set(fieldNames);
  for (const element of onDemand.parseToElements(bytes)) {
    // Skip everything the caller did not ask for.
    if (!targetFields.has(element.name)) {
      continue;
    }
    result.set(element.name, extractElementValue(bytes, element));
    // Stop scanning as soon as every requested field has been seen.
    if (result.size === fieldNames.length) {
      break;
    }
  }
  return result;
}
/**
 * Decode the JavaScript value of a single BSON element from raw bytes.
 * Supports double, string, int32, and boolean; any other type yields null.
 * @param bytes - the full serialized document
 * @param element - positional descriptor of the element to decode
 * @returns the decoded value, or null for unsupported types
 */
function extractElementValue(bytes: Uint8Array, element: BSONElement): any {
  const { type, offset, name } = element;
  // Value region starts after: type byte (1) + name bytes + NUL (1).
  const valueOffset = offset + name.length + 2;
  switch (type) {
    case BSONType.double:
      return onDemand.NumberUtils.getFloat64LE(bytes, valueOffset);
    case BSONType.string: {
      // Braces scope the lexical declarations to this case
      // (a bare `const` in a case clause leaks into the whole switch —
      // the no-case-declarations hazard).
      // The int32 length prefix counts the trailing NUL; exclude it.
      const stringLength = onDemand.NumberUtils.getInt32LE(bytes, valueOffset);
      const stringBytes = bytes.slice(valueOffset + 4, valueOffset + 4 + stringLength - 1);
      return onDemand.ByteUtils.toUTF8(stringBytes);
    }
    case BSONType.int:
      return onDemand.NumberUtils.getInt32LE(bytes, valueOffset);
    case BSONType.bool:
      return bytes[valueOffset] === 1;
    default:
      return null; // Unsupported type for this example
  }
}
// Usage example: extract a handful of fields from a large document
const largeDocument = {
_id: "507f1f77bcf86cd799439011",
name: "John Doe",
age: 30,
email: "john@example.com",
// ... many other fields
metadata: { /* large nested object */ },
history: [/* large array */]
};
const bytes = serialize(largeDocument);
// Extract only specific fields efficiently — the rest of the document
// (including the large nested parts) is never deserialized
const selectedFields = extractFields(bytes, ['name', 'age', 'email']);
console.log(selectedFields);
// Map { 'name' => 'John Doe', 'age' => 30, 'email' => 'john@example.com' }

Analyze BSON document structure without full deserialization:
import { serialize, onDemand, BSONType } from "bson";
/** Summary statistics gathered from one serialized BSON document. */
interface DocumentStats {
/** Number of top-level elements in the document */
totalFields: number;
/** Count of fields per BSON type code */
typeDistribution: Map<number, number>;
/** Total size of the serialized document in bytes */
totalSize: number;
/** Mean element length (type + name + value) in bytes */
averageFieldSize: number;
/** Length of the largest single element in bytes */
maxFieldSize: number;
/** Names of all top-level fields, in document order */
fieldNames: string[];
}
/**
 * Walk a serialized BSON document and collect per-field statistics
 * (counts, sizes, type distribution) without decoding any values.
 * @param bytes - serialized BSON document
 * @returns aggregate statistics for the document's top-level elements
 */
function analyzeDocument(bytes: Uint8Array): DocumentStats {
  const typeDistribution = new Map<number, number>();
  const fieldNames: string[] = [];
  let totalFields = 0;
  let totalFieldBytes = 0;
  let maxFieldSize = 0;
  for (const element of onDemand.parseToElements(bytes)) {
    totalFields += 1;
    fieldNames.push(element.name);
    // Tally how many fields carry each BSON type code.
    typeDistribution.set(element.type, (typeDistribution.get(element.type) || 0) + 1);
    // Track size statistics across elements.
    totalFieldBytes += element.length;
    if (element.length > maxFieldSize) {
      maxFieldSize = element.length;
    }
  }
  return {
    totalFields,
    typeDistribution,
    totalSize: bytes.length,
    averageFieldSize: totalFields > 0 ? totalFieldBytes / totalFields : 0,
    maxFieldSize,
    fieldNames
  };
}
// Usage example: gather structure statistics for a mixed-type document
const sampleDocument = {
_id: "507f1f77bcf86cd799439011",
name: "Alice Smith",
age: 28,
scores: [95, 87, 92],
address: {
street: "123 Main St",
city: "Springfield",
zip: "12345"
},
active: true,
salary: 75000.50,
tags: ["employee", "manager", "senior"]
};
const bytes = serialize(sampleDocument);
const stats = analyzeDocument(bytes);
// Print the collected statistics
console.log(`Document Statistics:`);
console.log(`- Total fields: ${stats.totalFields}`);
console.log(`- Total size: ${stats.totalSize} bytes`);
console.log(`- Average field size: ${stats.averageFieldSize.toFixed(2)} bytes`);
console.log(`- Largest field: ${stats.maxFieldSize} bytes`);
console.log(`- Field names: ${stats.fieldNames.join(', ')}`);
console.log(`Type distribution:`);
stats.typeDistribution.forEach((count, type) => {
const typeName = getTypeName(type);
console.log(`- ${typeName}: ${count} fields`);
});

Process large BSON documents or streams element by element:
import { serialize, onDemand } from "bson";
/**
 * Visit each element of a serialized BSON document in order, handing the
 * element descriptor plus its raw bytes to an async processor. A failing
 * processor call is logged and skipped so later elements are still visited.
 * @param bytes - serialized BSON document
 * @param processor - async callback invoked once per element
 */
async function processDocumentStream(
bytes: Uint8Array,
processor: (element: BSONElement, elementBytes: Uint8Array) => Promise<void>
): Promise<void> {
  for (const element of onDemand.parseToElements(bytes)) {
    const { offset, length } = element;
    // Slice out just this element's bytes for the caller.
    const elementBytes = bytes.slice(offset, offset + length);
    try {
      await processor(element, elementBytes);
    } catch (error) {
      // One bad field must not abort the whole stream.
      console.error(`Error processing field ${element.name}:`, error);
    }
  }
}
// Usage example: Index field names and types seen in the document
const fieldIndex = new Map<string, { type: number; count: number }>();
await processDocumentStream(bytes, async (element, elementBytes) => {
const existing = fieldIndex.get(element.name) || { type: element.type, count: 0 };
existing.count++;
fieldIndex.set(element.name, existing);
// Simulate async processing (e.g., database operation)
await new Promise(resolve => setTimeout(resolve, 1));
console.log(`Processed field: ${element.name} (type: ${element.type})`);
});
console.log('Field index:', fieldIndex);

The experimental APIs are designed for performance-critical scenarios:
import { serialize, deserialize, onDemand } from "bson";
// Performance comparison example: full deserialization vs. selective
// extraction of a single field
const largeDocument = {
// ... large document with many fields
};
const bytes = serialize(largeDocument);
// Timing full deserialization (decodes every field)
console.time('Full deserialization');
const fullDoc = deserialize(bytes);
const targetValue = fullDoc.specificField;
console.timeEnd('Full deserialization');
// Timing selective extraction (decodes only the requested field)
console.time('Selective extraction');
const selectedFields = extractFields(bytes, ['specificField']);
const targetValueSelective = selectedFields.get('specificField');
console.timeEnd('Selective extraction');
// Selective extraction is typically faster for documents with many fields
// when you only need a few specific values

// Safe usage pattern
// Wrap experimental parsing in try/catch and keep a stable fallback path
try {
const elements = Array.from(onDemand.parseToElements(bytes));
// Process elements safely
} catch (error) {
console.error('Experimental parser failed:', error);
// Fallback to standard deserialization
const fallback = deserialize(bytes);
}

The experimental APIs are being developed to support additional use cases such as streaming parsing and selective field extraction. These APIs will mature based on community feedback and real-world usage patterns.