tessl/npm-bson

A BSON (Binary JSON) parser for Node.js and browsers with comprehensive data type support and Extended JSON functionality

docs/experimental.md

Experimental APIs

Experimental low-level parsing utilities for advanced use cases that require direct access to BSON elements or high-performance selective parsing.

⚠️ Warning: These APIs are experimental and not intended for production use. They may change or be removed in future versions without notice.

Capabilities

OnDemand Parser

Low-level BSON parsing utilities that provide direct access to BSON elements without full deserialization, enabling streaming and selective parsing scenarios.

/**
 * Experimental namespace for low-level BSON operations
 */
namespace onDemand {
  /** Parse BSON bytes into iterable elements */
  function parseToElements(bytes: Uint8Array, startOffset?: number): Iterable<BSONElement>;
  
  /** Utility classes for low-level operations */
  const ByteUtils: ByteUtils;
  const NumberUtils: NumberUtils;
}

/**
 * Represents a single BSON element in a document
 */
interface BSONElement {
  /** BSON type code */
  type: number;
  /** Field name */
  name: string;
  /** Byte offset of element in buffer */
  offset: number;
  /** Total length of element including type, name, and value */
  length: number;
  /** Raw value bytes (for advanced processing) */
  value?: Uint8Array;
}
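To make the offset and length semantics concrete, here is a self-contained sketch (no bson dependency; type codes and layout follow the BSON wire format: an int32 document size, a sequence of elements, and a trailing 0x00) that scans a hand-built document for its elements. The real onDemand.parseToElements handles every BSON type; this toy scanner covers only a few.

```typescript
// Minimal BSON element scanner over a hand-built document (illustrative only).
type Element = { type: number; name: string; offset: number; length: number };

function scanElements(bytes: Uint8Array): Element[] {
  const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
  const docSize = view.getInt32(0, true); // little-endian total document size
  const out: Element[] = [];
  let i = 4;
  while (i < docSize - 1) { // stop before the trailing 0x00
    const start = i;
    const type = bytes[i++];
    const nameStart = i;
    while (bytes[i] !== 0) i++; // field name is a null-terminated cstring
    const name = new TextDecoder().decode(bytes.subarray(nameStart, i));
    i++; // skip the name's null terminator
    let valueLen: number;
    switch (type) {
      case 0x10: valueLen = 4; break;                          // int32
      case 0x01: valueLen = 8; break;                          // double
      case 0x08: valueLen = 1; break;                          // boolean
      case 0x02: valueLen = 4 + view.getInt32(i, true); break; // string: int32 prefix + bytes (incl. null)
      default: throw new Error(`type 0x${type.toString(16)} not handled in this sketch`);
    }
    i += valueLen;
    out.push({ type, name, offset: start, length: i - start });
  }
  return out;
}

// Hand-encode { a: 42 }: size(4) + [0x10, 'a', 0x00, int32] + trailing 0x00 = 12 bytes
const doc = new Uint8Array([12, 0, 0, 0, 0x10, 0x61, 0x00, 42, 0, 0, 0, 0x00]);
console.log(scanElements(doc));
// → [ { type: 16, name: 'a', offset: 4, length: 7 } ]
```

Note how offset points at the element's type byte and length spans type byte, name, terminator, and value, matching the BSONElement interface above.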

Usage Examples:

import { serialize, onDemand, BSONType } from "bson";

// Create sample document
const document = {
  name: "Alice",
  age: 30,
  active: true,
  tags: ["user", "admin"],
  metadata: { created: new Date() }
};

const bytes = serialize(document);

// Parse elements without full deserialization
for (const element of onDemand.parseToElements(bytes)) {
  console.log(`Field: ${element.name}`);
  console.log(`Type: ${element.type} (${getTypeName(element.type)})`);
  console.log(`Offset: ${element.offset}`);
  console.log(`Length: ${element.length}`);
  
  // Handle specific types
  if (element.type === BSONType.string && element.name === 'name') {
    // Extract string value directly
    const nameValue = extractStringValue(bytes, element);
    console.log(`Name value: ${nameValue}`);
  }
  
  console.log('---');
}

function getTypeName(typeCode: number): string {
  const typeNames: Record<number, string> = {
    [BSONType.double]: 'double',
    [BSONType.string]: 'string',
    [BSONType.object]: 'object',
    [BSONType.array]: 'array',
    [BSONType.bool]: 'boolean',
    [BSONType.int]: 'int32'
  };
  return typeNames[typeCode] || 'unknown';
}

function extractStringValue(bytes: Uint8Array, element: BSONElement): string {
  // Advanced: manually extract a string value from BSON bytes
  // (simplified example - a real implementation would also bounds-check)
  const valueOffset = element.offset + element.name.length + 2; // skip type byte + name + null terminator
  const stringLength = onDemand.NumberUtils.getInt32LE(bytes, valueOffset); // length prefix includes the trailing null
  const stringStart = valueOffset + 4;
  const stringBytes = bytes.slice(stringStart, stringStart + stringLength - 1); // exclude null terminator
  return onDemand.ByteUtils.toUTF8(stringBytes);
}
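The string layout assumed above (an int32 length prefix that counts the trailing null, then the UTF-8 bytes, then 0x00) can be checked against a hand-encoded element using only DataView and TextDecoder, with no bson dependency:

```typescript
// Hand-encode the BSON string element for { name: "Alice" } and decode it back.
const enc = new TextEncoder();
const fieldName = enc.encode("name");
const strBytes = enc.encode("Alice");

// element = type(0x02) + cstring "name" + int32 length + "Alice" + 0x00
const el = new Uint8Array(1 + fieldName.length + 1 + 4 + strBytes.length + 1);
const view = new DataView(el.buffer);
let p = 0;
el[p++] = 0x02;                                      // type: string
el.set(fieldName, p); p += fieldName.length;
el[p++] = 0x00;                                      // name terminator
view.setInt32(p, strBytes.length + 1, true); p += 4; // length counts the trailing null
el.set(strBytes, p); p += strBytes.length;
el[p++] = 0x00;                                      // string terminator

// Decode: valueOffset = 1 (type byte) + name length + 1 (null terminator)
const valueOffset = 1 + "name".length + 1;
const len = view.getInt32(valueOffset, true);
const decoded = new TextDecoder().decode(
  el.subarray(valueOffset + 4, valueOffset + 4 + len - 1) // drop trailing null
);
console.log(decoded); // → "Alice"
```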

Selective Field Extraction

Use the experimental parser for selective field extraction without deserializing entire documents:

import { serialize, onDemand, BSONType } from "bson";

/**
 * Extract specific fields from BSON without full deserialization
 */
function extractFields(bytes: Uint8Array, fieldNames: string[]): Map<string, any> {
  const result = new Map<string, any>();
  const targetFields = new Set(fieldNames);
  
  for (const element of onDemand.parseToElements(bytes)) {
    if (targetFields.has(element.name)) {
      // Extract value based on type
      const value = extractElementValue(bytes, element);
      result.set(element.name, value);
      
      // Early exit once all requested fields are found
      if (result.size === targetFields.size) {
        break;
      }
    }
  }
  
  return result;
}

function extractElementValue(bytes: Uint8Array, element: BSONElement): any {
  const { type, offset, name } = element;
  const valueOffset = offset + name.length + 2; // Skip type byte + name + null terminator
  
  switch (type) {
    case BSONType.double:
      return onDemand.NumberUtils.getFloat64LE(bytes, valueOffset);
    
    case BSONType.string:
      const stringLength = onDemand.NumberUtils.getInt32LE(bytes, valueOffset);
      const stringBytes = bytes.slice(valueOffset + 4, valueOffset + 4 + stringLength - 1);
      return onDemand.ByteUtils.toUTF8(stringBytes);
    
    case BSONType.int:
      return onDemand.NumberUtils.getInt32LE(bytes, valueOffset);
    
    case BSONType.bool:
      return bytes[valueOffset] === 1;
    
    default:
      return null; // Unsupported type for this example
  }
}

// Usage example
const largeDocument = {
  _id: "507f1f77bcf86cd799439011",
  name: "John Doe",
  age: 30,
  email: "john@example.com",
  // ... many other fields
  metadata: { /* large nested object */ },
  history: [/* large array */]
};

const bytes = serialize(largeDocument);

// Extract only specific fields efficiently
const selectedFields = extractFields(bytes, ['name', 'age', 'email']);
console.log(selectedFields);
// Map { 'name' => 'John Doe', 'age' => 30, 'email' => 'john@example.com' }

Document Statistics

Analyze BSON document structure without full deserialization:

import { serialize, onDemand, BSONType } from "bson";

interface DocumentStats {
  totalFields: number;
  typeDistribution: Map<number, number>;
  totalSize: number;
  averageFieldSize: number;
  maxFieldSize: number;
  fieldNames: string[];
}

function analyzeDocument(bytes: Uint8Array): DocumentStats {
  const stats: DocumentStats = {
    totalFields: 0,
    typeDistribution: new Map(),
    totalSize: bytes.length,
    averageFieldSize: 0,
    maxFieldSize: 0,
    fieldNames: []
  };
  
  let totalFieldBytes = 0;
  
  for (const element of onDemand.parseToElements(bytes)) {
    stats.totalFields++;
    stats.fieldNames.push(element.name);
    
    // Track type distribution
    const currentCount = stats.typeDistribution.get(element.type) || 0;
    stats.typeDistribution.set(element.type, currentCount + 1);
    
    // Track field size statistics
    totalFieldBytes += element.length;
    stats.maxFieldSize = Math.max(stats.maxFieldSize, element.length);
  }
  
  stats.averageFieldSize = stats.totalFields > 0 ? totalFieldBytes / stats.totalFields : 0;
  
  return stats;
}

// Usage example
const sampleDocument = {
  _id: "507f1f77bcf86cd799439011",
  name: "Alice Smith",
  age: 28,
  scores: [95, 87, 92],
  address: {
    street: "123 Main St",
    city: "Springfield",
    zip: "12345"
  },
  active: true,
  salary: 75000.50,
  tags: ["employee", "manager", "senior"]
};

const bytes = serialize(sampleDocument);
const stats = analyzeDocument(bytes);

console.log(`Document Statistics:`);
console.log(`- Total fields: ${stats.totalFields}`);
console.log(`- Total size: ${stats.totalSize} bytes`);
console.log(`- Average field size: ${stats.averageFieldSize.toFixed(2)} bytes`);
console.log(`- Largest field: ${stats.maxFieldSize} bytes`);
console.log(`- Field names: ${stats.fieldNames.join(', ')}`);

console.log(`Type distribution:`);
stats.typeDistribution.forEach((count, type) => {
  const typeName = getTypeName(type);
  console.log(`- ${typeName}: ${count} fields`);
});

Stream Processing

Process large BSON documents or streams element by element:

import { serialize, onDemand } from "bson";

/**
 * Process BSON document elements in streaming fashion
 */
async function processDocumentStream(
  bytes: Uint8Array,
  processor: (element: BSONElement, elementBytes: Uint8Array) => Promise<void>
): Promise<void> {
  for (const element of onDemand.parseToElements(bytes)) {
    // Extract element bytes for processing
    const elementBytes = bytes.slice(element.offset, element.offset + element.length);
    
    try {
      await processor(element, elementBytes);
    } catch (error) {
      console.error(`Error processing field ${element.name}:`, error);
      // Continue processing other elements
    }
  }
}

// Usage example: Index field names and types
const bytes = serialize({ name: "Alice", age: 30, active: true });
const fieldIndex = new Map<string, { type: number; count: number }>();

await processDocumentStream(bytes, async (element, elementBytes) => {
  const existing = fieldIndex.get(element.name) || { type: element.type, count: 0 };
  existing.count++;
  fieldIndex.set(element.name, existing);
  
  // Simulate async processing (e.g., database operation)
  await new Promise(resolve => setTimeout(resolve, 1));
  
  console.log(`Processed field: ${element.name} (type: ${element.type})`);
});

console.log('Field index:', fieldIndex);

Performance Considerations

The experimental APIs are designed for performance-critical scenarios:

import { serialize, deserialize, onDemand } from "bson";

// Performance comparison example
const largeDocument = {
  // ... large document with many fields
};

const bytes = serialize(largeDocument);

// Timing full deserialization
console.time('Full deserialization');
const fullDoc = deserialize(bytes);
const targetValue = fullDoc.specificField;
console.timeEnd('Full deserialization');

// Timing selective extraction
console.time('Selective extraction');
const selectedFields = extractFields(bytes, ['specificField']);
const targetValueSelective = selectedFields.get('specificField');
console.timeEnd('Selective extraction');

// Selective extraction is typically faster for documents with many fields
// when you only need a few specific values

Limitations and Warnings

  1. Experimental Status: These APIs may change without notice
  2. Limited Type Support: Manual value extraction requires type-specific handling
  3. Complexity: Lower-level APIs require more careful error handling
  4. No Validation: Experimental parsers may not include full validation
  5. Memory Usage: Direct buffer access requires careful memory management

A safe usage pattern wraps experimental calls and falls back to the standard parser:

// Safe usage pattern
try {
  const elements = Array.from(onDemand.parseToElements(bytes));
  // Process elements safely
} catch (error) {
  console.error('Experimental parser failed:', error);
  // Fallback to standard deserialization
  const fallback = deserialize(bytes);
}

Future Directions

The experimental APIs are being developed to support:

  • High-performance streaming BSON processing
  • Memory-efficient selective field extraction
  • Advanced BSON document analysis tools
  • Custom serialization/deserialization pipelines

These APIs will mature based on community feedback and real-world usage patterns.

Install with Tessl CLI

npx tessl i tessl/npm-bson
