or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

docs

cli.md · configuration.md · core-parsing.md · error-handling.md · index.md · stream-processing.md · transformation-hooks.md
tile.json

docs/transformation-hooks.md

Data Transformation Hooks

Preprocessing hooks for customizing data before parsing occurs, including raw data processing, line-by-line transformation, and event-driven parsing with subscriptions.

Capabilities

Raw Data Hook

Process raw CSV string data before any parsing operations begin.

/**
 * Hook to process raw CSV data before parsing
 * @param onRawData - Callback to transform raw CSV string
 * @returns Converter instance for chaining
 */
preRawData(onRawData: PreRawDataCallback): Converter;

/**
 * Callback type for processing raw CSV data
 * @param csvString - The complete raw CSV string
 * @returns Transformed CSV string or Promise resolving to transformed string
 */
type PreRawDataCallback = (csvString: string) => string | PromiseLike<string>;

Usage Examples:

import csvtojson from "csvtojson";

// Remove BOM and normalize line endings
const jsonArray = await csvtojson()
  .preRawData((csvString) => {
    // Remove BOM if present
    if (csvString.charCodeAt(0) === 0xFEFF) {
      csvString = csvString.slice(1);
    }
    // Normalize line endings
    return csvString.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
  })
  .fromFile('./data-with-bom.csv');

// Async raw data processing
const jsonArray = await csvtojson()
  .preRawData(async (csvString) => {
    // Decrypt or decode data
    const decoded = await someDecryptionFunction(csvString);
    return decoded;
  })
  .fromString(encryptedCsvData);

// Replace problematic characters
const jsonArray = await csvtojson()
  .preRawData((csvString) => {
    return csvString
      .replace(/"/g, '""')  // Escape quotes
      .replace(/\0/g, '');  // Remove null bytes
  })
  .fromFile('./problematic-data.csv');

File Line Hook

Process individual CSV lines before parsing with access to line numbers.

/**
 * Hook to process individual lines before parsing
 * @param onFileLine - Callback to transform each line
 * @returns Converter instance for chaining
 */
preFileLine(onFileLine: PreFileLineCallback): Converter;

/**
 * Callback type for processing individual CSV lines
 * @param line - The CSV line string
 * @param lineNumber - Zero-based line number
 * @returns Transformed line string or Promise resolving to transformed string
 */
type PreFileLineCallback = (line: string, lineNumber: number) => string | PromiseLike<string>;

Usage Examples:

import csvtojson from "csvtojson";

// Skip header and comment lines
const jsonArray = await csvtojson()
  .preFileLine((line, lineNumber) => {
    // Skip comment lines
    if (line.startsWith('#') || line.startsWith('//')) {
      return '';
    }
    // Add line number to data
    return lineNumber === 0 ? line : `${lineNumber},${line}`;
  })
  .fromFile('./data-with-comments.csv');

// Clean and validate lines
const jsonArray = await csvtojson()
  .preFileLine((line, lineNumber) => {
    // Remove extra whitespace
    line = line.trim();
    
    // Skip empty lines
    if (line.length === 0) {
      return '';
    }
    
    // Validate expected column count
    const columns = line.split(',');
    if (columns.length !== 5) {
      console.warn(`Line ${lineNumber}: Expected 5 columns, got ${columns.length}`);
    }
    
    return line;
  })
  .fromFile('./data.csv');

// Async line processing
const jsonArray = await csvtojson()
  .preFileLine(async (line, lineNumber) => {
    if (lineNumber > 0) { // Skip header
      // Enrich data from external source
      const enrichedData = await enrichLineData(line);
      return enrichedData;
    }
    return line;
  })
  .fromFile('./data.csv');

Event Subscription

Subscribe to parsing events for real-time processing without collecting all results in memory.

/**
 * Subscribe to parsing events for streaming data processing
 * @param onNext - Callback for each parsed data object
 * @param onError - Callback for parsing errors
 * @param onCompleted - Callback when parsing is complete
 * @returns Converter instance for chaining
 */
subscribe(
  onNext?: (data: any, lineNumber: number) => void | PromiseLike<void>,
  onError?: (err: CSVError) => void,
  onCompleted?: () => void
): Converter;

Usage Examples:

import csvtojson from "csvtojson";

// Real-time data processing
csvtojson()
  .fromFile('./large-dataset.csv')
  .subscribe(
    (jsonObj, lineNumber) => {
      // Process each record immediately
      console.log(`Line ${lineNumber}:`, jsonObj);
      
      // Save to database, send to API, etc.
      saveToDatabase(jsonObj);
    },
    (error) => {
      console.error('Parsing error:', error);
    },
    () => {
      console.log('Parsing completed');
    }
  );

// Async processing with backpressure
csvtojson()
  .fromFile('./data.csv')
  .subscribe(
    async (jsonObj, lineNumber) => {
      // Async processing - csvtojson will wait
      await processDataAsync(jsonObj);
      
      if (lineNumber % 100 === 0) {
        console.log(`Processed ${lineNumber} records`);
      }
    }
  );

// Error handling and recovery
csvtojson({ 
  maxRowLength: 10000,
  checkColumn: true 
})
.fromFile('./data.csv')
.subscribe(
  (jsonObj, lineNumber) => {
    try {
      validateAndProcess(jsonObj);
    } catch (error) {
      console.error(`Error processing line ${lineNumber}:`, error);
    }
  },
  (csvError) => {
    // Handle parsing errors
    console.error(`CSV parsing error at line ${csvError.line}: ${csvError.err}`);
    if (csvError.extra) {
      console.error(`Near: ${csvError.extra}`);
    }
  },
  () => {
    console.log('File processing completed');
  }
);

Hook Chaining

Combine multiple hooks for complex data transformation pipelines.

/**
 * Chain multiple hooks for comprehensive data processing
 */
interface Converter {
  preRawData(callback: PreRawDataCallback): Converter;
  preFileLine(callback: PreFileLineCallback): Converter;
  subscribe(onNext?, onError?, onCompleted?): Converter;
}

Usage Examples:

import csvtojson from "csvtojson";

// Complete transformation pipeline
const result = await csvtojson({
  delimiter: ",",
  trim: true,
  checkType: true
})
.preRawData((csvString) => {
  // First: clean raw data
  return csvString
    .replace(/\r\n/g, '\n')    // Normalize line endings
    .replace(/^\uFEFF/, '');   // Remove BOM
})
.preFileLine((line, lineNumber) => {
  // Second: process each line
  if (lineNumber === 0) {
    // Normalize headers
    return line.toLowerCase().replace(/\s+/g, '_');
  }
  
  // Skip empty lines
  return line.trim() || '';
})
.subscribe(
  // Third: process parsed objects
  (jsonObj, lineNumber) => {
    console.log(`Processed record ${lineNumber}:`, jsonObj);
  }
)
.fromFile('./complex-data.csv');

Performance Considerations

Optimize hook usage for large file processing.

// Efficient hooks for large files
csvtojson({
  maxRowLength: 100000,
  alwaysSplitAtEOL: true
})
.preRawData((csvString) => {
  // Minimal raw processing to avoid memory overhead
  return csvString.replace(/\r\n/g, '\n');
})
.preFileLine((line, lineNumber) => {
  // Fast line processing
  return line.trim();
})
.subscribe(
  (jsonObj, lineNumber) => {
    // Stream processing without accumulating in memory
    processAndDiscard(jsonObj);
  }
)
.fromFile('./very-large-file.csv');