CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/npm-papaparse

Fast and powerful CSV parser for the browser that supports web workers and streaming large files.

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview
Eval results
Files

streaming.mddocs/

File Streaming

High-performance streaming capabilities for processing large CSV files with chunk-based processing, progress callbacks, and memory-efficient parsing. Supports local files, remote URLs, and Node.js streams.

Capabilities

File Upload Processing

Parse files directly from HTML file input elements with progress tracking.

/**
 * Parse File object with streaming support
 * @param file - Browser File object from input element
 * @param config - Configuration with streaming callbacks
 */
Papa.parse(file: File, config: {
  step?: (result: ParseResult, parser: ParserHandle) => void;
  chunk?: (result: ParseResult, parser: ParserHandle) => void;
  complete?: (result: ParseResult) => void;
  error?: (error: ParseError) => void;
}): void;

Usage Examples:

// HTML file input processing
const fileInput = document.getElementById('csvFile');
fileInput.addEventListener('change', function(event) {
  const file = event.target.files[0];
  
  Papa.parse(file, {
    header: true,
    step: function(result, parser) {
      // Process each row as it's parsed
      console.log('Row parsed:', result.data);
      
      // Optionally pause parsing
      if (someCondition) {
        parser.pause();
        // Resume later with parser.resume()
      }
    },
  complete: function(results) {
    console.log('File parsing completed');
    // Note: when a step callback is supplied, parsed rows are NOT
    // accumulated in results.data — count rows inside step if you
    // need a total (results.data is empty here when streaming).
  },
    error: function(error) {
      console.error('Parse error:', error);
    }
  });
});

Remote File Download

Download and parse CSV files from URLs with automatic streaming.

/**
 * Download and parse CSV from URL
 * @param url - URL string pointing to CSV file
 * @param config - Configuration with download options
 */
Papa.parse(url: string, config: {
  download: true;
  downloadRequestHeaders?: { [key: string]: string };
  downloadRequestBody?: string | FormData;
  withCredentials?: boolean;
  step?: (result: ParseResult, parser: ParserHandle) => void;
  chunk?: (result: ParseResult, parser: ParserHandle) => void;
  complete?: (result: ParseResult) => void;
  error?: (error: ParseError) => void;
}): void;

Usage Examples:

// Download and stream parse remote CSV
Papa.parse('https://example.com/data.csv', {
  download: true,
  header: true,
  step: function(result, parser) {
    // Process each row as it streams
    console.log('Streaming row:', result.data);
  },
  complete: function(results) {
    console.log('Download and parsing completed');
  },
  error: function(error) {
    console.error('Download or parse error:', error);
  }
});

// Download with custom headers and credentials
Papa.parse('https://api.example.com/export.csv', {
  download: true,
  downloadRequestHeaders: {
    'Authorization': 'Bearer token123',
    'Content-Type': 'application/json'
  },
  downloadRequestBody: JSON.stringify({ format: 'csv' }),
  withCredentials: true,
  header: true,
  step: function(result) {
    console.log('Authenticated data:', result.data);
  }
});

Step-by-Step Processing

Process CSV data row by row as it's parsed for memory efficiency.

interface StepConfig {
  step?: (result: ParseResult, parser: ParserHandle) => void; // Row-by-row callback
}

interface ParserHandle {
  pause(): void;                  // Pause parsing
  resume(): void;                 // Resume parsing
  abort(): void;                  // Abort parsing
}

Usage Examples:

let processedCount = 0;
const maxRows = 1000;

Papa.parse(largeCsvFile, {
  header: true,
  step: function(result, parser) {
    // Process each row individually
    const row = result.data;
    
    // Validate and process row
    if (isValidRow(row)) {
      processRow(row);
      processedCount++;
    }
    
    // Pause after processing certain number of rows
    if (processedCount >= maxRows) {
      parser.pause();
      
      // Show progress to user, then resume
      showProgress(processedCount);
      setTimeout(() => parser.resume(), 100);
    }
    
    // Abort on error condition
    if (result.errors.length > 0) {
      console.error('Row errors:', result.errors);
      parser.abort();
    }
  },
  complete: function(results) {
    console.log('Processing completed:', processedCount, 'rows');
  }
});

function isValidRow(row) {
  return row && row.name && row.email;
}

function processRow(row) {
  // Process individual row (save to database, etc.)
  console.log('Processing:', row.name);
}

Chunk Processing

Process data in larger chunks for better performance with very large files.

interface ChunkConfig {
  chunk?: (result: ParseResult, parser: ParserHandle) => void; // Chunk callback
  chunkSize?: number;             // Chunk size in bytes
}

Usage Examples:

Papa.parse(hugeFile, {
  header: true,
  chunkSize: Papa.LocalChunkSize, // 10MB chunks
  chunk: function(result, parser) {
    // Process entire chunk at once
    console.log('Chunk received:', result.data.length, 'rows');
    
    // Batch process rows in chunk
    processBatch(result.data);
    
    // Show progress
    updateProgressBar(result.meta.cursor);
    
    // Pause processing if needed
    if (shouldPauseProcessing()) {
      parser.pause();
      // Resume when ready
      setTimeout(() => parser.resume(), 1000);
    }
  },
  complete: function(results) {
    console.log('All chunks processed');
  }
});

function processBatch(rows) {
  // Efficiently process batch of rows
  const validRows = rows.filter(isValidRow);
  saveBatchToDatabase(validRows);
}

Node.js Stream Support

Use Node.js Readable Streams as input for server-side processing.

/**
 * Parse Node.js ReadableStream
 * @param stream - Node.js ReadableStream
 * @param config - Configuration for stream parsing
 */
Papa.parse(stream: NodeJS.ReadableStream, config: {
  encoding?: string;              // Character encoding
  step?: (result: ParseResult) => void;
  complete?: (result: ParseResult) => void;
  error?: (error: ParseError) => void;
}): void;

Usage Examples:

const fs = require('fs');
const Papa = require('papaparse');

// Parse file stream
const fileStream = fs.createReadStream('large-data.csv');
Papa.parse(fileStream, {
  header: true,
  encoding: 'utf8',
  step: function(result) {
    // Process each row
    console.log('Stream row:', result.data);
  },
  complete: function(results) {
    console.log('Stream parsing completed');
  },
  error: function(error) {
    console.error('Stream error:', error);
  }
});

// Parse HTTP response stream
const https = require('https');
https.get('https://example.com/data.csv', (response) => {
  Papa.parse(response, {
    header: true,
    step: function(result) {
      console.log('HTTP stream row:', result.data);
    }
  });
});

Duplex Stream Mode

Create a duplex stream for use with Node.js .pipe() operations.

/**
 * Create duplex stream for piping
 * @param streamInput - Special constant Papa.NODE_STREAM_INPUT
 * @param config - Stream configuration
 * @returns Node.js Duplex Stream
 */
Papa.parse(streamInput: typeof Papa.NODE_STREAM_INPUT, config: {
  header?: boolean;
  delimiter?: string;
  // Note: step, complete, worker not available in duplex mode
}): NodeJS.ReadWriteStream;

Usage Examples:

const fs = require('fs');
const Papa = require('papaparse');

// Create duplex stream
const parseStream = Papa.parse(Papa.NODE_STREAM_INPUT, {
  header: true,
  delimiter: ','
});

// Handle parsed data
parseStream.on('data', function(chunk) {
  console.log('Parsed chunk:', chunk);
});

parseStream.on('end', function() {
  console.log('Stream parsing completed');
});

parseStream.on('error', function(error) {
  console.error('Stream error:', error);
});

// Pipe file through parser. The parse stream emits parsed rows in
// object mode, so serialize each row before writing to a byte stream —
// piping objects directly into fs.createWriteStream() throws.
const { Transform } = require('stream');
const toJsonLines = new Transform({
  writableObjectMode: true,
  transform(row, _encoding, callback) {
    callback(null, JSON.stringify(row) + '\n');
  }
});

fs.createReadStream('input.csv')
  .pipe(parseStream)
  .pipe(toJsonLines)
  .pipe(fs.createWriteStream('output.ndjson'));

Progress Tracking

Track parsing progress for large files with user feedback.

interface ProgressConfig {
  step?: (result: ParseResult, parser: ParserHandle) => void;
  chunk?: (result: ParseResult, parser: ParserHandle) => void;
  beforeFirstChunk?: (chunk: string) => string; // Pre-process first chunk
}

interface ParseMeta {
  cursor: number;                 // Current parsing position
  aborted: boolean;               // Whether parsing was aborted
  truncated: boolean;             // Whether data was truncated
}

Usage Examples:

const progressBar = document.getElementById('progress');
const statusText = document.getElementById('status');
let fileSize = 0;

// Get file size for progress calculation
fileInput.addEventListener('change', function(event) {
  const file = event.target.files[0];
  fileSize = file.size;
  
  Papa.parse(file, {
    header: true,
    step: function(result, parser) {
      // Approximate progress: meta.cursor is a character offset while
      // file.size is in bytes, so this can drift slightly for
      // multi-byte UTF-8 content
      const progress = (result.meta.cursor / fileSize) * 100;
      progressBar.value = progress;
      statusText.textContent = `Processing: ${progress.toFixed(1)}%`;
      
      // Process row
      processRow(result.data);
    },
    complete: function(results) {
      progressBar.value = 100;
      statusText.textContent = 'Completed!';
      console.log('Final results:', results);
    }
  });
});

Error Recovery

Handle errors gracefully during streaming operations.

interface ErrorHandling {
  error?: (error: ParseError, file?: File) => void;
}

Usage Examples:

Papa.parse(file, {
  header: true,
  step: function(result, parser) {
    // Check for row-level errors
    if (result.errors.length > 0) {
      console.warn('Row errors:', result.errors);
      // Continue processing despite errors
    }
    
    // Process valid data
    if (result.data && Object.keys(result.data).length > 0) {
      processRow(result.data);
    }
  },
  error: function(error, file) {
    console.error('Fatal parse error:', error);
    console.error('File:', file ? file.name : 'unknown');
    
    // Handle different error types — a Papa Parse ParseError has
    // { type, code, message, row }; codes include 'MissingQuotes',
    // 'UndetectableDelimiter', 'TooFewFields', 'TooManyFields'
    switch (error.code) {
      case 'MissingQuotes':
        showMessage('Malformed CSV: an unclosed quoted field was found');
        break;
      case 'UndetectableDelimiter':
        showMessage('Could not detect a delimiter; comma was assumed');
        break;
      default:
        showMessage('An error occurred while parsing the file');
    }
  },
  complete: function(results) {
    // Show summary including any errors
    console.log('Parsing completed with', results.errors.length, 'errors');
    showSummary(results);
  }
});

Memory Management

Optimize memory usage for very large files.

interface MemoryConfig {
  chunkSize?: number;             // Control memory usage with chunk size
  step?: (result: ParseResult, parser: ParserHandle) => void; // Process immediately
}

Usage Examples:

// Process huge files with minimal memory footprint
Papa.parse(massiveFile, {
  header: true,
  chunkSize: 1024 * 1024, // 1MB chunks for memory efficiency
  step: function(result, parser) {
    // Process and discard each row immediately
    const processedRow = transformRow(result.data);
    saveToDatabase(processedRow);
    
    // Row data can be garbage collected after this
  },
  complete: function(results) {
    // Only metadata is retained, not all the data
    console.log('Processed file with minimal memory usage');
    // meta.cursor is the character offset reached, not a row count —
    // track a counter inside step if you need the number of rows
    console.log('Characters processed:', results.meta.cursor);
  }
});

function transformRow(row) {
  // Transform row data as needed
  return {
    id: parseInt(row.id),
    name: row.name.trim(),
    processed_at: new Date()
  };
}

Install with Tessl CLI

npx tessl i tessl/npm-papaparse

docs

index.md

parsing.md

streaming.md

unparsing.md

workers.md

tile.json