CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/npm-papaparse

Fast and powerful CSV parser for the browser that supports web workers and streaming large files.

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview
Eval results
Files

streaming.mddocs/

File Streaming

High-performance streaming capabilities for processing large CSV files with chunk-based processing, progress callbacks, and memory-efficient parsing. Supports local files, remote URLs, and Node.js streams.

Capabilities

File Upload Processing

Parse files directly from HTML file input elements with progress tracking.

/**
 * Parse File object with streaming support
 * @param file - Browser File object from input element
 * @param config - Configuration with streaming callbacks
 */
Papa.parse(file: File, config: {
  step?: (result: ParseResult, parser: ParserHandle) => void;
  chunk?: (result: ParseResult, parser: ParserHandle) => void;
  complete?: (result: ParseResult) => void;
  error?: (error: ParseError) => void;
}): void;

Usage Examples:

// HTML file input processing
const fileInput = document.getElementById('csvFile');
fileInput.addEventListener('change', function(event) {
  const file = event.target.files[0];
  
  Papa.parse(file, {
    header: true,
    step: function(result, parser) {
      // Process each row as it's parsed
      console.log('Row parsed:', result.data);
      
      // Optionally pause parsing
      if (someCondition) {
        parser.pause();
        // Resume later with parser.resume()
      }
    },
  complete: function(results) {
    console.log('File parsing completed');
    // Note: when a step callback is supplied, parsed rows are NOT
    // accumulated in results.data — count rows inside step if you
    // need a total (results.data is empty here when streaming).
  },
    error: function(error) {
      console.error('Parse error:', error);
    }
  });
});

Remote File Download

Download and parse CSV files from URLs with automatic streaming.

/**
 * Download and parse CSV from URL
 * @param url - URL string pointing to CSV file
 * @param config - Configuration with download options
 */
Papa.parse(url: string, config: {
  download: true;
  downloadRequestHeaders?: { [key: string]: string };
  downloadRequestBody?: string | FormData;
  withCredentials?: boolean;
  step?: (result: ParseResult, parser: ParserHandle) => void;
  chunk?: (result: ParseResult, parser: ParserHandle) => void;
  complete?: (result: ParseResult) => void;
  error?: (error: ParseError) => void;
}): void;

Usage Examples:

// Download and stream parse remote CSV
Papa.parse('https://example.com/data.csv', {
  download: true,
  header: true,
  step: function(result, parser) {
    // Process each row as it streams
    console.log('Streaming row:', result.data);
  },
  complete: function(results) {
    console.log('Download and parsing completed');
  },
  error: function(error) {
    console.error('Download or parse error:', error);
  }
});

// Download with custom headers and credentials
Papa.parse('https://api.example.com/export.csv', {
  download: true,
  downloadRequestHeaders: {
    'Authorization': 'Bearer token123',
    'Content-Type': 'application/json'
  },
  downloadRequestBody: JSON.stringify({ format: 'csv' }),
  withCredentials: true,
  header: true,
  step: function(result) {
    console.log('Authenticated data:', result.data);
  }
});

Step-by-Step Processing

Process CSV data row by row as it's parsed for memory efficiency.

interface StepConfig {
  step?: (result: ParseResult, parser: ParserHandle) => void; // Row-by-row callback
}

interface ParserHandle {
  pause(): void;                  // Pause parsing
  resume(): void;                 // Resume parsing
  abort(): void;                  // Abort parsing
}

Usage Examples:

let processedCount = 0;
const maxRows = 1000;

Papa.parse(largeCsvFile, {
  header: true,
  step: function(result, parser) {
    // Process each row individually
    const row = result.data;
    
    // Validate and process row
    if (isValidRow(row)) {
      processRow(row);
      processedCount++;
    }
    
    // Pause after processing certain number of rows
    if (processedCount >= maxRows) {
      parser.pause();
      
      // Show progress to user, then resume
      showProgress(processedCount);
      setTimeout(() => parser.resume(), 100);
    }
    
    // Abort on error condition
    if (result.errors.length > 0) {
      console.error('Row errors:', result.errors);
      parser.abort();
    }
  },
  complete: function(results) {
    console.log('Processing completed:', processedCount, 'rows');
  }
});

function isValidRow(row) {
  return row && row.name && row.email;
}

function processRow(row) {
  // Process individual row (save to database, etc.)
  console.log('Processing:', row.name);
}

Chunk Processing

Process data in larger chunks for better performance with very large files.

interface ChunkConfig {
  chunk?: (result: ParseResult, parser: ParserHandle) => void; // Chunk callback
  chunkSize?: number;             // Chunk size in bytes
}

Usage Examples:

Papa.parse(hugeFile, {
  header: true,
  chunkSize: Papa.LocalChunkSize, // 10MB chunks
  chunk: function(result, parser) {
    // Process entire chunk at once
    console.log('Chunk received:', result.data.length, 'rows');
    
    // Batch process rows in chunk
    processBatch(result.data);
    
    // Show progress
    updateProgressBar(result.meta.cursor);
    
    // Pause processing if needed
    if (shouldPauseProcessing()) {
      parser.pause();
      // Resume when ready
      setTimeout(() => parser.resume(), 1000);
    }
  },
  complete: function(results) {
    console.log('All chunks processed');
  }
});

function processBatch(rows) {
  // Efficiently process batch of rows
  const validRows = rows.filter(isValidRow);
  saveBatchToDatabase(validRows);
}

Node.js Stream Support

Use Node.js Readable Streams as input for server-side processing.

/**
 * Parse Node.js ReadableStream
 * @param stream - Node.js ReadableStream
 * @param config - Configuration for stream parsing
 */
Papa.parse(stream: NodeJS.ReadableStream, config: {
  encoding?: string;              // Character encoding
  step?: (result: ParseResult) => void;
  complete?: (result: ParseResult) => void;
  error?: (error: ParseError) => void;
}): void;

Usage Examples:

const fs = require('fs');
const Papa = require('papaparse');

// Parse file stream
const fileStream = fs.createReadStream('large-data.csv');
Papa.parse(fileStream, {
  header: true,
  encoding: 'utf8',
  step: function(result) {
    // Process each row
    console.log('Stream row:', result.data);
  },
  complete: function(results) {
    console.log('Stream parsing completed');
  },
  error: function(error) {
    console.error('Stream error:', error);
  }
});

// Parse HTTP response stream
const https = require('https');
https.get('https://example.com/data.csv', (response) => {
  Papa.parse(response, {
    header: true,
    step: function(result) {
      console.log('HTTP stream row:', result.data);
    }
  });
});

Duplex Stream Mode

Create a duplex stream for use with Node.js .pipe() operations.

/**
 * Create duplex stream for piping
 * @param streamInput - Special constant Papa.NODE_STREAM_INPUT
 * @param config - Stream configuration
 * @returns Node.js Duplex Stream
 */
Papa.parse(streamInput: typeof Papa.NODE_STREAM_INPUT, config: {
  header?: boolean;
  delimiter?: string;
  // Note: step, complete, worker not available in duplex mode
}): NodeJS.ReadWriteStream;

Usage Examples:

const fs = require('fs');
const Papa = require('papaparse');

// Create duplex stream
const parseStream = Papa.parse(Papa.NODE_STREAM_INPUT, {
  header: true,
  delimiter: ','
});

// Handle parsed data
parseStream.on('data', function(chunk) {
  console.log('Parsed chunk:', chunk);
});

parseStream.on('end', function() {
  console.log('Stream parsing completed');
});

parseStream.on('error', function(error) {
  console.error('Stream error:', error);
});

// Pipe file through parser. The parse stream emits parsed rows in
// object mode, so serialize each row before writing to a byte stream —
// piping objects directly into fs.createWriteStream() throws.
const { Transform } = require('stream');
const toJsonLines = new Transform({
  writableObjectMode: true,
  transform(row, _encoding, callback) {
    callback(null, JSON.stringify(row) + '\n');
  }
});

fs.createReadStream('input.csv')
  .pipe(parseStream)
  .pipe(toJsonLines)
  .pipe(fs.createWriteStream('output.ndjson'));

Progress Tracking

Track parsing progress for large files with user feedback.

interface ProgressConfig {
  step?: (result: ParseResult, parser: ParserHandle) => void;
  chunk?: (result: ParseResult, parser: ParserHandle) => void;
  beforeFirstChunk?: (chunk: string) => string; // Pre-process first chunk
}

interface ParseMeta {
  cursor: number;                 // Current parsing position
  aborted: boolean;               // Whether parsing was aborted
  truncated: boolean;             // Whether data was truncated
}

Usage Examples:

const progressBar = document.getElementById('progress');
const statusText = document.getElementById('status');
let fileSize = 0;

// Get file size for progress calculation
fileInput.addEventListener('change', function(event) {
  const file = event.target.files[0];
  fileSize = file.size;
  
  Papa.parse(file, {
    header: true,
    step: function(result, parser) {
      // Approximate progress: meta.cursor is a character offset while
      // file.size is in bytes, so this can drift slightly for
      // multi-byte UTF-8 content
      const progress = (result.meta.cursor / fileSize) * 100;
      progressBar.value = progress;
      statusText.textContent = `Processing: ${progress.toFixed(1)}%`;
      
      // Process row
      processRow(result.data);
    },
    complete: function(results) {
      progressBar.value = 100;
      statusText.textContent = 'Completed!';
      console.log('Final results:', results);
    }
  });
});

Error Recovery

Handle errors gracefully during streaming operations.

interface ErrorHandling {
  error?: (error: ParseError, file?: File) => void;
}

Usage Examples:

Papa.parse(file, {
  header: true,
  step: function(result, parser) {
    // Check for row-level errors
    if (result.errors.length > 0) {
      console.warn('Row errors:', result.errors);
      // Continue processing despite errors
    }
    
    // Process valid data
    if (result.data && Object.keys(result.data).length > 0) {
      processRow(result.data);
    }
  },
  error: function(error, file) {
    console.error('Fatal parse error:', error);
    console.error('File:', file ? file.name : 'unknown');
    
    // Handle different error types — a Papa Parse ParseError has
    // { type, code, message, row }; codes include 'MissingQuotes',
    // 'UndetectableDelimiter', 'TooFewFields', 'TooManyFields'
    switch (error.code) {
      case 'MissingQuotes':
        showMessage('Malformed CSV: an unclosed quoted field was found');
        break;
      case 'UndetectableDelimiter':
        showMessage('Could not detect a delimiter; comma was assumed');
        break;
      default:
        showMessage('An error occurred while parsing the file');
    }
  },
  complete: function(results) {
    // Show summary including any errors
    console.log('Parsing completed with', results.errors.length, 'errors');
    showSummary(results);
  }
});

Memory Management

Optimize memory usage for very large files.

interface MemoryConfig {
  chunkSize?: number;             // Control memory usage with chunk size
  step?: (result: ParseResult, parser: ParserHandle) => void; // Process immediately
}

Usage Examples:

// Process huge files with minimal memory footprint
Papa.parse(massiveFile, {
  header: true,
  chunkSize: 1024 * 1024, // 1MB chunks for memory efficiency
  step: function(result, parser) {
    // Process and discard each row immediately
    const processedRow = transformRow(result.data);
    saveToDatabase(processedRow);
    
    // Row data can be garbage collected after this
  },
  complete: function(results) {
    // Only metadata is retained, not all the data
    console.log('Processed file with minimal memory usage');
    // meta.cursor is the character offset reached, not a row count —
    // track a counter inside step if you need the number of rows
    console.log('Characters processed:', results.meta.cursor);
  }
});

function transformRow(row) {
  // Transform row data as needed
  return {
    id: parseInt(row.id),
    name: row.name.trim(),
    processed_at: new Date()
  };
}

Install with Tessl CLI

npx tessl i tessl/npm-papaparse

docs

index.md

parsing.md

streaming.md

unparsing.md

workers.md

tile.json