tessl/npm-papaparse

Fast and powerful CSV parser for the browser that supports web workers and streaming large files.

—

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview

Eval results

Files

Web Workers

Name: tessl/npm-papaparse
Author: tessl

Background parsing using web workers prevents UI blocking while large files are processed. PapaParse automatically creates and manages the worker threads and communicates with them through message passing.

Capabilities

Worker-Based Parsing

Enable background processing for large CSV files without blocking the main thread.

/**
 * Parse CSV using web worker
 *
 * The call itself returns nothing; parsed data and failures are delivered
 * through the callbacks supplied in `config`.
 *
 * @param input - CSV string, File object, or URL (a URL is passed as a string)
 * @param config - Configuration with worker: true
 */
Papa.parse(input: string | File, config: {
  worker: true;                            // Enable web worker processing
  step?: (result: ParseResult) => void;    // Process rows as they're parsed
  chunk?: (result: ParseResult) => void;   // Process data in chunks
  complete?: (result: ParseResult) => void; // Invoked when parsing has finished
  error?: (error: ParseError) => void;     // Worker error callback
  // Other parsing options...
}): void;

Usage Examples:

// Parse large file in background
const largeFile = document.getElementById('csvFile').files[0];

// Rows are counted here because, when `step` is registered, parsed rows are
// handed to `step` only and are not passed on to `complete`.
let parsedRowCount = 0;

Papa.parse(largeFile, {
  worker: true,          // Enable web worker
  header: true,
  dynamicTyping: true,
  step: function(result) {
    // This callback runs on main thread, once per parsed row
    parsedRowCount += 1;
    console.log('Worker parsed row:', result.data);
    updateUI(result.data);
  },
  complete: function() {
    // Do not read `results.data` here: no results are delivered to `complete`
    // while streaming with `step`, so the running count is reported instead.
    console.log('Worker completed parsing');
    console.log('Total rows:', parsedRowCount);
    hideLoadingSpinner();
  },
  error: function(error) {
    console.error('Worker error:', error);
    showErrorMessage('Failed to parse file');
  }
});

// UI remains responsive during parsing
function updateUI(rowData) {
  // Update progress bar, add rows to table, etc.
  // This runs smoothly without blocking
}

Worker Compatibility Check

Check if web workers are supported in the current environment.

Papa.WORKERS_SUPPORTED: boolean; // True if web workers are available

Usage Examples:

// Decide once whether background parsing is possible, then issue a single parse call
const workersAvailable = Boolean(Papa.WORKERS_SUPPORTED);

if (!workersAvailable) {
  // Fall back to main thread parsing
  console.warn('Web Workers not supported, using main thread');
}

// Log prefix that tells the two code paths apart in the console
const completionLabel = workersAvailable
  ? 'Parsed with worker:'
  : 'Parsed on main thread:';

Papa.parse(largeFile, {
  // `worker: true` is only added when workers can actually be used
  ...(workersAvailable ? { worker: true } : {}),
  header: true,
  complete: (results) => {
    console.log(completionLabel, results.data.length, 'rows');
  }
});

Large File Processing

Process very large CSV files without freezing the browser interface.

/** Options that matter when a large file is parsed in a web worker. */
interface WorkerConfig {
  /** Enable web worker processing */
  worker: true;

  /** Process rows as they're parsed */
  step?: (result: ParseResult) => void;

  /** Process data in chunks */
  chunk?: (result: ParseResult) => void;
}

Usage Examples:

// Process multi-megabyte CSV file
const hugeFile = event.target.files[0]; // 100MB+ CSV file

// Show progress indicator
showProgressSpinner('Processing large file...');

// Counted manually: while streaming, `complete` is not given the parsed rows
let processedRows = 0;

Papa.parse(hugeFile, {
  worker: true,
  header: true,
  // Only `chunk` is registered. In worker mode PapaParse calls `step` OR
  // `chunk` for each batch of results (step wins when both are supplied), so a
  // progress handler in `chunk` placed next to a `step` handler would never run.
  chunk: function(result) {
    // Process each row of this chunk without blocking UI
    result.data.forEach(function(row) {
      processRowData(row);

      // Update progress counter
      updateRowCounter();
      processedRows += 1;
    });

    // Update progress based on how far into the file the parser has read;
    // clamped so rounding/encoding differences never show more than 100%
    const progress = Math.min(100, (result.meta.cursor / hugeFile.size) * 100);
    updateProgressBar(progress);
  },
  complete: function() {
    hideProgressSpinner();
    showCompletionMessage(`Processed ${processedRows} rows`);
  },
  error: function(error) {
    hideProgressSpinner();
    showErrorMessage('Failed to process file: ' + error.message);
  }
});

// User can continue interacting with the page while file processes

Worker Thread Management

PapaParse automatically manages worker thread lifecycle.

/**
 * Bookkeeping values that PapaParse stores on the `Papa` object itself.
 * They are managed internally by PapaParse; application code should treat
 * them as read-only diagnostics.
 *
 * Each member below corresponds to `Papa.<member>` (for example
 * `Papa.WORKER_ID`). A dotted name is not a valid interface member, so the
 * `Papa.` prefix is carried in the documentation instead of the member name.
 */
interface WorkerManagement {
  /** `Papa.WORKER_ID` - current worker ID (set automatically) */
  WORKER_ID?: number;

  /** `Papa.BLOB_URL` - worker blob URL (created automatically) */
  BLOB_URL?: string;
}

Usage Examples:

// Multiple simultaneous worker operations
const files = Array.from(document.getElementById('multipleFiles').files);

files.forEach((file, index) => {
  Papa.parse(file, {
    worker: true,
    header: true,
    complete: function(results) {
      // `index` and `file` are captured per call, so they identify which parse
      // finished. Papa.WORKER_ID is deliberately not logged: it is assigned
      // inside the worker thread and is undefined in page (main-thread) code.
      console.log(`File ${index + 1} (${file.name}) completed:`, results.data.length, 'rows');
    }
  });
});

// PapaParse handles creating and cleaning up workers automatically

Worker Error Handling

Handle errors that occur in worker threads.

// Error-related option accepted alongside worker: true
interface WorkerErrorHandling {
  error?: (error: ParseError) => void; // Worker error callback
}

// Error shape this page uses for failures raised while parsing in a worker.
// NOTE(review): `ParseError` is declared elsewhere; confirm that 'WorkerError'
// is a `type` value PapaParse actually emits and that the `error` callback
// really receives an object with `type`/`code` before relying on this shape.
interface WorkerError extends ParseError {
  type: 'WorkerError';            // Error occurred in worker thread
  code: string;                   // Specific error code
  message: string;                // Error description
}

Usage Examples:

Papa.parse(problematicFile, {
  worker: true,
  header: true,
  error: (error) => {
    // Choose the log line and the user-facing text that match the error category
    if (error.type === 'WorkerError') {
      console.error('Worker thread error:', error.message);
      showUserMessage('Background processing failed');
    } else if (error.type === 'NetworkError') {
      console.error('Network error in worker:', error.message);
      showUserMessage('Failed to download file');
    } else {
      // Anything else is reported with the full error object
      console.error('Parse error in worker:', error);
      showUserMessage('File parsing failed');
    }

    // Optionally retry without worker
    retryWithoutWorker();
  }
});

/**
 * Parse the same file again on the main thread after a worker-mode failure.
 */
function retryWithoutWorker() {
  console.log('Retrying without web worker...');

  const mainThreadConfig = {
    worker: false, // Disable worker for retry
    header: true,
    complete(results) {
      console.log('Retry successful:', results.data.length, 'rows');
    }
  };

  Papa.parse(problematicFile, mainThreadConfig);
}

Performance Optimization

Optimize worker usage for different file sizes and processing needs.

/** Options used to tune worker-based parsing for a given file size. */
interface WorkerOptimization {
  worker: true;

  /** Optimize chunk size for worker communication */
  chunkSize?: number;

  /** Minimize data transfer */
  step?: (result: ParseResult) => void;
}

Usage Examples:

function parseWithOptimalWorkerConfig(file) {
  const fileSize = file.size;
  let config = {
    worker: true,
    header: true
  };
  
  if (fileSize > 50 * 1024 * 1024) { // Files larger than 50MB
    // Use larger chunks to reduce worker communication overhead
    config.chunkSize = Papa.RemoteChunkSize * 2; // 10MB chunks
    config.chunk = function(result) {
      // Process larger chunks less frequently
      processBigChunk(result.data);
    };
  } else if (fileSize > 5 * 1024 * 1024) { // Files 5-50MB
    // Use step processing for better progress feedback
    config.step = function(result) {
      processRowWithProgress(result.data);
    };
  } else {
    // Small files - just use worker for consistency
    config.complete = function(results) {
      processAllData(results.data);
    };
  }
  
  Papa.parse(file, config);
}

function processBigChunk(chunkData) {
  // Efficiently process large chunks
  const batchSize = 1000;
  for (let i = 0; i < chunkData.length; i += batchSize) {
    const batch = chunkData.slice(i, i + batchSize);
    processBatch(batch);
  }
}

Worker Limitations

Understand which features are available in worker mode and which need extra care.

interface WorkerLimitations {
  // Caveats that apply when worker: true
  // - Parsing runs inside the worker; the step/chunk/complete/error callbacks
  //   run on the main thread (as the examples on this page note), so DOM
  //   updates belong in those callbacks
  // - The Papa.parse call returns void in worker mode: there is no synchronous
  //   result, everything arrives through callbacks
  // - NOTE(review): pausing/resuming the parser from a callback appears to be
  //   unimplemented in worker mode - confirm against the PapaParse docs
  // - NOTE(review): function-valued options other than the four callbacks
  //   (e.g. transform, transformHeader) have to be sent to the worker, and
  //   functions cannot be structured-cloned - confirm before relying on them
}

Usage Examples:

// Features that work in worker mode

// Rows are collected in `step` because no results are handed to `complete`
// when a `step` callback is registered.
const collectedRows = [];

Papa.parse(file, {
  worker: true,

  // ✅ Plain-data options are copied to the worker as-is
  header: true,
  delimiter: ',',
  dynamicTyping: true,

  // ✅ Callbacks work (run on main thread)
  step: function(result) {
    // Process data on main thread
    collectedRows.push(result.data);
    updateDatabase(result.data);
  },

  complete: function() {
    // Final processing on main thread
    showResults(collectedRows);
  }
});

// Features that need care in worker mode
Papa.parse(file, {
  worker: true,

  // ⚠️ Function-valued options other than step/chunk/complete/error must be
  //    posted to the worker, and functions cannot be structured-cloned.
  //    Apply such transformations inside step/chunk on the main thread instead.
  // transformHeader: function(header) { return header.toLowerCase(); },
  // transform: function(value, field) { return value.trim(); },

  // ⚠️ download: true        // The request is made from inside the worker.
  //                          // NOTE(review): prefer an absolute URL - the worker
  //                          // is started from a blob: URL, so confirm relative
  //                          // URLs resolve in your target browsers
  // ⚠️ withCredentials: true // Only meaningful together with download: true
});

jQuery Integration with Workers

Use PapaParse workers with jQuery file input processing.

// jQuery plugin supports worker mode
const singleFileOptions = {
  worker: true,
  header: true,
  // Receives the parse results together with the File they came from
  complete(results, file) {
    console.log('Worker parsed file:', file.name);
    console.log('Results:', results.data.length, 'rows');
  }
};

$('#fileInput').parse({ config: singleFileOptions });

Usage Examples:

// jQuery multiple file processing with workers
$('#multipleFileInput').parse({
  // Runs before each file is parsed; may return per-file overrides
  before: (file) => {
    // Check file size and conditionally use worker
    const ONE_MB = 1024 * 1024;
    const isSmallFile = file.size < ONE_MB; // Less than 1MB

    // Small files: use main thread. Larger files: no override (default config)
    return isSmallFile ? { config: { worker: false } } : undefined;
  },
  config: {
    worker: true,
    header: true,
    step: (result) => {
      // Each file processes in its own worker
      addRowToTable(result.data);
    }
  },
  error: (error, file) => {
    console.error('Worker error for file', file.name, ':', error);
  },
  complete: () => {
    // All files completed
    console.log('All files processed by workers');
    enableDownloadButton();
  }
});

Browser Compatibility

Web worker support varies by browser and environment.

// Check comprehensive worker support
function checkWorkerSupport() {
  return Papa.WORKERS_SUPPORTED && 
         typeof Worker !== 'undefined' && 
         typeof Blob !== 'undefined' && 
         typeof URL !== 'undefined';
}

Usage Examples:

function parseWithBestAvailableMethod(file) {
  if (checkWorkerSupport()) {
    console.log('Using web worker for optimal performance');
    Papa.parse(file, {
      worker: true,
      header: true,
      complete: handleResults
    });
  } else {
    console.log('Web workers not fully supported, using main thread');
    
    // Use chunked processing to prevent blocking
    Papa.parse(file, {
      header: true,
      chunk: function(result) {
        // Process in smaller chunks to maintain responsiveness
        setTimeout(() => processChunk(result.data), 0);
      },
      complete: handleResults
    });
  }
}

function processChunk(chunkData) {
  // Process chunk with timeout to yield control
  chunkData.forEach((row, index) => {
    if (index % 100 === 0) {
      // Yield control every 100 rows
      setTimeout(() => processRow(row), 0);
    } else {
      processRow(row);
    }
  });
}

Install with Tessl CLI