# File Streaming

High-performance streaming capabilities for processing large CSV files with chunk-based processing, progress callbacks, and memory-efficient parsing. Supports local files, remote URLs, and Node.js streams.
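
The same streaming pattern applies to every input type: pass the source plus a config with streaming callbacks. A minimal sketch, where `source` stands for any supported input (a `File`, a URL with `download: true`, or a Node.js readable stream):

```javascript
// Sketch: the streaming config shape shared by all input types.
Papa.parse(source, {
  header: true,
  step: function(result) {
    console.log('Row:', result.data); // invoked once per parsed row
  },
  complete: function() {
    console.log('Done streaming');
  }
});
```
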
## Capabilities

### File Upload Processing

Parse files directly from HTML file input elements with progress tracking.

```javascript { .api }
/**
 * Parse File object with streaming support
 * @param file - Browser File object from input element
 * @param config - Configuration with streaming callbacks
 */
Papa.parse(file: File, config: {
  step?: (result: ParseResult, parser: ParserHandle) => void;
  chunk?: (result: ParseResult, parser: ParserHandle) => void;
  complete?: (result: ParseResult) => void;
  error?: (error: ParseError) => void;
}): void;
```

**Usage Examples:**

```javascript
// HTML file input processing
const fileInput = document.getElementById('csvFile');
fileInput.addEventListener('change', function(event) {
  const file = event.target.files[0];
  let rowCount = 0;

  Papa.parse(file, {
    header: true,
    step: function(result, parser) {
      // Process each row as it's parsed
      console.log('Row parsed:', result.data);
      rowCount++;

      // Optionally pause parsing (e.g. when the user clicks "pause")
      if (someCondition) {
        parser.pause();
        // Resume later with parser.resume()
      }
    },
    complete: function() {
      // With step, rows are not accumulated in the results object,
      // so count them in the step callback instead
      console.log('File parsing completed');
      console.log('Total rows:', rowCount);
    },
    error: function(error) {
      console.error('Parse error:', error);
    }
  });
});
```
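
For very large uploads, parsing can also be offloaded to a worker thread so the UI stays responsive. A minimal sketch combining `worker: true` with row streaming; `handleRow` is a hypothetical per-row handler:

```javascript
// Sketch: worker: true moves parsing off the main thread; step results
// are posted back from the worker one row at a time.
Papa.parse(file, {
  header: true,
  worker: true,
  step: function(result) {
    handleRow(result.data); // hypothetical per-row handler
  },
  complete: function() {
    console.log('Worker finished parsing');
  }
});
```
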
### Remote File Download

Download and parse CSV files from URLs with automatic streaming.

```javascript { .api }
/**
 * Download and parse CSV from URL
 * @param url - URL string pointing to CSV file
 * @param config - Configuration with download options
 */
Papa.parse(url: string, config: {
  download: true;
  downloadRequestHeaders?: { [key: string]: string };
  downloadRequestBody?: string | FormData;
  withCredentials?: boolean;
  step?: (result: ParseResult, parser: ParserHandle) => void;
  chunk?: (result: ParseResult, parser: ParserHandle) => void;
  complete?: (result: ParseResult) => void;
  error?: (error: ParseError) => void;
}): void;
```

**Usage Examples:**

```javascript
// Download and stream parse remote CSV
Papa.parse('https://example.com/data.csv', {
  download: true,
  header: true,
  step: function(result, parser) {
    // Process each row as it streams
    console.log('Streaming row:', result.data);
  },
  complete: function() {
    console.log('Download and parsing completed');
  },
  error: function(error) {
    console.error('Download or parse error:', error);
  }
});

// Download with custom headers and credentials
Papa.parse('https://api.example.com/export.csv', {
  download: true,
  downloadRequestHeaders: {
    'Authorization': 'Bearer token123',
    'Content-Type': 'application/json'
  },
  // Supplying a request body makes PapaParse issue a POST instead of a GET
  downloadRequestBody: JSON.stringify({ format: 'csv' }),
  withCredentials: true,
  header: true,
  step: function(result) {
    console.log('Authenticated data:', result.data);
  }
});
```
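
A related pattern is previewing only the first rows of a large remote file and then stopping the download. A sketch using `parser.abort()`; the 100-row limit is arbitrary:

```javascript
// Sketch: stop fetching once enough rows have been inspected.
let previewRows = 0;

Papa.parse('https://example.com/data.csv', {
  download: true,
  header: true,
  step: function(result, parser) {
    console.log('Preview row:', result.data);
    if (++previewRows >= 100) {
      parser.abort(); // stops parsing and further downloading
    }
  },
  complete: function(results) {
    // After abort(), meta.aborted should be set on the results
    console.log('Aborted early:', results.meta.aborted);
  }
});
```
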
### Step-by-Step Processing

Process CSV data row by row as it's parsed for memory efficiency.

```javascript { .api }
interface StepConfig {
  step?: (result: ParseResult, parser: ParserHandle) => void; // Row-by-row callback
}

interface ParserHandle {
  pause(): void;  // Pause parsing
  resume(): void; // Resume parsing
  abort(): void;  // Abort parsing
}
```

**Usage Examples:**

```javascript
let processedCount = 0;
const batchSize = 1000;

Papa.parse(largeCsvFile, {
  header: true,
  step: function(result, parser) {
    // Process each row individually
    const row = result.data;

    // Validate and process row
    if (isValidRow(row)) {
      processRow(row);
      processedCount++;
    }

    // Pause briefly after each batch of rows so the UI can update
    if (processedCount > 0 && processedCount % batchSize === 0) {
      parser.pause();

      // Show progress to user, then resume
      showProgress(processedCount);
      setTimeout(() => parser.resume(), 100);
    }

    // Abort on error condition
    if (result.errors.length > 0) {
      console.error('Row errors:', result.errors);
      parser.abort();
    }
  },
  complete: function() {
    console.log('Processing completed:', processedCount, 'rows');
  }
});

function isValidRow(row) {
  return row && row.name && row.email;
}

function processRow(row) {
  // Process individual row (save to database, etc.)
  console.log('Processing:', row.name);
}
```
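
Because the step callback is synchronous, doing asynchronous work per row means pausing the parser until that work settles. A sketch of the pattern, assuming a hypothetical async `saveRow` helper:

```javascript
// Sketch: pause per row, resume when the async work completes.
Papa.parse(file, {
  header: true,
  step: function(result, parser) {
    parser.pause();
    saveRow(result.data) // hypothetical async persistence call
      .then(() => parser.resume())
      .catch((err) => {
        console.error('Row failed, stopping:', err);
        parser.abort();
      });
  },
  complete: function() {
    console.log('All rows persisted');
  }
});
```
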
### Chunk Processing

Process data in larger chunks for better performance with very large files.

```javascript { .api }
interface ChunkConfig {
  chunk?: (result: ParseResult, parser: ParserHandle) => void; // Chunk callback
  chunkSize?: number; // Chunk size in bytes
}
```

**Usage Examples:**

```javascript
Papa.parse(hugeFile, {
  header: true,
  chunkSize: Papa.LocalChunkSize, // default 10MB chunks for local files
  chunk: function(result, parser) {
    // Process entire chunk at once
    console.log('Chunk received:', result.data.length, 'rows');

    // Batch process rows in chunk
    processBatch(result.data);

    // Show progress (meta.cursor is the position reached in the input)
    updateProgressBar(result.meta.cursor);

    // Pause processing if needed
    if (shouldPauseProcessing()) {
      parser.pause();
      // Resume when ready
      setTimeout(() => parser.resume(), 1000);
    }
  },
  complete: function() {
    console.log('All chunks processed');
  }
});

function processBatch(rows) {
  // Efficiently process batch of rows
  const validRows = rows.filter(isValidRow);
  saveBatchToDatabase(validRows);
}
```
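
For remote files the analogous default is `Papa.RemoteChunkSize` (5MB). A brief sketch; note that chunked downloads are fetched with HTTP Range requests, so the server must support them:

```javascript
// Sketch: tune the chunk size used for remote downloads.
Papa.parse('https://example.com/big.csv', {
  download: true,
  header: true,
  chunkSize: Papa.RemoteChunkSize, // default 5MB for remote files
  chunk: function(result) {
    console.log('Remote chunk rows:', result.data.length);
  }
});
```
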
### Node.js Stream Support

Use Node.js Readable Streams as input for server-side processing.

```javascript { .api }
/**
 * Parse Node.js ReadableStream
 * @param stream - Node.js ReadableStream
 * @param config - Configuration for stream parsing
 */
Papa.parse(stream: NodeJS.ReadableStream, config: {
  encoding?: string; // Character encoding
  step?: (result: ParseResult) => void;
  complete?: (result: ParseResult) => void;
  error?: (error: ParseError) => void;
}): void;
```

**Usage Examples:**

```javascript
const fs = require('fs');
const Papa = require('papaparse');

// Parse file stream
const fileStream = fs.createReadStream('large-data.csv');
Papa.parse(fileStream, {
  header: true,
  encoding: 'utf8',
  step: function(result) {
    // Process each row
    console.log('Stream row:', result.data);
  },
  complete: function() {
    console.log('Stream parsing completed');
  },
  error: function(error) {
    console.error('Stream error:', error);
  }
});

// Parse HTTP response stream
const https = require('https');
https.get('https://example.com/data.csv', (response) => {
  Papa.parse(response, {
    header: true,
    step: function(result) {
      console.log('HTTP stream row:', result.data);
    }
  });
});
```
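
Because any readable stream works as input, parsing composes with other stream transforms. A sketch that decompresses a gzipped CSV on the fly; the file name is illustrative:

```javascript
const fs = require('fs');
const zlib = require('zlib');
const Papa = require('papaparse');

// Sketch: gunzip a compressed CSV and parse it as it streams.
const gunzipped = fs.createReadStream('archive.csv.gz').pipe(zlib.createGunzip());

Papa.parse(gunzipped, {
  header: true,
  step: function(result) {
    console.log('Decompressed row:', result.data);
  },
  complete: function() {
    console.log('Finished parsing compressed file');
  }
});
```
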
### Duplex Stream Mode

Create a duplex stream for use with Node.js `.pipe()` operations.

```javascript { .api }
/**
 * Create duplex stream for piping
 * @param streamInput - Special constant Papa.NODE_STREAM_INPUT
 * @param config - Stream configuration
 * @returns Node.js Duplex Stream
 */
Papa.parse(streamInput: typeof Papa.NODE_STREAM_INPUT, config: {
  header?: boolean;
  delimiter?: string;
  // Note: step, complete, and worker are not available in duplex mode
}): NodeJS.ReadWriteStream;
```

**Usage Examples:**

```javascript
const fs = require('fs');
const { Transform } = require('stream');
const Papa = require('papaparse');

// Create duplex stream
const parseStream = Papa.parse(Papa.NODE_STREAM_INPUT, {
  header: true,
  delimiter: ','
});

// Handle parsed data (each 'data' event is one parsed row object)
parseStream.on('data', function(row) {
  console.log('Parsed row:', row);
});

parseStream.on('end', function() {
  console.log('Stream parsing completed');
});

parseStream.on('error', function(error) {
  console.error('Stream error:', error);
});

// The parser emits row objects, so serialize them before writing to disk
const toJsonLines = new Transform({
  writableObjectMode: true,
  transform(row, _encoding, callback) {
    callback(null, JSON.stringify(row) + '\n');
  }
});

// Pipe file through parser
fs.createReadStream('input.csv')
  .pipe(parseStream)
  .pipe(toJsonLines)
  .pipe(fs.createWriteStream('output.json'));
```
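
As an alternative to chained `.pipe()` calls, Node's `stream.pipeline` routes an error from any stage to a single callback and tears the whole chain down on failure. A self-contained sketch of the same flow:

```javascript
const fs = require('fs');
const { Transform, pipeline } = require('stream');
const Papa = require('papaparse');

pipeline(
  fs.createReadStream('input.csv'),
  Papa.parse(Papa.NODE_STREAM_INPUT, { header: true }),
  // Serialize each parsed row object as a line of JSON
  new Transform({
    writableObjectMode: true,
    transform(row, _encoding, callback) {
      callback(null, JSON.stringify(row) + '\n');
    }
  }),
  fs.createWriteStream('output.json'),
  (err) => {
    if (err) {
      console.error('Pipeline failed:', err);
    } else {
      console.log('Pipeline succeeded');
    }
  }
);
```
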
### Progress Tracking

Track parsing progress for large files with user feedback.

```javascript { .api }
interface ProgressConfig {
  step?: (result: ParseResult, parser: ParserHandle) => void;
  chunk?: (result: ParseResult, parser: ParserHandle) => void;
  beforeFirstChunk?: (chunk: string) => string; // Pre-process first chunk
}

interface ParseMeta {
  cursor: number;     // Current parsing position
  aborted: boolean;   // Whether parsing was aborted
  truncated: boolean; // Whether data was truncated
}
```

**Usage Examples:**

```javascript
const progressBar = document.getElementById('progress');
const statusText = document.getElementById('status');
let fileSize = 0;

// Get file size for progress calculation
fileInput.addEventListener('change', function(event) {
  const file = event.target.files[0];
  fileSize = file.size;

  Papa.parse(file, {
    header: true,
    step: function(result, parser) {
      // Approximate progress from how far the cursor has advanced
      const progress = (result.meta.cursor / fileSize) * 100;
      progressBar.value = progress;
      statusText.textContent = `Processing: ${progress.toFixed(1)}%`;

      // Process row
      processRow(result.data);
    },
    complete: function() {
      progressBar.value = 100;
      statusText.textContent = 'Completed!';
    }
  });
});
```
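
The `beforeFirstChunk` callback declared above is useful when a file carries preamble lines ahead of the header row. A sketch that strips a single assumed preamble line before header detection runs:

```javascript
// Sketch: drop a one-line preamble (e.g. "Report generated 2024-01-01")
// so header detection starts at the real header row.
Papa.parse(file, {
  header: true,
  beforeFirstChunk: function(chunk) {
    const firstNewline = chunk.indexOf('\n');
    return chunk.slice(firstNewline + 1);
  },
  step: function(result) {
    console.log('Row:', result.data);
  }
});
```
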
### Error Recovery

Handle errors gracefully during streaming operations.

```javascript { .api }
interface ErrorHandling {
  error?: (error: ParseError, file?: File) => void;
}
```

**Usage Examples:**

```javascript
let errorCount = 0;

Papa.parse(file, {
  header: true,
  step: function(result, parser) {
    // Check for row-level errors
    if (result.errors.length > 0) {
      console.warn('Row errors:', result.errors);
      errorCount += result.errors.length;
      // Continue processing despite errors
    }

    // Process valid data
    if (result.data && Object.keys(result.data).length > 0) {
      processRow(result.data);
    }
  },
  error: function(error, file) {
    // Fatal errors, such as an unreadable file or a failed download
    console.error('Fatal parse error:', error);
    console.error('File:', file ? file.name : 'unknown');
    showMessage('An error occurred while parsing the file: ' + error.message);
  },
  complete: function() {
    // Show summary including any errors; row errors are tallied in step,
    // since streamed results are not accumulated for the complete callback
    console.log('Parsing completed with', errorCount, 'errors');
    showSummary(errorCount);
  }
});
```
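
Row-level `ParseError` objects carry a `type` and `code` (for example, type `FieldMismatch` with code `TooFewFields` or `TooManyFields`), which allows selective recovery. A sketch that tolerates field-count mismatches but stops on anything else:

```javascript
// Sketch: log field-count mismatches, abort on structural errors.
Papa.parse(file, {
  header: true,
  step: function(result, parser) {
    for (const err of result.errors) {
      if (err.type === 'FieldMismatch') {
        // TooFewFields / TooManyFields: log and keep going
        console.warn(`Row ${err.row}: ${err.code} - ${err.message}`);
      } else {
        // e.g. quoting errors usually mean the file is malformed
        console.error('Structural error, aborting:', err);
        parser.abort();
        return;
      }
    }
    processRow(result.data);
  }
});
```
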
### Memory Management

Optimize memory usage for very large files.

```javascript { .api }
interface MemoryConfig {
  chunkSize?: number; // Control memory usage with chunk size
  step?: (result: ParseResult, parser: ParserHandle) => void; // Process immediately
}
```

**Usage Examples:**

```javascript
// Process huge files with minimal memory footprint
let rowsProcessed = 0;

Papa.parse(massiveFile, {
  header: true,
  chunkSize: 1024 * 1024, // 1MB chunks for memory efficiency
  step: function(result, parser) {
    // Process and discard each row immediately
    const processedRow = transformRow(result.data);
    saveToDatabase(processedRow);
    rowsProcessed++;

    // Row data can be garbage collected after this
  },
  complete: function() {
    // Only metadata is retained, not all the data; count rows in step
    console.log('Processed file with minimal memory usage');
    console.log('Total rows processed:', rowsProcessed);
  }
});

function transformRow(row) {
  // Transform row data as needed
  return {
    id: parseInt(row.id, 10),
    name: row.name.trim(),
    processed_at: new Date()
  };
}
```
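
To confirm that memory stays flat while streaming in Node.js, heap usage can be sampled during the parse. A small sketch; the sampling interval and file name are arbitrary:

```javascript
const fs = require('fs');
const Papa = require('papaparse');

// Sketch: log heap usage once per second while streaming a large file.
const monitor = setInterval(() => {
  const mb = (process.memoryUsage().heapUsed / 1024 / 1024).toFixed(1);
  console.log(`Heap used: ${mb} MB`);
}, 1000);

Papa.parse(fs.createReadStream('large-data.csv'), {
  header: true,
  step: function() {
    // Rows are processed and discarded; heap usage should stay roughly flat
  },
  complete: function() {
    clearInterval(monitor);
    console.log('Done; memory stayed bounded');
  }
});
```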