# File Streaming

High-performance streaming capabilities for processing large CSV files with chunk-based processing, progress callbacks, and memory-efficient parsing. Supports local files, remote URLs, and Node.js streams.
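
The same streaming pattern applies to every input type: pass the source plus a config with streaming callbacks. A minimal sketch, where `source` stands for any supported input (a `File`, a URL with `download: true`, or a Node.js readable stream):

```javascript
// Sketch: the streaming config shape shared by all input types.
Papa.parse(source, {
  header: true,
  step: function(result) {
    console.log('Row:', result.data); // invoked once per parsed row
  },
  complete: function() {
    console.log('Done streaming');
  }
});
```
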
## Capabilities

### File Upload Processing

Parse files directly from HTML file input elements with progress tracking.

```javascript { .api }
/**
 * Parse File object with streaming support
 * @param file - Browser File object from input element
 * @param config - Configuration with streaming callbacks
 */
Papa.parse(file: File, config: {
  step?: (result: ParseResult, parser: ParserHandle) => void;
  chunk?: (result: ParseResult, parser: ParserHandle) => void;
  complete?: (result: ParseResult) => void;
  error?: (error: ParseError) => void;
}): void;
```

**Usage Examples:**

```javascript
// HTML file input processing
const fileInput = document.getElementById('csvFile');
fileInput.addEventListener('change', function(event) {
  const file = event.target.files[0];
  let rowCount = 0;

  Papa.parse(file, {
    header: true,
    step: function(result, parser) {
      // Process each row as it's parsed
      console.log('Row parsed:', result.data);
      rowCount++;

      // Optionally pause parsing (e.g. when the user clicks "pause")
      if (someCondition) {
        parser.pause();
        // Resume later with parser.resume()
      }
    },
    complete: function() {
      // With step, rows are not accumulated in the results object,
      // so count them in the step callback instead
      console.log('File parsing completed');
      console.log('Total rows:', rowCount);
    },
    error: function(error) {
      console.error('Parse error:', error);
    }
  });
});
```
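
For very large uploads, parsing can also be offloaded to a worker thread so the UI stays responsive. A minimal sketch combining `worker: true` with row streaming; `handleRow` is a hypothetical per-row handler:

```javascript
// Sketch: worker: true moves parsing off the main thread; step results
// are posted back from the worker one row at a time.
Papa.parse(file, {
  header: true,
  worker: true,
  step: function(result) {
    handleRow(result.data); // hypothetical per-row handler
  },
  complete: function() {
    console.log('Worker finished parsing');
  }
});
```
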
### Remote File Download

Download and parse CSV files from URLs with automatic streaming.

```javascript { .api }
/**
 * Download and parse CSV from URL
 * @param url - URL string pointing to CSV file
 * @param config - Configuration with download options
 */
Papa.parse(url: string, config: {
  download: true;
  downloadRequestHeaders?: { [key: string]: string };
  downloadRequestBody?: string | FormData;
  withCredentials?: boolean;
  step?: (result: ParseResult, parser: ParserHandle) => void;
  chunk?: (result: ParseResult, parser: ParserHandle) => void;
  complete?: (result: ParseResult) => void;
  error?: (error: ParseError) => void;
}): void;
```

**Usage Examples:**

```javascript
// Download and stream parse remote CSV
Papa.parse('https://example.com/data.csv', {
  download: true,
  header: true,
  step: function(result, parser) {
    // Process each row as it streams
    console.log('Streaming row:', result.data);
  },
  complete: function() {
    console.log('Download and parsing completed');
  },
  error: function(error) {
    console.error('Download or parse error:', error);
  }
});

// Download with custom headers and credentials
Papa.parse('https://api.example.com/export.csv', {
  download: true,
  downloadRequestHeaders: {
    'Authorization': 'Bearer token123',
    'Content-Type': 'application/json'
  },
  // Supplying a request body makes PapaParse issue a POST instead of a GET
  downloadRequestBody: JSON.stringify({ format: 'csv' }),
  withCredentials: true,
  header: true,
  step: function(result) {
    console.log('Authenticated data:', result.data);
  }
});
```
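
A related pattern is previewing only the first rows of a large remote file and then stopping the download. A sketch using `parser.abort()`; the 100-row limit is arbitrary:

```javascript
// Sketch: stop fetching once enough rows have been inspected.
let previewRows = 0;

Papa.parse('https://example.com/data.csv', {
  download: true,
  header: true,
  step: function(result, parser) {
    console.log('Preview row:', result.data);
    if (++previewRows >= 100) {
      parser.abort(); // stops parsing and further downloading
    }
  },
  complete: function(results) {
    // After abort(), meta.aborted should be set on the results
    console.log('Aborted early:', results.meta.aborted);
  }
});
```
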
### Step-by-Step Processing

Process CSV data row by row as it's parsed for memory efficiency.

```javascript { .api }
interface StepConfig {
  step?: (result: ParseResult, parser: ParserHandle) => void; // Row-by-row callback
}

interface ParserHandle {
  pause(): void;  // Pause parsing
  resume(): void; // Resume parsing
  abort(): void;  // Abort parsing
}
```

**Usage Examples:**

```javascript
let processedCount = 0;
const batchSize = 1000;

Papa.parse(largeCsvFile, {
  header: true,
  step: function(result, parser) {
    // Process each row individually
    const row = result.data;

    // Validate and process row
    if (isValidRow(row)) {
      processRow(row);
      processedCount++;
    }

    // Pause briefly after each batch of rows so the UI can update
    if (processedCount > 0 && processedCount % batchSize === 0) {
      parser.pause();

      // Show progress to user, then resume
      showProgress(processedCount);
      setTimeout(() => parser.resume(), 100);
    }

    // Abort on error condition
    if (result.errors.length > 0) {
      console.error('Row errors:', result.errors);
      parser.abort();
    }
  },
  complete: function() {
    console.log('Processing completed:', processedCount, 'rows');
  }
});

function isValidRow(row) {
  return row && row.name && row.email;
}

function processRow(row) {
  // Process individual row (save to database, etc.)
  console.log('Processing:', row.name);
}
```
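
Because the step callback is synchronous, doing asynchronous work per row means pausing the parser until that work settles. A sketch of the pattern, assuming a hypothetical async `saveRow` helper:

```javascript
// Sketch: pause per row, resume when the async work completes.
Papa.parse(file, {
  header: true,
  step: function(result, parser) {
    parser.pause();
    saveRow(result.data) // hypothetical async persistence call
      .then(() => parser.resume())
      .catch((err) => {
        console.error('Row failed, stopping:', err);
        parser.abort();
      });
  },
  complete: function() {
    console.log('All rows persisted');
  }
});
```
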
### Chunk Processing

Process data in larger chunks for better performance with very large files.

```javascript { .api }
interface ChunkConfig {
  chunk?: (result: ParseResult, parser: ParserHandle) => void; // Chunk callback
  chunkSize?: number; // Chunk size in bytes
}
```

**Usage Examples:**

```javascript
Papa.parse(hugeFile, {
  header: true,
  chunkSize: Papa.LocalChunkSize, // default 10MB chunks for local files
  chunk: function(result, parser) {
    // Process entire chunk at once
    console.log('Chunk received:', result.data.length, 'rows');

    // Batch process rows in chunk
    processBatch(result.data);

    // Show progress (meta.cursor is the position reached in the input)
    updateProgressBar(result.meta.cursor);

    // Pause processing if needed
    if (shouldPauseProcessing()) {
      parser.pause();
      // Resume when ready
      setTimeout(() => parser.resume(), 1000);
    }
  },
  complete: function() {
    console.log('All chunks processed');
  }
});

function processBatch(rows) {
  // Efficiently process batch of rows
  const validRows = rows.filter(isValidRow);
  saveBatchToDatabase(validRows);
}
```
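
For remote files the analogous default is `Papa.RemoteChunkSize` (5MB). A brief sketch; note that chunked downloads are fetched with HTTP Range requests, so the server must support them:

```javascript
// Sketch: tune the chunk size used for remote downloads.
Papa.parse('https://example.com/big.csv', {
  download: true,
  header: true,
  chunkSize: Papa.RemoteChunkSize, // default 5MB for remote files
  chunk: function(result) {
    console.log('Remote chunk rows:', result.data.length);
  }
});
```
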
### Node.js Stream Support

Use Node.js Readable Streams as input for server-side processing.

```javascript { .api }
/**
 * Parse Node.js ReadableStream
 * @param stream - Node.js ReadableStream
 * @param config - Configuration for stream parsing
 */
Papa.parse(stream: NodeJS.ReadableStream, config: {
  encoding?: string; // Character encoding
  step?: (result: ParseResult) => void;
  complete?: (result: ParseResult) => void;
  error?: (error: ParseError) => void;
}): void;
```

**Usage Examples:**

```javascript
const fs = require('fs');
const Papa = require('papaparse');

// Parse file stream
const fileStream = fs.createReadStream('large-data.csv');
Papa.parse(fileStream, {
  header: true,
  encoding: 'utf8',
  step: function(result) {
    // Process each row
    console.log('Stream row:', result.data);
  },
  complete: function() {
    console.log('Stream parsing completed');
  },
  error: function(error) {
    console.error('Stream error:', error);
  }
});

// Parse HTTP response stream
const https = require('https');
https.get('https://example.com/data.csv', (response) => {
  Papa.parse(response, {
    header: true,
    step: function(result) {
      console.log('HTTP stream row:', result.data);
    }
  });
});
```
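
Because any readable stream works as input, parsing composes with other stream transforms. A sketch that decompresses a gzipped CSV on the fly; the file name is illustrative:

```javascript
const fs = require('fs');
const zlib = require('zlib');
const Papa = require('papaparse');

// Sketch: gunzip a compressed CSV and parse it as it streams.
const gunzipped = fs.createReadStream('archive.csv.gz').pipe(zlib.createGunzip());

Papa.parse(gunzipped, {
  header: true,
  step: function(result) {
    console.log('Decompressed row:', result.data);
  },
  complete: function() {
    console.log('Finished parsing compressed file');
  }
});
```
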
### Duplex Stream Mode

Create a duplex stream for use with Node.js `.pipe()` operations.

```javascript { .api }
/**
 * Create duplex stream for piping
 * @param streamInput - Special constant Papa.NODE_STREAM_INPUT
 * @param config - Stream configuration
 * @returns Node.js Duplex Stream
 */
Papa.parse(streamInput: typeof Papa.NODE_STREAM_INPUT, config: {
  header?: boolean;
  delimiter?: string;
  // Note: step, complete, and worker are not available in duplex mode
}): NodeJS.ReadWriteStream;
```

**Usage Examples:**

```javascript
const fs = require('fs');
const { Transform } = require('stream');
const Papa = require('papaparse');

// Create duplex stream
const parseStream = Papa.parse(Papa.NODE_STREAM_INPUT, {
  header: true,
  delimiter: ','
});

// Handle parsed data (each 'data' event is one parsed row object)
parseStream.on('data', function(row) {
  console.log('Parsed row:', row);
});

parseStream.on('end', function() {
  console.log('Stream parsing completed');
});

parseStream.on('error', function(error) {
  console.error('Stream error:', error);
});

// The parser emits row objects, so serialize them before writing to disk
const toJsonLines = new Transform({
  writableObjectMode: true,
  transform(row, _encoding, callback) {
    callback(null, JSON.stringify(row) + '\n');
  }
});

// Pipe file through parser
fs.createReadStream('input.csv')
  .pipe(parseStream)
  .pipe(toJsonLines)
  .pipe(fs.createWriteStream('output.json'));
```
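
As an alternative to chained `.pipe()` calls, Node's `stream.pipeline` routes an error from any stage to a single callback and tears the whole chain down on failure. A self-contained sketch of the same flow:

```javascript
const fs = require('fs');
const { Transform, pipeline } = require('stream');
const Papa = require('papaparse');

pipeline(
  fs.createReadStream('input.csv'),
  Papa.parse(Papa.NODE_STREAM_INPUT, { header: true }),
  // Serialize each parsed row object as a line of JSON
  new Transform({
    writableObjectMode: true,
    transform(row, _encoding, callback) {
      callback(null, JSON.stringify(row) + '\n');
    }
  }),
  fs.createWriteStream('output.json'),
  (err) => {
    if (err) {
      console.error('Pipeline failed:', err);
    } else {
      console.log('Pipeline succeeded');
    }
  }
);
```
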
### Progress Tracking

Track parsing progress for large files with user feedback.

```javascript { .api }
interface ProgressConfig {
  step?: (result: ParseResult, parser: ParserHandle) => void;
  chunk?: (result: ParseResult, parser: ParserHandle) => void;
  beforeFirstChunk?: (chunk: string) => string; // Pre-process first chunk
}

interface ParseMeta {
  cursor: number;     // Current parsing position
  aborted: boolean;   // Whether parsing was aborted
  truncated: boolean; // Whether data was truncated
}
```

**Usage Examples:**

```javascript
const progressBar = document.getElementById('progress');
const statusText = document.getElementById('status');
let fileSize = 0;

// Get file size for progress calculation
fileInput.addEventListener('change', function(event) {
  const file = event.target.files[0];
  fileSize = file.size;

  Papa.parse(file, {
    header: true,
    step: function(result, parser) {
      // Approximate progress from how far the cursor has advanced
      const progress = (result.meta.cursor / fileSize) * 100;
      progressBar.value = progress;
      statusText.textContent = `Processing: ${progress.toFixed(1)}%`;

      // Process row
      processRow(result.data);
    },
    complete: function() {
      progressBar.value = 100;
      statusText.textContent = 'Completed!';
    }
  });
});
```
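
The `beforeFirstChunk` callback declared above is useful when a file carries preamble lines ahead of the header row. A sketch that strips a single assumed preamble line before header detection runs:

```javascript
// Sketch: drop a one-line preamble (e.g. "Report generated 2024-01-01")
// so header detection starts at the real header row.
Papa.parse(file, {
  header: true,
  beforeFirstChunk: function(chunk) {
    const firstNewline = chunk.indexOf('\n');
    return chunk.slice(firstNewline + 1);
  },
  step: function(result) {
    console.log('Row:', result.data);
  }
});
```
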
### Error Recovery

Handle errors gracefully during streaming operations.

```javascript { .api }
interface ErrorHandling {
  error?: (error: ParseError, file?: File) => void;
}
```

**Usage Examples:**

```javascript
let errorCount = 0;

Papa.parse(file, {
  header: true,
  step: function(result, parser) {
    // Check for row-level errors
    if (result.errors.length > 0) {
      console.warn('Row errors:', result.errors);
      errorCount += result.errors.length;
      // Continue processing despite errors
    }

    // Process valid data
    if (result.data && Object.keys(result.data).length > 0) {
      processRow(result.data);
    }
  },
  error: function(error, file) {
    // Fatal errors, such as an unreadable file or a failed download
    console.error('Fatal parse error:', error);
    console.error('File:', file ? file.name : 'unknown');
    showMessage('An error occurred while parsing the file: ' + error.message);
  },
  complete: function() {
    // Show summary including any errors; row errors are tallied in step,
    // since streamed results are not accumulated for the complete callback
    console.log('Parsing completed with', errorCount, 'errors');
    showSummary(errorCount);
  }
});
```
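
Row-level `ParseError` objects carry a `type` and `code` (for example, type `FieldMismatch` with code `TooFewFields` or `TooManyFields`), which allows selective recovery. A sketch that tolerates field-count mismatches but stops on anything else:

```javascript
// Sketch: log field-count mismatches, abort on structural errors.
Papa.parse(file, {
  header: true,
  step: function(result, parser) {
    for (const err of result.errors) {
      if (err.type === 'FieldMismatch') {
        // TooFewFields / TooManyFields: log and keep going
        console.warn(`Row ${err.row}: ${err.code} - ${err.message}`);
      } else {
        // e.g. quoting errors usually mean the file is malformed
        console.error('Structural error, aborting:', err);
        parser.abort();
        return;
      }
    }
    processRow(result.data);
  }
});
```
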
### Memory Management

Optimize memory usage for very large files.

```javascript { .api }
interface MemoryConfig {
  chunkSize?: number; // Control memory usage with chunk size
  step?: (result: ParseResult, parser: ParserHandle) => void; // Process immediately
}
```

**Usage Examples:**

```javascript
// Process huge files with minimal memory footprint
let rowsProcessed = 0;

Papa.parse(massiveFile, {
  header: true,
  chunkSize: 1024 * 1024, // 1MB chunks for memory efficiency
  step: function(result, parser) {
    // Process and discard each row immediately
    const processedRow = transformRow(result.data);
    saveToDatabase(processedRow);
    rowsProcessed++;

    // Row data can be garbage collected after this
  },
  complete: function() {
    // Only metadata is retained, not all the data; count rows in step
    console.log('Processed file with minimal memory usage');
    console.log('Total rows processed:', rowsProcessed);
  }
});

function transformRow(row) {
  // Transform row data as needed
  return {
    id: parseInt(row.id, 10),
    name: row.name.trim(),
    processed_at: new Date()
  };
}
```
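
To confirm that memory stays flat while streaming in Node.js, heap usage can be sampled during the parse. A small sketch; the sampling interval and file name are arbitrary:

```javascript
const fs = require('fs');
const Papa = require('papaparse');

// Sketch: log heap usage once per second while streaming a large file.
const monitor = setInterval(() => {
  const mb = (process.memoryUsage().heapUsed / 1024 / 1024).toFixed(1);
  console.log(`Heap used: ${mb} MB`);
}, 1000);

Papa.parse(fs.createReadStream('large-data.csv'), {
  header: true,
  step: function() {
    // Rows are processed and discarded; heap usage should stay roughly flat
  },
  complete: function() {
    clearInterval(monitor);
    console.log('Done; memory stayed bounded');
  }
});
```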