
# File Streaming

High-performance streaming capabilities for processing large CSV files with chunk-based processing, progress callbacks, and memory-efficient parsing. Supports local files, remote URLs, and Node.js streams.

## Capabilities

### File Upload Processing

Parse files directly from HTML file input elements with progress tracking.

```javascript { .api }
/**
 * Parse File object with streaming support
 * @param file - Browser File object from input element
 * @param config - Configuration with streaming callbacks
 */
Papa.parse(file: File, config: {
  step?: (result: ParseResult, parser: ParserHandle) => void;
  chunk?: (result: ParseResult, parser: ParserHandle) => void;
  complete?: (result: ParseResult) => void;
  error?: (error: ParseError) => void;
}): void;
```

**Usage Examples:**

```javascript
// HTML file input processing
const fileInput = document.getElementById('csvFile');
fileInput.addEventListener('change', function(event) {
  const file = event.target.files[0];
  let rowCount = 0;

  Papa.parse(file, {
    header: true,
    step: function(result, parser) {
      // Process each row as it's parsed
      console.log('Row parsed:', result.data);
      rowCount++;

      // Optionally pause parsing
      if (someCondition) {
        parser.pause();
        // Resume later with parser.resume()
      }
    },
    complete: function() {
      // Parsed rows are not accumulated when streaming, so track counts yourself
      console.log('File parsing completed');
      console.log('Total rows:', rowCount);
    },
    error: function(error) {
      console.error('Parse error:', error);
    }
  });
});
```

### Remote File Download

Download and parse CSV files from URLs with automatic streaming.

```javascript { .api }
/**
 * Download and parse CSV from URL
 * @param url - URL string pointing to CSV file
 * @param config - Configuration with download options
 */
Papa.parse(url: string, config: {
  download: true;
  downloadRequestHeaders?: { [key: string]: string };
  downloadRequestBody?: string | FormData;
  withCredentials?: boolean;
  step?: (result: ParseResult, parser: ParserHandle) => void;
  chunk?: (result: ParseResult, parser: ParserHandle) => void;
  complete?: (result: ParseResult) => void;
  error?: (error: ParseError) => void;
}): void;
```

**Usage Examples:**

```javascript
// Download and stream parse remote CSV
Papa.parse('https://example.com/data.csv', {
  download: true,
  header: true,
  step: function(result, parser) {
    // Process each row as it streams
    console.log('Streaming row:', result.data);
  },
  complete: function(results) {
    console.log('Download and parsing completed');
  },
  error: function(error) {
    console.error('Download or parse error:', error);
  }
});

// Download with custom headers and credentials
Papa.parse('https://api.example.com/export.csv', {
  download: true,
  downloadRequestHeaders: {
    'Authorization': 'Bearer token123',
    'Content-Type': 'application/json'
  },
  downloadRequestBody: JSON.stringify({ format: 'csv' }),
  withCredentials: true,
  header: true,
  step: function(result) {
    console.log('Authenticated data:', result.data);
  }
});
```

### Step-by-Step Processing

Process CSV data row by row as it's parsed for memory efficiency.

```javascript { .api }
interface StepConfig {
  step?: (result: ParseResult, parser: ParserHandle) => void; // Row-by-row callback
}

interface ParserHandle {
  pause(): void;  // Pause parsing
  resume(): void; // Resume parsing
  abort(): void;  // Abort parsing
}
```

**Usage Examples:**

```javascript
let processedCount = 0;
const maxRows = 1000;

Papa.parse(largeCsvFile, {
  header: true,
  step: function(result, parser) {
    // Process each row individually
    const row = result.data;

    // Validate and process row
    if (isValidRow(row)) {
      processRow(row);
      processedCount++;
    }

    // Pause once after processing a certain number of rows
    if (processedCount === maxRows) {
      parser.pause();

      // Show progress to user, then resume
      showProgress(processedCount);
      setTimeout(() => parser.resume(), 100);
    }

    // Abort on error condition
    if (result.errors.length > 0) {
      console.error('Row errors:', result.errors);
      parser.abort();
    }
  },
  complete: function(results) {
    console.log('Processing completed:', processedCount, 'rows');
  }
});

function isValidRow(row) {
  return row && row.name && row.email;
}

function processRow(row) {
  // Process individual row (save to database, etc.)
  console.log('Processing:', row.name);
}
```

### Chunk Processing

Process data in larger chunks for better performance with very large files.

```javascript { .api }
interface ChunkConfig {
  chunk?: (result: ParseResult, parser: ParserHandle) => void; // Chunk callback
  chunkSize?: number; // Chunk size in bytes
}
```

**Usage Examples:**

```javascript
Papa.parse(hugeFile, {
  header: true,
  chunkSize: Papa.LocalChunkSize, // 10MB chunks
  chunk: function(result, parser) {
    // Process entire chunk at once
    console.log('Chunk received:', result.data.length, 'rows');

    // Batch process rows in chunk
    processBatch(result.data);

    // Show progress
    updateProgressBar(result.meta.cursor);

    // Pause processing if needed
    if (shouldPauseProcessing()) {
      parser.pause();
      // Resume when ready
      setTimeout(() => parser.resume(), 1000);
    }
  },
  complete: function(results) {
    console.log('All chunks processed');
  }
});

function processBatch(rows) {
  // Efficiently process batch of rows
  const validRows = rows.filter(isValidRow);
  saveBatchToDatabase(validRows);
}
```
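Chunk size can also be tuned for remote files. A brief sketch, with an illustrative URL; `download: true` streams the file and the per-call `chunkSize` overrides the library-wide `Papa.RemoteChunkSize` default:

```javascript
Papa.parse('https://example.com/big-export.csv', {
  download: true,
  header: true,
  chunkSize: 512 * 1024, // 512KB chunks to keep memory and per-callback work small
  chunk: function(result) {
    processBatch(result.data);
  },
  complete: function() {
    console.log('Remote file processed in chunks');
  }
});
```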


### Node.js Stream Support

Use Node.js Readable Streams as input for server-side processing.

```javascript { .api }
/**
 * Parse Node.js ReadableStream
 * @param stream - Node.js ReadableStream
 * @param config - Configuration for stream parsing
 */
Papa.parse(stream: NodeJS.ReadableStream, config: {
  encoding?: string; // Character encoding
  step?: (result: ParseResult) => void;
  complete?: (result: ParseResult) => void;
  error?: (error: ParseError) => void;
}): void;
```

**Usage Examples:**

```javascript
const fs = require('fs');
const Papa = require('papaparse');

// Parse file stream
const fileStream = fs.createReadStream('large-data.csv');
Papa.parse(fileStream, {
  header: true,
  encoding: 'utf8',
  step: function(result) {
    // Process each row
    console.log('Stream row:', result.data);
  },
  complete: function(results) {
    console.log('Stream parsing completed');
  },
  error: function(error) {
    console.error('Stream error:', error);
  }
});

// Parse HTTP response stream
const https = require('https');
https.get('https://example.com/data.csv', (response) => {
  Papa.parse(response, {
    header: true,
    step: function(result) {
      console.log('HTTP stream row:', result.data);
    }
  });
});
```

### Duplex Stream Mode

Create a duplex stream for use with Node.js `.pipe()` operations.

```javascript { .api }
/**
 * Create duplex stream for piping
 * @param streamInput - Special constant Papa.NODE_STREAM_INPUT
 * @param config - Stream configuration
 * @returns Node.js Duplex Stream
 */
Papa.parse(streamInput: typeof Papa.NODE_STREAM_INPUT, config: {
  header?: boolean;
  delimiter?: string;
  // Note: step, complete, and worker are not available in duplex mode
}): NodeJS.ReadWriteStream;
```

**Usage Examples:**

```javascript
const fs = require('fs');
const Papa = require('papaparse');

// Create duplex stream
const parseStream = Papa.parse(Papa.NODE_STREAM_INPUT, {
  header: true,
  delimiter: ','
});

// Handle parsed data
parseStream.on('data', function(chunk) {
  console.log('Parsed chunk:', chunk);
});

parseStream.on('end', function() {
  console.log('Stream parsing completed');
});

parseStream.on('error', function(error) {
  console.error('Stream error:', error);
});

// Pipe file through parser; parsed rows are delivered to the 'data' handler above.
// The parser emits row objects, so piping straight into a file write stream
// needs a stringifying transform (see the sketch below).
fs.createReadStream('input.csv').pipe(parseStream);
```
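To write the parsed rows back out as JSON, place a stringifying transform between the parser and the output file stream. A minimal sketch, assuming newline-delimited JSON output is acceptable; the transform and file names are illustrative, not part of the PapaParse API:

```javascript
const fs = require('fs');
const { Transform } = require('stream');
const Papa = require('papaparse');

// Illustrative transform: serialize each parsed row object as one JSON line (NDJSON)
const rowToJson = new Transform({
  writableObjectMode: true, // accept row objects from the parser
  transform(row, _encoding, callback) {
    callback(null, JSON.stringify(row) + '\n');
  }
});

fs.createReadStream('input.csv')
  .pipe(Papa.parse(Papa.NODE_STREAM_INPUT, { header: true }))
  .pipe(rowToJson)
  .pipe(fs.createWriteStream('output.ndjson'));
```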


### Progress Tracking

Track parsing progress for large files with user feedback.

```javascript { .api }
interface ProgressConfig {
  step?: (result: ParseResult, parser: ParserHandle) => void;
  chunk?: (result: ParseResult, parser: ParserHandle) => void;
  beforeFirstChunk?: (chunk: string) => string; // Pre-process first chunk
}

interface ParseMeta {
  cursor: number;     // Current parsing position
  aborted: boolean;   // Whether parsing was aborted
  truncated: boolean; // Whether data was truncated
}
```

**Usage Examples:**

```javascript
const progressBar = document.getElementById('progress');
const statusText = document.getElementById('status');
let fileSize = 0;

// Get file size for progress calculation
fileInput.addEventListener('change', function(event) {
  const file = event.target.files[0];
  fileSize = file.size;

  Papa.parse(file, {
    header: true,
    step: function(result, parser) {
      // meta.cursor is the character offset reached so far, so this is approximate
      const progress = (result.meta.cursor / fileSize) * 100;
      progressBar.value = progress;
      statusText.textContent = `Processing: ${progress.toFixed(1)}%`;

      // Process row
      processRow(result.data);
    },
    complete: function() {
      // Parsed rows are not accumulated when streaming, so only signal completion here
      progressBar.value = 100;
      statusText.textContent = 'Completed!';
    }
  });
});
```
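The `beforeFirstChunk` callback from the interface above is not shown in this example; it lets you rewrite the first chunk before parsing begins, for instance to drop a preamble line that precedes the header row. A minimal sketch, assuming such a preamble exists in the file:

```javascript
Papa.parse(file, {
  header: true,
  beforeFirstChunk: function(chunk) {
    // Drop an assumed preamble line (e.g. an export banner) so the header row comes first
    const firstNewline = chunk.indexOf('\n');
    return firstNewline >= 0 ? chunk.slice(firstNewline + 1) : chunk;
  },
  step: function(result) {
    processRow(result.data);
  }
});
```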


### Error Recovery

Handle errors gracefully during streaming operations.

```javascript { .api }
interface ErrorHandling {
  error?: (error: ParseError, file?: File) => void;
}
```

**Usage Examples:**

```javascript
let errorCount = 0;

Papa.parse(file, {
  header: true,
  step: function(result, parser) {
    // Check for row-level errors
    if (result.errors.length > 0) {
      console.warn('Row errors:', result.errors);
      errorCount += result.errors.length;
      // Continue processing despite errors
    }

    // Process valid data
    if (result.data && Object.keys(result.data).length > 0) {
      processRow(result.data);
    }
  },
  error: function(error, file) {
    console.error('Fatal parse error:', error);
    console.error('File:', file ? file.name : 'unknown');

    // Handle different error types
    switch (error.code) {
      case 'NetworkError':
        showMessage('Network error: Please check your connection');
        break;
      case 'AbortError':
        showMessage('Parsing was cancelled');
        break;
      default:
        showMessage('An error occurred while parsing the file');
    }
  },
  complete: function() {
    // Row errors were counted in step because streaming results are not accumulated
    console.log('Parsing completed with', errorCount, 'errors');
    showSummary(errorCount);
  }
});
```

### Memory Management

Optimize memory usage for very large files.

```javascript { .api }
interface MemoryConfig {
  chunkSize?: number; // Control memory usage with chunk size
  step?: (result: ParseResult, parser: ParserHandle) => void; // Process immediately
}
```

**Usage Examples:**

```javascript
// Process huge files with minimal memory footprint
let rowsProcessed = 0;

Papa.parse(massiveFile, {
  header: true,
  chunkSize: 1024 * 1024, // 1MB chunks for memory efficiency
  step: function(result, parser) {
    // Process and discard each row immediately
    const processedRow = transformRow(result.data);
    saveToDatabase(processedRow);
    rowsProcessed++;

    // Row data can be garbage collected after this
  },
  complete: function() {
    // Only the running count is retained, not the parsed data itself
    console.log('Processed file with minimal memory usage');
    console.log('Total rows processed:', rowsProcessed);
  }
});

function transformRow(row) {
  // Transform row data as needed
  return {
    id: parseInt(row.id, 10),
    name: row.name.trim(),
    processed_at: new Date()
  };
}
```
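If the per-row work is asynchronous (for example a real database write), pausing the parser until each write settles keeps the number of in-flight rows bounded. A minimal sketch, assuming `saveToDatabase` returns a Promise, which the example above does not specify:

```javascript
Papa.parse(massiveFile, {
  header: true,
  step: function(result, parser) {
    // Stop new rows from arriving while the async write is in flight
    parser.pause();

    saveToDatabase(transformRow(result.data))
      .then(() => parser.resume())
      .catch((err) => {
        console.error('Write failed, stopping parse:', err);
        parser.abort();
      });
  },
  complete: function() {
    console.log('All rows written');
  }
});
```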