# Streaming Operations

Node.js streaming interface for processing large datasets without loading entire files into memory. Ideal for server-side applications processing large spreadsheets, data exports, and real-time data transformation.

**Note: Streaming operations are only available in Node.js environments.**

## Capabilities

### CSV Streaming

Create readable streams that output CSV data from worksheets, enabling memory-efficient processing of large datasets.

```typescript { .api }
/**
 * Create CSV readable stream from worksheet (Node.js only)
 * @param sheet - WorkSheet to stream as CSV
 * @param opts - CSV streaming options
 * @returns Readable stream outputting CSV data
 */
function to_csv(sheet: WorkSheet, opts?: Sheet2CSVOpts): any;

interface Sheet2CSVOpts {
  /** Field separator character */
  FS?: string;
  /** Record separator character */
  RS?: string;
  /** Date format string */
  dateNF?: string;
  /** Strip whitespace */
  strip?: boolean;
  /** Include blank rows */
  blankrows?: boolean;
  /** Skip empty rows */
  skipEmpty?: boolean;
}
```

**Usage Examples:**

```typescript
import { stream, utils, readFile } from "xlsx";
import * as fs from "fs";

// Read large worksheet
const workbook = readFile("large-dataset.xlsx");
const worksheet = workbook.Sheets[workbook.SheetNames[0]];

// Create CSV stream
const csvStream = stream.to_csv(worksheet);

// Pipe to file
const writeStream = fs.createWriteStream("output.csv");
csvStream.pipe(writeStream);

// Handle stream events
csvStream.on("data", (chunk) => {
  console.log(`Streamed ${chunk.length} bytes of CSV data`);
});

csvStream.on("end", () => {
  console.log("CSV streaming completed");
});

csvStream.on("error", (error) => {
  console.error("Streaming error:", error);
});

// Custom CSV format for European systems
const europeanCsvStream = stream.to_csv(worksheet, {
  FS: ";",            // Semicolon separator
  RS: "\r\n",         // Windows line endings
  dateNF: "dd/mm/yyyy" // European date format
});

// Stream with custom processing
const processedStream = stream.to_csv(worksheet, {
  skipEmpty: true, // Skip empty rows
  strip: true      // Remove whitespace
});

processedStream.on("data", (chunk) => {
  // Process each chunk as it arrives
  const csvText = chunk.toString();
  const lines = csvText.split("\n");

  lines.forEach(line => {
    if (line.includes("ERROR")) {
      console.warn("Found error in data:", line);
    }
  });
});
```

### HTML Streaming

Generate readable streams that output HTML table data, useful for web applications and report generation.

```typescript { .api }
/**
 * Create HTML readable stream from worksheet (Node.js only)
 * @param sheet - WorkSheet to stream as HTML
 * @param opts - HTML streaming options
 * @returns Readable stream outputting HTML table data
 */
function to_html(sheet: WorkSheet, opts?: Sheet2HTMLOpts): any;

interface Sheet2HTMLOpts {
  /** Table ID attribute */
  id?: string;
  /** Enable editable cells */
  editable?: boolean;
  /** Custom header content */
  header?: string;
  /** Custom footer content */
  footer?: string;
}
```

**Usage Examples:**

```typescript
import { stream, utils, readFile } from "xlsx";
import * as http from "http";

// Create web server that streams Excel data as HTML
const server = http.createServer((req, res) => {
  if (req.url === "/data") {
    const workbook = readFile("report.xlsx");
    const worksheet = workbook.Sheets[workbook.SheetNames[0]];

    // Set HTML response headers
    res.writeHead(200, {
      "Content-Type": "text/html",
      "Transfer-Encoding": "chunked"
    });

    // Write HTML document start
    res.write(`
      <!DOCTYPE html>
      <html>
      <head>
        <title>Live Data Report</title>
        <style>
          table { border-collapse: collapse; width: 100%; }
          th, td { border: 1px solid #ddd; padding: 8px; text-align: left; }
          th { background-color: #f2f2f2; }
        </style>
      </head>
      <body>
        <h1>Live Data Stream</h1>
    `);

    // Stream table data
    const htmlStream = stream.to_html(worksheet, {
      id: "live-data",
      editable: false
    });

    htmlStream.pipe(res, { end: false });

    htmlStream.on("end", () => {
      res.write(`
      </body>
      </html>
      `);
      res.end();
    });
  }
});

server.listen(3000, () => {
  console.log("Server running at http://localhost:3000/data");
});

// Generate HTML reports with custom styling
const workbook = readFile("sales-data.xlsx");
const worksheet = workbook.Sheets["Sales"];

const styledHtmlStream = stream.to_html(worksheet, {
  id: "sales-table",
  header: `
    <div class="report-header">
      <h2>Sales Report - ${new Date().toLocaleDateString()}</h2>
      <p>Generated automatically from Excel data</p>
    </div>
  `,
  footer: `
    <div class="report-footer">
      <p>Report generated at ${new Date().toLocaleString()}</p>
    </div>
  `
});

// Save to HTML file
import * as fs from "fs";
const htmlFile = fs.createWriteStream("sales-report.html");

htmlFile.write(`
  <!DOCTYPE html>
  <html>
  <head>
    <title>Sales Report</title>
    <link rel="stylesheet" href="report-styles.css">
  </head>
  <body>
`);

styledHtmlStream.pipe(htmlFile, { end: false });

styledHtmlStream.on("end", () => {
  htmlFile.write("</body></html>");
  htmlFile.end();
});
```

### JSON Streaming

Stream worksheet data as JSON objects, perfect for API endpoints and data processing pipelines.

```typescript { .api }
/**
 * Create JSON object stream from worksheet (Node.js only)
 * @param sheet - WorkSheet to stream as JSON objects
 * @param opts - JSON streaming options
 * @returns Readable stream outputting JSON objects
 */
function to_json(sheet: WorkSheet, opts?: Sheet2JSONOpts): any;

interface Sheet2JSONOpts {
  /** Column headers configuration */
  header?: "A" | number | string[];
  /** Cell range to process */
  range?: any;
  /** Include blank rows */
  blankrows?: boolean;
  /** Default value for empty cells */
  defval?: any;
  /** Date format string */
  dateNF?: string;
  /** Use raw values */
  raw?: boolean;
  /** Raw number handling */
  rawNumbers?: boolean;
}
```

**Usage Examples:**

```typescript
import { stream, utils, readFile } from "xlsx";
import * as express from "express";

// Express API endpoint streaming Excel data as JSON
const app = express();

app.get("/api/data", (req, res) => {
  const workbook = readFile("database.xlsx");
  const worksheet = workbook.Sheets["Users"];

  res.setHeader("Content-Type", "application/json");
  res.setHeader("Transfer-Encoding", "chunked");

  const jsonStream = stream.to_json(worksheet, {
    header: 1,   // Emit each row as an array of cell values
    raw: false,  // Apply formatting
    defval: null // Use null for empty cells
  });

  res.write("["); // Start JSON array

  let first = true;
  jsonStream.on("data", (chunk) => {
    const objects = JSON.parse(chunk.toString());

    objects.forEach(obj => {
      if (!first) res.write(",");
      res.write(JSON.stringify(obj));
      first = false;
    });
  });

  jsonStream.on("end", () => {
    res.write("]"); // End JSON array
    res.end();
  });

  jsonStream.on("error", (error) => {
    res.status(500).json({ error: error.message });
  });
});

app.listen(3000);

// Process large datasets with streaming transformation
const workbook = readFile("large-sales-data.xlsx");
const worksheet = workbook.Sheets["Transactions"];

const jsonStream = stream.to_json(worksheet, {
  header: ["Date", "Product", "Amount", "Customer"],
  raw: true
});

let totalAmount = 0;
let recordCount = 0;

jsonStream.on("data", (chunk) => {
  const records = JSON.parse(chunk.toString());

  records.forEach(record => {
    // Process each record as it streams
    totalAmount += record.Amount || 0;
    recordCount++;

    // Perform real-time analytics
    if (record.Amount > 10000) {
      console.log(`Large transaction: ${record.Customer} - $${record.Amount}`);
    }
  });
});

jsonStream.on("end", () => {
  console.log(`Processed ${recordCount} records`);
  console.log(`Total amount: $${totalAmount}`);
  console.log(`Average: $${totalAmount / recordCount}`);
});

// Stream with filtering and transformation
const filteredStream = stream.to_json(worksheet, {
  range: "A1:E1000", // Limit range for performance
  blankrows: false   // Skip empty rows
});

const transformedData: any[] = [];

filteredStream.on("data", (chunk) => {
  const records = JSON.parse(chunk.toString());

  const filtered = records
    .filter(record => record.Amount > 1000) // Filter high-value transactions
    .map(record => ({
      ...record,
      Category: record.Amount > 5000 ? "High" : "Medium",
      ProcessedAt: new Date().toISOString()
    }));

  transformedData.push(...filtered);
});
```

### Stream Configuration

Configure the streaming system for optimal performance with different Node.js stream implementations.

```typescript { .api }
/**
 * Set readable stream constructor for streaming operations (Node.js only)
 * @param Readable - Readable stream constructor (typically from Node.js stream module)
 */
function set_readable(Readable: any): void;
```

**Usage Examples:**

```typescript
import { stream } from "xlsx";
import { Readable } from "stream";

// Configure stream constructor (usually not needed as it's auto-detected)
stream.set_readable(Readable);

// Use custom stream implementation
import { Readable as CustomReadable } from "custom-stream-library";
stream.set_readable(CustomReadable);

// Now streaming functions will use the custom implementation
const csvStream = stream.to_csv(worksheet);
```

### Advanced Streaming Patterns

Combine streaming with other Node.js patterns for powerful data processing workflows.

```typescript
import { stream, readFile } from "xlsx";
import { Transform, pipeline } from "stream";
import * as fs from "fs";
import * as zlib from "zlib";

// Create transformation pipeline
function createDataPipeline(inputFile: string, outputFile: string) {
  const workbook = readFile(inputFile);
  const worksheet = workbook.Sheets[workbook.SheetNames[0]];

  // Source: Excel worksheet as JSON stream
  const jsonStream = stream.to_json(worksheet);

  // Transform: Process and filter data
  const transformStream = new Transform({
    objectMode: true,
    transform(chunk, encoding, callback) {
      try {
        const records = JSON.parse(chunk.toString());

        const processed = records
          .filter(record => record.active === true)
          .map(record => ({
            id: record.id,
            name: record.name.toUpperCase(),
            email: record.email.toLowerCase(),
            processedAt: new Date().toISOString()
          }));

        callback(null, JSON.stringify(processed) + "\n");
      } catch (error) {
        callback(error);
      }
    }
  });

  // Destination: Compressed file
  const writeStream = fs.createWriteStream(outputFile);
  const gzipStream = zlib.createGzip();

  // Create pipeline
  pipeline(
    jsonStream,
    transformStream,
    gzipStream,
    writeStream,
    (error) => {
      if (error) {
        console.error("Pipeline failed:", error);
      } else {
        console.log("Pipeline completed successfully");
      }
    }
  );
}

// Usage
createDataPipeline("users.xlsx", "processed-users.json.gz");

// Real-time data streaming with WebSocket
import WebSocket from "ws";

const wss = new WebSocket.Server({ port: 8080 });

wss.on("connection", (ws) => {
  console.log("Client connected");

  const workbook = readFile("live-data.xlsx");
  const worksheet = workbook.Sheets["RealTime"];

  const jsonStream = stream.to_json(worksheet);

  jsonStream.on("data", (chunk) => {
    const records = JSON.parse(chunk.toString());

    records.forEach(record => {
      ws.send(JSON.stringify({
        type: "data",
        record: record,
        timestamp: Date.now()
      }));
    });
  });

  jsonStream.on("end", () => {
    ws.send(JSON.stringify({ type: "complete" }));
  });
});

console.log("WebSocket server running on ws://localhost:8080");
```

### Memory Management and Performance

Best practices for streaming large datasets efficiently:

```typescript
import { stream, readFile } from "xlsx";
import * as fs from "fs";

// Monitor memory usage during streaming
function streamWithMemoryMonitoring(filename: string) {
  const workbook = readFile(filename);
  const worksheet = workbook.Sheets[workbook.SheetNames[0]];

  const csvStream = stream.to_csv(worksheet);
  const outputStream = fs.createWriteStream("output.csv");

  let bytesStreamed = 0;
  const startMemory = process.memoryUsage();

  csvStream.on("data", (chunk) => {
    bytesStreamed += chunk.length;

    // Monitor memory every 10MB
    if (bytesStreamed % (10 * 1024 * 1024) === 0) {
      const currentMemory = process.memoryUsage();
      console.log(`Streamed: ${bytesStreamed / 1024 / 1024}MB`);
      console.log(`Memory delta: ${(currentMemory.heapUsed - startMemory.heapUsed) / 1024 / 1024}MB`);
    }
  });

  csvStream.pipe(outputStream);
}

// Chunked processing for very large files
function processInChunks(filename: string, chunkSize: number = 1000) {
  const workbook = readFile(filename, { sheetRows: chunkSize });
  const worksheet = workbook.Sheets[workbook.SheetNames[0]];

  let processedRows = 0;

  const jsonStream = stream.to_json(worksheet);

  jsonStream.on("data", (chunk) => {
    const records = JSON.parse(chunk.toString());
    processedRows += records.length;

    // Process chunk
    console.log(`Processed ${processedRows} rows`);

    // Optionally trigger garbage collection
    if (global.gc && processedRows % 10000 === 0) {
      global.gc();
    }
  });
}
```