CtrlK
BlogDocsLog inGet started
Tessl Logo

tessl/npm-xlsx

SheetJS Spreadsheet data parser and writer for multiple formats including Excel, CSV, and OpenDocument

Overview
Eval results
Files

docs/streaming.md

Streaming Operations

Node.js streaming interface for processing large datasets without loading entire files into memory. Ideal for server-side applications processing large spreadsheets, data exports, and real-time data transformation.

Note: Streaming operations are only available in Node.js environments.

Capabilities

CSV Streaming

Create readable streams that output CSV data from worksheets, enabling memory-efficient processing of large datasets.

/**
 * Create CSV readable stream from worksheet (Node.js only).
 * Emits the sheet's rows as CSV text chunks, so large sheets can be
 * piped to a destination without building the full CSV string in memory.
 * @param sheet - WorkSheet to stream as CSV
 * @param opts - CSV streaming options
 * @returns Readable stream outputting CSV data
 */
function to_csv(sheet: WorkSheet, opts?: Sheet2CSVOpts): any;

interface Sheet2CSVOpts {
  /** Field separator character — presumably "," when omitted; confirm against upstream SheetJS docs */
  FS?: string;
  /** Record separator character — presumably "\n" when omitted; confirm against upstream SheetJS docs */
  RS?: string;
  /** Date format string applied when formatting date cells */
  dateNF?: string;
  /** Strip whitespace from output */
  strip?: boolean;
  /** Include blank rows in the output */
  blankrows?: boolean;
  /** Skip empty rows — NOTE(review): not listed in upstream SheetJS Sheet2CSVOpts; verify */
  skipEmpty?: boolean;
}

Usage Examples:

import { stream, utils, readFile } from "xlsx";
import * as fs from "fs";

// Read large worksheet
const workbook = readFile("large-dataset.xlsx");
const worksheet = workbook.Sheets[workbook.SheetNames[0]];

// Create CSV stream
const csvStream = stream.to_csv(worksheet);

// Pipe to file
const writeStream = fs.createWriteStream("output.csv");
csvStream.pipe(writeStream);

// Handle stream events
csvStream.on("data", (chunk) => {
  console.log(`Streamed ${chunk.length} bytes of CSV data`);
});

csvStream.on("end", () => {
  console.log("CSV streaming completed");
});

csvStream.on("error", (error) => {
  console.error("Streaming error:", error);
});

// Custom CSV format for European systems
const europeanCsvStream = stream.to_csv(worksheet, {
  FS: ";",          // Semicolon separator
  RS: "\r\n",       // Windows line endings
  dateNF: "dd/mm/yyyy"  // European date format
});

// Stream with custom processing
const processedStream = stream.to_csv(worksheet, {
  skipEmpty: true,   // Skip empty rows
  strip: true        // Remove whitespace
});

// Chunks arrive at arbitrary byte boundaries, so a CSV row can be split
// across two consecutive chunks. Carry the trailing partial line over to
// the next chunk instead of scanning each chunk in isolation, otherwise a
// marker such as "ERROR" that straddles a boundary would be missed.
let partialLine = "";

processedStream.on("data", (chunk) => {
  const csvText = partialLine + chunk.toString();
  const lines = csvText.split("\n");
  partialLine = lines.pop() ?? "";  // last element may be an incomplete row

  lines.forEach(line => {
    if (line.includes("ERROR")) {
      console.warn("Found error in data:", line);
    }
  });
});

processedStream.on("end", () => {
  // Flush whatever remained after the final chunk
  if (partialLine.includes("ERROR")) {
    console.warn("Found error in data:", partialLine);
  }
});

HTML Streaming

Generate readable streams that output HTML table data, useful for web applications and report generation.

/**
 * Create HTML readable stream from worksheet (Node.js only).
 * Emits an HTML table rendering of the sheet in chunks, suitable for
 * piping directly into an HTTP response or a file stream.
 * @param sheet - WorkSheet to stream as HTML
 * @param opts - HTML streaming options
 * @returns Readable stream outputting HTML table data
 */
function to_html(sheet: WorkSheet, opts?: Sheet2HTMLOpts): any;

interface Sheet2HTMLOpts {
  /** Value for the generated table's id attribute */
  id?: string;
  /** Enable editable cells — NOTE(review): presumably maps to contenteditable markup; confirm upstream */
  editable?: boolean;
  /** Custom HTML emitted before the table */
  header?: string;
  /** Custom HTML emitted after the table */
  footer?: string;
}

Usage Examples:

import { stream, utils, readFile } from "xlsx";
import * as http from "http";

// Create web server that streams Excel data as HTML
const server = http.createServer((req, res) => {
  if (req.url === "/data") {
    const workbook = readFile("report.xlsx");
    const worksheet = workbook.Sheets[workbook.SheetNames[0]];
    
    // Set HTML response headers
    res.writeHead(200, {
      "Content-Type": "text/html",
      "Transfer-Encoding": "chunked"
    });
    
    // Write HTML document start
    res.write(`
      <!DOCTYPE html>
      <html>
      <head>
        <title>Live Data Report</title>
        <style>
          table { border-collapse: collapse; width: 100%; }
          th, td { border: 1px solid #ddd; padding: 8px; text-align: left; }
          th { background-color: #f2f2f2; }
        </style>
      </head>
      <body>
        <h1>Live Data Stream</h1>
    `);
    
    // Stream table data
    const htmlStream = stream.to_html(worksheet, {
      id: "live-data",
      editable: false
    });
    
    // end: false keeps the response open so the closing tags can be written
    htmlStream.pipe(res, { end: false });
    
    htmlStream.on("end", () => {
      res.write(`
        </body>
        </html>
      `);
      res.end();
    });
    
    htmlStream.on("error", (error) => {
      // Headers are already sent, so abort the connection rather than
      // leaving the client waiting on a half-written page
      console.error("HTML streaming failed:", error);
      res.destroy(error);
    });
  } else {
    // Without this branch, requests to any other path would hang forever
    res.writeHead(404, { "Content-Type": "text/plain" });
    res.end("Not Found");
  }
});

server.listen(3000, () => {
  console.log("Server running at http://localhost:3000/data");
});

// Generate HTML reports with custom styling
import * as fs from "fs";

const workbook = readFile("sales-data.xlsx");
const worksheet = workbook.Sheets["Sales"];

// header/footer options inject report chrome around the generated table
const reportStream = stream.to_html(worksheet, {
  id: "sales-table",
  header: `
    <div class="report-header">
      <h2>Sales Report - ${new Date().toLocaleDateString()}</h2>
      <p>Generated automatically from Excel data</p>
    </div>
  `,
  footer: `
    <div class="report-footer">
      <p>Report generated at ${new Date().toLocaleString()}</p>
    </div>
  `
});

// Save to HTML file
const outputFile = fs.createWriteStream("sales-report.html");

// Write the document preamble before the table starts streaming
outputFile.write(`
<!DOCTYPE html>
<html>
<head>
  <title>Sales Report</title>
  <link rel="stylesheet" href="report-styles.css">
</head>
<body>
`);

// end: false keeps the file handle open so the closing tags can be appended
reportStream.pipe(outputFile, { end: false });

reportStream.on("end", () => {
  outputFile.write("</body></html>");
  outputFile.end();
});

JSON Streaming

Stream worksheet data as JSON objects, perfect for API endpoints and data processing pipelines.

/**
 * Create JSON object stream from worksheet (Node.js only).
 * Object-mode stream: each "data" event delivers one row value directly,
 * rather than serialized JSON text.
 * @param sheet - WorkSheet to stream as JSON objects
 * @param opts - JSON streaming options
 * @returns Readable stream outputting JSON objects
 */
function to_json(sheet: WorkSheet, opts?: Sheet2JSONOpts): any;

interface Sheet2JSONOpts {
  /**
   * Row-to-key mapping: 1 emits each row as an array of values, "A" keys
   * cells by column letter, a string[] supplies explicit keys; when omitted
   * the first row supplies the keys (mapping per upstream SheetJS
   * sheet_to_json docs — confirm).
   */
  header?: "A" | number | string[];
  /** Cell range to process (e.g. "A1:E1000") */
  range?: any;
  /** Include blank rows in the output */
  blankrows?: boolean;
  /** Default value substituted for empty cells */
  defval?: any;
  /** Date format string applied when formatting date cells */
  dateNF?: string;
  /** Use raw values instead of formatted text */
  raw?: boolean;
  /** Raw number handling */
  rawNumbers?: boolean;
}

Usage Examples:

import { stream, utils, readFile } from "xlsx";
import * as express from "express";

// Express API endpoint streaming Excel data as JSON
const app = express();

app.get("/api/data", (req, res) => {
  const workbook = readFile("database.xlsx");
  const worksheet = workbook.Sheets["Users"];
  
  res.setHeader("Content-Type", "application/json");
  res.setHeader("Transfer-Encoding", "chunked");
  
  // stream.to_json is an object-mode stream: each "data" event delivers one
  // row value directly — no JSON.parse needed. With no `header` option the
  // first worksheet row supplies the object keys.
  const jsonStream = stream.to_json(worksheet, {
    raw: false,       // Apply formatting
    defval: null      // Use null for empty cells
  });
  
  res.write("[");  // Start JSON array
  
  let first = true;
  jsonStream.on("data", (row) => {
    if (!first) res.write(",");
    res.write(JSON.stringify(row));
    first = false;
  });
  
  jsonStream.on("end", () => {
    res.write("]");  // End JSON array
    res.end();
  });
  
  jsonStream.on("error", (error) => {
    // Headers (and part of the body) may already be sent, so responding
    // with a 500 status is no longer possible — abort the connection.
    console.error("JSON streaming failed:", error);
    res.destroy(error);
  });
});

app.listen(3000);

// Process large datasets with streaming transformation
const workbook = readFile("large-sales-data.xlsx");
const worksheet = workbook.Sheets["Transactions"];

const jsonStream = stream.to_json(worksheet, {
  header: ["Date", "Product", "Amount", "Customer"],
  raw: true
});

let totalAmount = 0;
let recordCount = 0;

// Object-mode stream: each "data" event carries a single row object keyed
// by the custom header array above (not a JSON-text chunk).
jsonStream.on("data", (record) => {
  // Process each record as it streams
  totalAmount += record.Amount || 0;
  recordCount++;
  
  // Perform real-time analytics
  if (record.Amount > 10000) {
    console.log(`Large transaction: ${record.Customer} - $${record.Amount}`);
  }
});

jsonStream.on("end", () => {
  console.log(`Processed ${recordCount} records`);
  console.log(`Total amount: $${totalAmount}`);
  // Guard against an empty worksheet to avoid printing NaN
  if (recordCount > 0) {
    console.log(`Average: $${totalAmount / recordCount}`);
  }
});

// Stream with filtering and transformation
const filteredStream = stream.to_json(worksheet, {
  range: "A1:E1000",   // Limit range for performance
  blankrows: false     // Skip empty rows
});

const transformedData: any[] = [];

// Object-mode stream: each "data" event is one row object, so filter and
// transform per record instead of parsing the chunk as JSON text.
filteredStream.on("data", (record) => {
  if (record.Amount > 1000) {            // Keep high-value transactions only
    transformedData.push({
      ...record,
      Category: record.Amount > 5000 ? "High" : "Medium",
      ProcessedAt: new Date().toISOString()
    });
  }
});

Stream Configuration

Configure the streaming system for optimal performance with different Node.js stream implementations.

/**
 * Set readable stream constructor for streaming operations (Node.js only).
 * Lets the library build its streams from a caller-supplied implementation;
 * normally auto-detected, so calling this explicitly is rarely required.
 * @param Readable - Readable stream constructor (typically from Node.js stream module)
 */
function set_readable(Readable: any): void;

Usage Examples:

import { stream } from "xlsx";
import { Readable } from "stream";

// Configure stream constructor (usually not needed as it's auto-detected)
stream.set_readable(Readable);

// Use custom stream implementation
// NOTE(review): "custom-stream-library" is a placeholder package name, and
// `worksheet` below is assumed to be defined earlier in the program.
import { Readable as CustomReadable } from "custom-stream-library";
stream.set_readable(CustomReadable);

// Now streaming functions will use the custom implementation
const csvStream = stream.to_csv(worksheet);

Advanced Streaming Patterns

Combine streaming with other Node.js patterns for powerful data processing workflows.

import { stream, readFile } from "xlsx";
import { Transform, pipeline } from "stream";
import * as fs from "fs";
import * as zlib from "zlib";

/**
 * Stream an Excel workbook through a filtering/normalizing transform into
 * a gzip-compressed NDJSON file (one JSON object per line).
 * @param inputFile - Path of the source spreadsheet
 * @param outputFile - Path of the compressed output file
 */
function createDataPipeline(inputFile: string, outputFile: string) {
  const workbook = readFile(inputFile);
  const worksheet = workbook.Sheets[workbook.SheetNames[0]];
  
  // Source: object-mode stream — each chunk is a single row object
  const jsonStream = stream.to_json(worksheet);
  
  // Transform: drop inactive rows, normalize fields, emit NDJSON text.
  // The incoming chunk is already a row object (not serialized JSON),
  // so no JSON.parse is needed.
  const transformStream = new Transform({
    objectMode: true,
    transform(record, encoding, callback) {
      try {
        if (record.active !== true) {
          callback();  // drop this row, emit nothing
          return;
        }
        const processed = {
          id: record.id,
          name: record.name.toUpperCase(),
          email: record.email.toLowerCase(),
          processedAt: new Date().toISOString()
        };
        callback(null, JSON.stringify(processed) + "\n");
      } catch (error) {
        callback(error as Error);
      }
    }
  });
  
  // Destination: Compressed file
  const writeStream = fs.createWriteStream(outputFile);
  const gzipStream = zlib.createGzip();
  
  // pipeline wires the stages together and propagates errors/cleanup
  pipeline(
    jsonStream,
    transformStream,
    gzipStream,
    writeStream,
    (error) => {
      if (error) {
        console.error("Pipeline failed:", error);
      } else {
        console.log("Pipeline completed successfully");
      }
    }
  );
}

// Usage
createDataPipeline("users.xlsx", "processed-users.json.gz");

// Real-time data streaming with WebSocket
import WebSocket from "ws";

const wss = new WebSocket.Server({ port: 8080 });

wss.on("connection", (ws) => {
  console.log("Client connected");
  
  const workbook = readFile("live-data.xlsx");
  const worksheet = workbook.Sheets["RealTime"];
  
  const jsonStream = stream.to_json(worksheet);
  
  // Object-mode stream: each "data" event delivers one row object, so
  // forward it directly — no JSON.parse of the chunk is needed.
  jsonStream.on("data", (record) => {
    ws.send(JSON.stringify({
      type: "data",
      record: record,
      timestamp: Date.now()
    }));
  });
  
  jsonStream.on("end", () => {
    ws.send(JSON.stringify({ type: "complete" }));
  });
});

console.log("WebSocket server running on ws://localhost:8080");

Memory Management and Performance

Best practices for streaming large datasets efficiently:

import { stream, readFile } from "xlsx";
import * as fs from "fs";

// Monitor memory usage during streaming
function streamWithMemoryMonitoring(filename: string) {
  const workbook = readFile(filename);
  const worksheet = workbook.Sheets[workbook.SheetNames[0]];
  
  const csvStream = stream.to_csv(worksheet);
  const outputStream = fs.createWriteStream("output.csv");
  
  let bytesStreamed = 0;
  // Report roughly every 10MB. Chunks have arbitrary sizes, so an exact
  // modulo check (bytesStreamed % 10MB === 0) would almost never fire;
  // track a moving threshold instead.
  const reportInterval = 10 * 1024 * 1024;
  let nextReport = reportInterval;
  const startMemory = process.memoryUsage();
  
  csvStream.on("data", (chunk) => {
    bytesStreamed += chunk.length;
    
    if (bytesStreamed >= nextReport) {
      // Advance past the current total in case one chunk crossed
      // several report boundaries at once
      while (nextReport <= bytesStreamed) nextReport += reportInterval;
      const currentMemory = process.memoryUsage();
      console.log(`Streamed: ${bytesStreamed / 1024 / 1024}MB`);
      console.log(`Memory delta: ${(currentMemory.heapUsed - startMemory.heapUsed) / 1024 / 1024}MB`);
    }
  });
  
  csvStream.pipe(outputStream);
}

// Chunked processing for very large files
function processInChunks(filename: string, chunkSize: number = 1000) {
  // sheetRows caps how many rows are parsed from the file
  const workbook = readFile(filename, { sheetRows: chunkSize });
  const worksheet = workbook.Sheets[workbook.SheetNames[0]];
  
  let processedRows = 0;
  
  const jsonStream = stream.to_json(worksheet);
  
  // Object-mode stream: each "data" event is a single row object
  // (not a JSON-text chunk containing many records)
  jsonStream.on("data", (record) => {
    processedRows++;
    
    // Process row
    console.log(`Processed ${processedRows} rows`);
    
    // Optionally trigger garbage collection (requires node --expose-gc)
    if (global.gc && processedRows % 10000 === 0) {
      global.gc();
    }
  });
}

Install with Tessl CLI

npx tessl i tessl/npm-xlsx

docs

cell-operations.md

core-io.md

data-conversion.md

index.md

streaming.md

workbook-management.md

tile.json