CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/npm-sitemap

Sitemap-generating library and CLI tool for creating XML sitemaps that comply with the sitemaps.org protocol

Pending
Overview
Eval results
Files

docs/sitemap-parsing.md

Sitemap Parsing

Functionality for parsing existing XML sitemaps back into JavaScript objects for analysis and manipulation. These utilities allow you to read and process existing sitemaps from files or streams.

Capabilities

Sitemap Parsing Functions

High-level functions for parsing complete sitemaps into arrays of items.

/**
 * Parse an XML sitemap and resolve with an array of sitemap items.
 *
 * @param xml - Readable stream containing XML sitemap data
 * @returns Promise resolving to array of parsed sitemap items
 * @example
 * const items = await parseSitemap(createReadStream("./sitemap.xml"));
 */
function parseSitemap(xml: Readable): Promise<SitemapItem[]>;

/**
 * Parse an XML sitemap index and resolve with an array of index items.
 *
 * @param xml - Readable stream containing XML sitemap index data
 * @returns Promise resolving to array of parsed index items
 * @example
 * const entries = await parseSitemapIndex(createReadStream("./sitemap-index.xml"));
 */
function parseSitemapIndex(xml: Readable): Promise<IndexItem[]>;

Usage Examples:

import { parseSitemap, parseSitemapIndex } from "sitemap";
import { createReadStream } from "fs";

// Load every URL entry from a sitemap file on disk
const sitemapItems = await parseSitemap(createReadStream("./sitemap.xml"));

console.log("Found", sitemapItems.length, "URLs");
for (const item of sitemapItems) {
  console.log(`${item.url} - ${item.changefreq} - ${item.priority}`);
}

// Load every child-sitemap entry from a sitemap index file
const indexItems = await parseSitemapIndex(createReadStream("./sitemap-index.xml"));

console.log("Found", indexItems.length, "sitemaps");
for (const item of indexItems) {
  console.log(`${item.url} - ${item.lastmod}`);
}

XMLToSitemapItemStream

Transform stream for parsing XML sitemap data into individual sitemap items.

/**
 * Transform stream that converts XML sitemap data into SitemapItem objects.
 * Use this to parse existing sitemaps into config options compatible with this library.
 *
 * Pipe XML data into the stream; each parsed item is delivered through the
 * stream's 'data' event.
 */
class XMLToSitemapItemStream extends Transform {
  /** @param opts - Optional error level and logger settings (see XMLToSitemapItemStreamOptions) */
  constructor(opts?: XMLToSitemapItemStreamOptions);
  
  /** Error handling level */
  level: ErrorLevel;
  
  /** Logger function for warnings and errors */
  logger: Logger;
  
  /** Current parsing error, or null when there is none */
  error: Error | null;
  
  /** SAX stream parser */
  saxStream: SAXStream;
}

/** Options accepted by the XMLToSitemapItemStream constructor. */
interface XMLToSitemapItemStreamOptions extends TransformOptions {
  /** Error handling level for validation (e.g. ErrorLevel.SILENT, ErrorLevel.WARN, ErrorLevel.THROW) */
  level?: ErrorLevel;
  
  /** Custom logger function or false to disable logging */
  logger?: Logger | false;
}

/**
 * Signature of a logging callback.
 *
 * @param level - Severity of the message: 'warn', 'error', 'info', or 'log'
 * @param message - Remaining arguments, typed after the first parameter of `console.log`
 */
type Logger = (
  level: 'warn' | 'error' | 'info' | 'log',
  ...message: Parameters<Console['log']>[0]
) => void;

Usage Examples:

import { XMLToSitemapItemStream, ErrorLevel } from "sitemap";
// SitemapItem is only used in type annotations below, so import it as a type.
import type { SitemapItem } from "sitemap";
import { createReadStream } from "fs";

// Parse with strict error handling
const parser = new XMLToSitemapItemStream({
  level: ErrorLevel.THROW
});

// Collects every item emitted by the parser
const sitemapItems: SitemapItem[] = [];

createReadStream("sitemap.xml")
  .pipe(parser)
  .on('data', (item: SitemapItem) => {
    sitemapItems.push(item);
  })
  .on('end', () => {
    console.log("Parsed", sitemapItems.length, "items");
  })
  .on('error', (error) => {
    console.error("Parse error:", error);
  });

// Parse with custom logging: prefix each message with its upper-cased level
const customParser = new XMLToSitemapItemStream({
  level: ErrorLevel.WARN,
  logger: (level, ...args) => {
    console.log(`[${level.toUpperCase()}]`, ...args);
  }
});

XMLToSitemapIndexStream

Transform stream for parsing XML sitemap index data into index items.

/**
 * Transform stream that converts XML sitemap index data into IndexItem objects.
 * Use this to parse existing sitemap indices into config options.
 *
 * Pipe XML data into the stream; each parsed index entry is delivered through
 * the stream's 'data' event.
 */
class XMLToSitemapIndexStream extends Transform {
  /** @param opts - Optional error level and logger settings (see XMLToSitemapIndexItemStreamOptions) */
  constructor(opts?: XMLToSitemapIndexItemStreamOptions);
  
  /** Error handling level */
  level: ErrorLevel;
  
  /** Logger function */
  logger: Logger;
  
  /** SAX stream parser */
  saxStream: SAXStream;
}

/** Options accepted by the XMLToSitemapIndexStream constructor. */
interface XMLToSitemapIndexItemStreamOptions extends TransformOptions {
  /** Error handling level for validation */
  level?: ErrorLevel;
  
  /** Custom logger function or false to disable logging */
  logger?: Logger | false;
}

Usage Examples:

import { XMLToSitemapIndexStream } from "sitemap";
// IndexItem is only used in type annotations below, so import it as a type.
import type { IndexItem } from "sitemap";
import { createReadStream } from "fs";

const indexParser = new XMLToSitemapIndexStream();
// Collects every index entry emitted by the parser
const indexItems: IndexItem[] = [];

createReadStream("sitemap-index.xml")
  .pipe(indexParser)
  .on('data', (item: IndexItem) => {
    indexItems.push(item);
  })
  .on('end', () => {
    console.log("Found sitemaps:", indexItems.map(i => i.url));
  })
  // Report parser failures instead of leaving the 'error' event unhandled
  .on('error', (error) => {
    console.error("Parse error:", error);
  });

JSON Conversion Streams

Transform streams for converting parsed objects to JSON format.

/**
 * Transform stream that converts sitemap items to JSON format.
 * Entries are separated by newline or by comma, depending on `lineSeparated`.
 */
class ObjectStreamToJSON extends Transform {
  /** @param opts - Options selecting the separator written between entries */
  constructor(opts?: ObjectStreamToJSONOptions);
  
  /** Whether to use line-separated JSON */
  lineSeparated: boolean;
  
  /** Whether first item has been written */
  firstWritten: boolean;
}

/** Options accepted by the ObjectStreamToJSON constructor. */
interface ObjectStreamToJSONOptions extends TransformOptions {
  /** Whether to separate entries by newline instead of comma */
  lineSeparated: boolean;
}

/**
 * Transform stream that converts index items to JSON format.
 */
class IndexObjectStreamToJSON extends Transform {
  /** @param opts - Options selecting the separator written between entries */
  constructor(opts?: IndexObjectStreamToJSONOptions);
  
  /** Whether to use line-separated JSON */
  lineSeparated: boolean;
  
  /** Whether first item has been written */
  firstWritten: boolean;
}

/** Options accepted by the IndexObjectStreamToJSON constructor. */
interface IndexObjectStreamToJSONOptions extends TransformOptions {
  /** Whether to separate entries by newline instead of comma */
  lineSeparated: boolean;
}

Usage Examples:

import {
  XMLToSitemapItemStream,
  ObjectStreamToJSON
} from "sitemap";
import { createReadStream, createWriteStream } from "fs";
import { pipeline } from "stream";

// pipeline() is used instead of chained .pipe() calls: it reports an error
// from any stage to the callback and destroys the other streams, whereas
// .pipe() leaves such errors unhandled.

// Convert sitemap XML to JSON array
pipeline(
  createReadStream("sitemap.xml"),
  new XMLToSitemapItemStream(),
  new ObjectStreamToJSON({ lineSeparated: false }),
  createWriteStream("sitemap.json"),
  (err) => {
    if (err) {
      console.error("Failed to write sitemap.json:", err);
    }
  }
);

// Convert to line-separated JSON (JSONL)
pipeline(
  createReadStream("sitemap.xml"),
  new XMLToSitemapItemStream(),
  new ObjectStreamToJSON({ lineSeparated: true }),
  createWriteStream("sitemap.jsonl"),
  (err) => {
    if (err) {
      console.error("Failed to write sitemap.jsonl:", err);
    }
  }
);

Advanced Parsing Features

Supported Sitemap Extensions

The parser supports all standard sitemap extensions:

  • Images: <image:image> elements with captions, titles, geo-location
  • Videos: <video:video> elements with thumbnails, descriptions, metadata
  • News: <news:news> elements with publication data
  • Alternate Languages: <xhtml:link> elements for multilingual content
  • Mobile: <mobile:mobile> elements (legacy)

Error Handling in Parsing

import { XMLToSitemapItemStream, ErrorLevel } from "sitemap";

// Different error handling strategies, selected through the `level` option

// Silent: validation errors are ignored
const silentParser = new XMLToSitemapItemStream({
  level: ErrorLevel.SILENT  // Ignore all validation errors
});

// Warn: validation problems are logged and parsing continues
const warningParser = new XMLToSitemapItemStream({
  level: ErrorLevel.WARN    // Log warnings but continue
});

// Strict: any validation error is thrown
const strictParser = new XMLToSitemapItemStream({
  level: ErrorLevel.THROW   // Throw on any validation error
});

Custom Validation During Parsing

// ErrorLevel must be imported too: it is referenced in the options below.
import { XMLToSitemapItemStream, ErrorLevel } from "sitemap";

const parser = new XMLToSitemapItemStream({
  level: ErrorLevel.WARN,
  logger: (level, ...args) => {
    // Custom validation logic: surface warnings whose first argument mentions
    // "unhandled". The first argument is checked to be a string before
    // calling .includes(), since the logger arguments are loosely typed.
    const [first] = args;
    if (level === 'warn' && typeof first === 'string' && first.includes('unhandled')) {
      console.warn("Found unsupported sitemap element:", ...args);
    }
  }
});

Complete Parsing Example

import { 
  parseSitemap, 
  SitemapStream, 
  streamToPromise 
} from "sitemap";
import { createReadStream } from "fs";

/**
 * Read `input.xml`, keep only entries whose priority is above 0.5, stamp each
 * kept entry with the current time and a weekly change frequency, then build a
 * new sitemap from them and print its XML.
 */
async function processSitemap() {
  // Load the entries of the existing sitemap
  const parsed = await parseSitemap(createReadStream("input.xml"));

  // Select the high-priority entries (a missing or zero priority is excluded)
  const highPriority = parsed.filter(entry =>
    entry.priority ? entry.priority > 0.5 : false
  );

  // Refresh the metadata of each selected entry
  const refreshed = highPriority.map(entry => {
    return {
      ...entry,
      lastmod: new Date().toISOString(),
      changefreq: "weekly" as const
    };
  });

  // Write the refreshed entries into a new sitemap stream
  const output = new SitemapStream({ hostname: "https://example.com" });
  for (const entry of refreshed) {
    output.write(entry);
  }
  output.end();

  // Collect the generated XML and print it
  const xml = await streamToPromise(output);
  console.log(xml.toString());
}

Install with Tessl CLI

npx tessl i tessl/npm-sitemap

docs

cli-interface.md

error-handling.md

index.md

simple-api.md

sitemap-index.md

sitemap-parsing.md

sitemap-streams.md

validation-utilities.md

xml-validation.md

tile.json