CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/npm-sitemap

Sitemap-generating library and CLI tool for creating XML sitemaps that comply with the sitemaps.org protocol

Pending
Overview
Eval results
Files

docs/validation-utilities.md

Validation and Utilities

Validation functions and utility methods for working with sitemap data, URL normalization, and error handling. These functions provide the foundation for data validation and processing throughout the library.

Capabilities

Validation Functions

Core validation functionality that ensures sitemap data conforms to specifications.

/**
 * Verifies all data passed in will comply with sitemap spec.
 *
 * How a failure is reported depends on `level` (see `ErrorLevel`). The usage
 * examples call this without a level and describe that as the default
 * warning level.
 *
 * @param conf - Sitemap item to validate
 * @param level - Logging/error level for validation failures
 * @param errorHandler - Custom error handling function; per the `ErrorHandler`
 *   type it receives the validation error and the level that triggered it
 * @returns Validated sitemap item
 * @throws Error on invalid data when `level` is `ErrorLevel.THROW` (per that
 *   member's description). NOTE(review): whether a custom `errorHandler`
 *   replaces this throw is not shown here; confirm against the library.
 */
function validateSMIOptions(
  conf: SitemapItem,
  level?: ErrorLevel,
  errorHandler?: ErrorHandler
): SitemapItem;

/**
 * Custom error handler function type, accepted as the optional third
 * argument of `validateSMIOptions`.
 * @param error - The validation error
 * @param level - Error level that triggered the handler
 */
type ErrorHandler = (error: Error, level: ErrorLevel) => void;

/**
 * Controls how validation failures are reported: ignored, logged as a
 * warning, or thrown as an Error.
 */
enum ErrorLevel {
  /** Validation will be skipped and nothing logged or thrown */
  SILENT = 'silent',
  
  /** If an invalid value is encountered, a console.warn will be called */
  WARN = 'warn',
  
  /** An Error will be thrown on encountering invalid data */
  THROW = 'throw'
}

Usage Examples:

import { validateSMIOptions, ErrorLevel } from "sitemap";

// Validate with default warning level
const validatedItem = validateSMIOptions({
  url: "https://example.com/page",
  changefreq: "daily",
  priority: 0.8,
  img: [],
  video: [],
  links: []
});

// Validate with strict error handling.
// With ErrorLevel.THROW an Error is thrown on encountering invalid data, so
// only the first failure encountered surfaces from a single call.
const strictItem = validateSMIOptions(
  {
    url: "https://example.com/page",
    changefreq: "invalid-freq", // Invalid change frequency: triggers a throw
    priority: 2.0, // Also invalid, but not reported if an earlier check already threw
    img: [],
    video: [],
    links: []
  },
  ErrorLevel.THROW
);

// Validate with custom error handler.
// `someItem` is a placeholder for a sitemap item defined elsewhere.
const customValidatedItem = validateSMIOptions(
  someItem,
  ErrorLevel.WARN,
  (error, level) => {
    console.log(`Validation ${level}: ${error.message}`);
    // Log to external service, etc.
  }
);

URL Normalization

Functions for normalizing and processing URLs and sitemap items.

/**
 * Converts the passed in sitemap entry into one capable of being consumed by SitemapItem
 * @param elem - The URL string or loose sitemap item object to be converted
 * @param hostname - Base hostname for relative URLs (e.g. "/relative/path"
 *   combined with "https://example.com", as in the usage example)
 * @param lastmodDateOnly - Whether to include only date in lastmod
 * @returns Normalized sitemap item
 */
function normalizeURL(
  elem: string | SitemapItemLoose,
  hostname?: string,
  lastmodDateOnly?: boolean
): SitemapItem;

Usage Examples:

import { normalizeURL } from "sitemap";

// Normalize a simple URL string
const normalized = normalizeURL(
  "/relative/path",
  "https://example.com"
);
// Result: { url: "https://example.com/relative/path", img: [], video: [], links: [] }

// Normalize complex sitemap item
const complexItem = normalizeURL(
  {
    url: "/product/123",
    lastmod: "2023-01-01T12:30:45.123Z",
    img: "https://example.com/image.jpg", // String converted to Img object
    video: {
      thumbnail_loc: "https://example.com/thumb.jpg",
      title: "Product Video",
      description: "A great product demo"
    },
    priority: 0.8
  },
  "https://example.com",
  true // lastmodDateOnly
);
// Result: Full SitemapItem with normalized URLs and date-only lastmod

Stream Utilities

Utility functions for working with streams and data processing.

/**
 * Combines multiple readable streams into one readable stream.
 * NOTE(review): the relative ordering of data from the individual streams is
 * not described here; confirm before relying on it.
 * @param streams - Array of readable streams to combine
 * @param options - Transform options for the combined stream
 * @returns Single readable stream containing all data
 */
function mergeStreams(
  streams: Readable[],
  options?: TransformOptions
): Readable;

/**
 * Takes a stream likely from fs.createReadStream and returns a stream of sitemap items
 * @param stream - Stream of line-separated URLs or JSON
 * @param opts - Optional parsing settings
 * @param opts.isJSON - Whether stream contains JSON (auto-detected if undefined)
 * @returns Stream of sitemap options
 */
function lineSeparatedURLsToSitemapOptions(
  stream: Readable,
  opts?: { isJSON?: boolean }
): Readable;

/**
 * Wraps node's ReadLine in a stream for processing line-by-line input
 */
class ReadlineStream extends Readable {
  /**
   * @param options - Readable options plus the `input` stream to read lines from
   */
  constructor(options: ReadlineStreamOptions);
  
  /** Internal readline interface */
  private _source: Interface;
}

/** Constructor options for ReadlineStream: ReadableOptions plus the input stream. */
interface ReadlineStreamOptions extends ReadableOptions {
  /** Input stream to read lines from */
  input: Readable;
}

Usage Examples:

import { 
  mergeStreams, 
  lineSeparatedURLsToSitemapOptions,
  ReadlineStream 
} from "sitemap";
import { createReadStream } from "fs";

// Merge multiple URL files into one stream
const stream1 = createReadStream("urls1.txt");
const stream2 = createReadStream("urls2.txt");
const stream3 = createReadStream("urls3.txt");

const combinedStream = mergeStreams([stream1, stream2, stream3]);

// Convert line-separated URLs to sitemap options
const urlStream = lineSeparatedURLsToSitemapOptions(
  createReadStream("urls.txt")
);

urlStream.on('data', (item) => {
  console.log("Parsed item:", item);
});

// Process JSON lines
const jsonStream = lineSeparatedURLsToSitemapOptions(
  createReadStream("urls.jsonl"),
  { isJSON: true }
);

// Manual ReadlineStream usage
const readline = new ReadlineStream({
  input: createReadStream("large-file.txt")
});

readline.on('data', (line) => {
  console.log("Line:", line);
});

Array Utilities

Utility functions for array processing.

/**
 * Splits array into chunks of specified size (based on lodash implementation)
 * @param array - Array to split into chunks
 * @param size - Size of each chunk (default: 1)
 * @returns Array of chunks; each chunk is itself an array of elements from
 *   `array`. NOTE(review): as in lodash, the last chunk presumably holds any
 *   remainder and may be shorter than `size`; confirm.
 */
function chunk(array: any[], size?: number): any[];

Usage Examples:

import { chunk } from "sitemap";

// Split URLs into batches for processing
const allUrls = Array.from({ length: 1000 }, (_, i) => `/page-${i}`);
const batches = chunk(allUrls, 100);

console.log(`Split ${allUrls.length} URLs into ${batches.length} batches`);

// Process each batch
for (const batch of batches) {
  console.log(`Processing batch of ${batch.length} URLs`);
  // Process batch...
}

Type Guards and Validators

Functions for type checking and validation of sitemap data.

/**
 * Type guard for change frequency values
 * @param freq - String to validate (e.g. raw user input)
 * @returns True if valid change frequency; narrows `freq` to EnumChangefreq
 */
function isValidChangeFreq(freq: string): freq is EnumChangefreq;

/**
 * Type guard for yes/no values
 * @param yn - String to validate
 * @returns True if valid yes/no value; narrows `yn` to EnumYesNo
 */
function isValidYesNo(yn: string): yn is EnumYesNo;

/**
 * Type guard for allow/deny values
 * @param ad - String to validate
 * @returns True if valid allow/deny value; narrows `ad` to EnumAllowDeny
 */
function isAllowDeny(ad: string): ad is EnumAllowDeny;

/**
 * Type guard for video price types
 * @param pt - Value to validate; may be an arbitrary string or an existing PriceType
 * @returns True if valid price type; narrows `pt` to PriceType
 */
function isPriceType(pt: string | PriceType): pt is PriceType;

/**
 * Type guard for video resolutions
 * @param res - String to validate
 * @returns True if valid resolution; narrows `res` to Resolution
 */
function isResolution(res: string): res is Resolution;

Usage Examples:

import { 
  isValidChangeFreq, 
  isValidYesNo, 
  isPriceType 
} from "sitemap";

/**
 * Validates a user-supplied change frequency.
 * @param input - Raw string to check
 * @returns The same value, narrowed to EnumChangefreq
 * @throws Error when `input` is not a valid change frequency
 */
function processChangeFreq(input: string) {
  // Reject bad input first so the rest of the body sees the narrowed type.
  if (!isValidChangeFreq(input)) {
    throw new Error(`Invalid change frequency: ${input}`);
  }

  // TypeScript knows input is EnumChangefreq
  console.log("Valid change frequency:", input);
  return input;
}

/**
 * Validates selected video metadata fields.
 * `family_friendly` is checked only when truthy; `price_type` is checked only
 * when both `price` and `price_type` are truthy.
 * @param data - Video metadata object
 * @returns The same `data` object when all present fields are valid
 * @throws Error when a checked field holds an invalid value
 */
function processVideoData(data: any) {
  const familyFriendlyRejected =
    Boolean(data.family_friendly) && !isValidYesNo(data.family_friendly);
  if (familyFriendlyRejected) {
    throw new Error("Invalid family_friendly value");
  }

  const priceTypeRejected =
    Boolean(data.price) &&
    Boolean(data.price_type) &&
    !isPriceType(data.price_type);
  if (priceTypeRejected) {
    throw new Error("Invalid price type");
  }

  return data;
}

Error Handling Patterns

Custom Error Handlers

import { validateSMIOptions, ErrorLevel } from "sitemap";
// Type-only imports: both names are used purely as annotations below.
import type { ErrorHandler, SitemapItem } from "sitemap";

// Collect all validation errors
const validationErrors: Error[] = [];

/**
 * Records every validation error it receives and rethrows it only when the
 * reported level is THROW, so lower levels never interrupt processing.
 */
const errorCollector: ErrorHandler = (error, level) => {
  validationErrors.push(error);

  if (level === ErrorLevel.THROW) {
    throw error;
  }
};

// Validate multiple items
const items: SitemapItem[] = []; // Replace with your array of sitemap items
const validatedItems = items.map((item) =>
  validateSMIOptions(item, ErrorLevel.WARN, errorCollector)
);

console.log(`Validation complete. Found ${validationErrors.length} issues.`);

Batch Processing with Error Recovery

import { normalizeURL, validateSMIOptions, ErrorLevel } from "sitemap";
// Type-only import: SitemapItem is used purely as an annotation below.
import type { SitemapItem } from "sitemap";

/**
 * Normalizes and strictly validates each raw item, separating successes from
 * failures so that one bad entry does not abort the whole batch.
 * @param rawItems - Raw entries to normalize
 * @param hostname - Base hostname passed to normalizeURL
 * @returns `successful` validated items, and `failed` entries pairing each
 *   rejected raw item with the error that rejected it
 */
function processSitemapBatch(rawItems: any[], hostname: string) {
  const successful: SitemapItem[] = [];
  const failed: { item: any; error: Error }[] = [];

  for (const rawItem of rawItems) {
    try {
      const normalized = normalizeURL(rawItem, hostname);
      const validated = validateSMIOptions(normalized, ErrorLevel.THROW);
      successful.push(validated);
    } catch (error: unknown) {
      // A thrown value is not guaranteed to be an Error; wrap anything else
      // so the `error: Error` field is true at runtime, not just asserted.
      failed.push({
        item: rawItem,
        error: error instanceof Error ? error : new Error(String(error)),
      });
    }
  }

  return { successful, failed };
}

Constants and Enums

/** Array of all valid change frequency values, typed as EnumChangefreq members */
const CHANGEFREQ: EnumChangefreq[];

/**
 * Regex validators for various sitemap attributes.
 * NOTE(review): keys are plain strings, presumably attribute names; confirm
 * the available keys against the library source.
 */
const validators: { [index: string]: RegExp };

/** Change frequency values; the type that `isValidChangeFreq` narrows to. */
enum EnumChangefreq {
  DAILY = 'daily',
  MONTHLY = 'monthly', 
  ALWAYS = 'always',
  HOURLY = 'hourly',
  WEEKLY = 'weekly',
  YEARLY = 'yearly',
  NEVER = 'never'
}

/**
 * Yes/no values in upper, capitalized and lower case; the type that
 * `isValidYesNo` narrows to.
 */
enum EnumYesNo {
  YES = 'YES',
  NO = 'NO',
  Yes = 'Yes', 
  No = 'No',
  yes = 'yes',
  no = 'no'
}

/** Allow/deny values; the type that `isAllowDeny` narrows to. */
enum EnumAllowDeny {
  ALLOW = 'allow',
  DENY = 'deny'
}

/** Video price types (lower- or upper-case); the type that `isPriceType` narrows to. */
type PriceType = 'rent' | 'purchase' | 'RENT' | 'PURCHASE';
/** Video resolutions (lower- or upper-case); the type that `isResolution` narrows to. */
type Resolution = 'HD' | 'hd' | 'sd' | 'SD';

Install with Tessl CLI

npx tessl i tessl/npm-sitemap

docs

cli-interface.md

error-handling.md

index.md

simple-api.md

sitemap-index.md

sitemap-parsing.md

sitemap-streams.md

validation-utilities.md

xml-validation.md

tile.json