Powerfully flexible XML Sitemaps that integrate seamlessly, for Nuxt.
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Utility functions for parsing existing XML sitemaps and extracting sitemap metadata from HTML documents for analysis and integration purposes.
import { parseSitemapXml, parseHtmlExtractSitemapMeta } from '@nuxtjs/sitemap/utils';
import type { SitemapParseResult, SitemapWarning } from '@nuxtjs/sitemap/utils';
Parse existing XML sitemap content into structured data with validation and warning reporting.
/**
* Parse XML sitemap content into structured data
* Handles both regular sitemaps and sitemap index files
* The result is delivered through a Promise, so callers must `await` it (or chain `.then`)
* before reading `urls` or `warnings`.
* @param xml - Raw XML sitemap content as string
* @returns Promise resolving to parsed sitemap data with URLs and validation warnings
*/
function parseSitemapXml(xml: string): Promise<SitemapParseResult>;
/**
* Value the `parseSitemapXml` promise resolves to: the URL entries that were read
* plus the warnings collected while reading them.
*/
interface SitemapParseResult {
/**
* Array of parsed sitemap URLs
* NOTE(review): `SitemapUrlInput` is not declared in this section; presumably it is
* exported by the package alongside `SitemapUrl`. Confirm before relying on its shape.
*/
urls: SitemapUrlInput[];
/** Array of validation warnings encountered during parsing */
warnings: SitemapWarning[];
}
/** One entry of `SitemapParseResult.warnings`. */
interface SitemapWarning {
/** Type of warning encountered; `'validation'` is the only value declared here */
type: 'validation';
/** Human-readable warning message */
message: string;
/** Context information about where the warning occurred; the object and each of its fields are optional */
context?: {
/** Presumably the URL entry the warning relates to (confirm) */
url?: string;
/** Presumably the name of the field that failed validation (confirm) */
field?: string;
/** Presumably the offending value as found in the input (confirm) */
value?: unknown;
};
}
Extract sitemap-relevant metadata from HTML documents for automatic discovery and analysis.
/**
* Extract sitemap metadata from HTML document content
* Discovers images, videos, and other sitemap-relevant information
* Unlike `parseSitemapXml`, this is declared as synchronous: the array is returned
* directly rather than through a Promise.
* NOTE(review): the default value of each option flag is not shown in this section;
* confirm which discoveries are enabled when `options` is omitted.
* @param html - Raw HTML content as string
* @param options - Optional configuration for metadata extraction
* @returns Array of sitemap URLs with discovered metadata
*/
function parseHtmlExtractSitemapMeta(
html: string,
options?: {
/** Whether to discover images in the HTML content */
images?: boolean;
/** Whether to discover videos in the HTML content */
videos?: boolean;
/** Whether to extract lastmod information */
lastmod?: boolean;
/** Whether to extract alternative language links */
alternatives?: boolean;
/** Function to resolve relative URLs to absolute URLs */
resolveUrl?: (url: string) => string;
}
): SitemapUrl[];
URL Entry Structure
/**
* A single sitemap URL entry. Only `loc` is required; every other field is optional
* metadata attached to that URL.
*/
interface SitemapUrl {
/** URL location (required) */
loc: string;
/** Last modification date */
lastmod?: string | Date;
/** Change frequency indicator */
changefreq?: Changefreq;
/** Priority value between 0.0 and 1.0, restricted to steps of 0.1 by the literal union */
priority?: 0 | 0.1 | 0.2 | 0.3 | 0.4 | 0.5 | 0.6 | 0.7 | 0.8 | 0.9 | 1;
/** Alternative language versions */
alternatives?: AlternativeEntry[];
/** Google News metadata */
news?: GoogleNewsEntry;
/** Associated images */
images?: ImageEntry[];
/** Associated videos */
videos?: VideoEntry[];
}
/** Allowed values for `SitemapUrl.changefreq`. */
type Changefreq =
| 'always'
| 'hourly'
| 'daily'
| 'weekly'
| 'monthly'
| 'yearly'
| 'never';
Image Metadata Structure
/** Image attached to a URL entry through `SitemapUrl.images`. Only `loc` is required. */
interface ImageEntry {
/** Image URL location */
loc: string | URL;
/** Image caption text */
caption?: string;
/** Geographic location information */
geoLocation?: string;
/** Image title */
title?: string;
/** License URL */
license?: string | URL;
}
Video Metadata Structure
/**
* Video attached to a URL entry through `SitemapUrl.videos`.
* `title`, `thumbnail_loc` and `description` are required; everything else is optional.
* Several flags accept either the literal strings `'yes'` / `'no'` or a boolean.
*/
interface VideoEntry {
/** Video title (required) */
title: string;
/** Video thumbnail URL (required) */
thumbnail_loc: string | URL;
/** Video description (required) */
description: string;
/** Direct video content URL */
content_loc?: string | URL;
/** Video player page URL */
player_loc?: string | URL;
/** Video duration in seconds */
duration?: number;
/** Video expiration date */
expiration_date?: Date | string;
/** Video rating (0.0 to 5.0) */
rating?: number;
/** View count */
view_count?: number;
/** Publication date */
publication_date?: Date | string;
/** Family-friendly flag */
family_friendly?: 'yes' | 'no' | boolean;
/** Geographic restrictions */
restriction?: Restriction;
/** Platform restrictions */
platform?: Platform;
/** Pricing information */
price?: PriceEntry[];
/** Subscription requirement */
requires_subscription?: 'yes' | 'no' | boolean;
/** Uploader information */
uploader?: {
uploader: string;
info?: string | URL;
};
/** Live content indicator */
live?: 'yes' | 'no' | boolean;
/** Content tags */
tag?: string | string[];
}
/** Shape of `VideoEntry.restriction`, which that interface describes as geographic restrictions. */
interface Restriction {
/** Whether the listed value is allowed or denied */
relationship: 'allow' | 'deny';
/** The restricted value itself; its format is not shown in this section (presumably country codes; confirm) */
restriction: string;
}
/** Shape of `VideoEntry.platform`, which that interface describes as platform restrictions. */
interface Platform {
/** Whether the listed value is allowed or denied */
relationship: 'allow' | 'deny';
/** The platform value itself; its format is not shown in this section (confirm accepted values) */
platform: string;
}
/** One element of `VideoEntry.price` (pricing information). Every field is optional. */
interface PriceEntry {
/** Price amount, given either as a number or as a string */
price?: number | string;
/** Currency of the price; expected format is not shown here (presumably an ISO 4217 code; confirm) */
currency?: string;
/** Kind of offer the price applies to */
type?: 'rent' | 'purchase' | 'package' | 'subscription';
}
Alternative URL Structure
/** One element of `SitemapUrl.alternatives`: an alternative language version of the URL. */
interface AlternativeEntry {
/** Language/locale code (hreflang attribute) */
hreflang: string;
/** Alternative URL */
href: string | URL;
}
Google News Structure
/** Shape of `SitemapUrl.news` (Google News metadata). All fields are required. */
interface GoogleNewsEntry {
/** News article title */
title: string;
/** Article publication date in W3C format */
publication_date: Date | string;
/** Publication information */
publication: {
/** Publication name as it appears on news.google.com */
name: string;
/** Publication language (ISO 639 code) */
language: string;
};
}
Usage Examples:
// Parse an existing XML sitemap
import { parseSitemapXml } from '@nuxtjs/sitemap/utils';
const xmlContent = `<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<url>
<loc>https://example.com/</loc>
<lastmod>2023-12-01</lastmod>
<changefreq>daily</changefreq>
<priority>1.0</priority>
</url>
<url>
<loc>https://example.com/about</loc>
<lastmod>invalid-date</lastmod>
<priority>0.8</priority>
</url>
</urlset>`;
// parseSitemapXml returns a Promise, so the result must be awaited before
// `urls` and `warnings` can be read.
const result = await parseSitemapXml(xmlContent);
console.log(result.urls);
// [
//   {
//     loc: 'https://example.com/',
//     lastmod: '2023-12-01',
//     changefreq: 'daily',
//     priority: 1.0
//   },
//   {
//     // the invalid lastmod is omitted here and reported as a warning instead
//     loc: 'https://example.com/about',
//     priority: 0.8
//   }
// ]
console.log(result.warnings);
// Illustrative output, shaped like the SitemapWarning interface
// (`type` is always 'validation'; `context` is an object, not a string):
// [
//   {
//     type: 'validation',
//     message: 'Invalid lastmod date: invalid-date',
//     context: {
//       url: 'https://example.com/about',
//       field: 'lastmod',
//       value: 'invalid-date'
//     }
//   }
// ]
// Extract metadata from HTML content
import { parseHtmlExtractSitemapMeta } from '@nuxtjs/sitemap/utils';
const htmlContent = `
<!DOCTYPE html>
<html>
<head>
<title>My Blog Post</title>
<meta property="og:image" content="https://example.com/hero.jpg">
<meta property="article:published_time" content="2023-12-01T10:00:00Z">
</head>
<body>
<h1>My Blog Post</h1>
<img src="/images/diagram.png" alt="Technical diagram">
<video src="/videos/demo.mp4" poster="/videos/demo-thumb.jpg">
<source src="/videos/demo.mp4" type="video/mp4">
</video>
</body>
</html>
`;
// No `resolveUrl` option is passed, which is presumably why the relative paths
// in the illustrated output below are left unresolved.
const metadata = parseHtmlExtractSitemapMeta(htmlContent);
console.log(metadata);
// NOTE(review): the illustrated entry has no `loc`, although `SitemapUrl.loc` is
// declared as required. Confirm the actual output shape against the package.
// [
//   {
//     images: [
//       {
//         loc: 'https://example.com/hero.jpg',
//         title: 'My Blog Post'
//       },
//       {
//         loc: '/images/diagram.png',
//         caption: 'Technical diagram'
//       }
//     ],
//     videos: [
//       {
//         title: 'My Blog Post',
//         content_loc: '/videos/demo.mp4',
//         thumbnail_loc: '/videos/demo-thumb.jpg'
//       }
//     ],
//     lastmod: '2023-12-01T10:00:00Z'
//   }
// ]
// Handle parsing errors gracefully
// (`invalidXml` stands for any XML string obtained elsewhere.)
try {
  // Await inside the try block: parseSitemapXml returns a Promise, and without
  // `await` a rejection would bypass the catch below.
  const result = await parseSitemapXml(invalidXml);
  // Process results
  result.urls.forEach(url => {
    console.log(`Processing URL: ${url.loc}`);
  });
  // Handle warnings
  if (result.warnings.length > 0) {
    console.warn('Parsing warnings:');
    result.warnings.forEach(warning => {
      console.warn(`- ${warning.type}: ${warning.message}`);
    });
  }
} catch (error) {
  console.error('Failed to parse sitemap XML:', error);
}
// Integration with existing sitemap generation
import { parseSitemapXml, parseHtmlExtractSitemapMeta } from '@nuxtjs/sitemap/utils';
// Parse competitor's sitemap for analysis
const competitorSitemap = await $fetch('https://competitor.com/sitemap.xml');
// parseSitemapXml is asynchronous; await it before reading `urls`
const parsed = await parseSitemapXml(competitorSitemap);
// Use parsed data to inform your sitemap structure
const competitorUrls = parsed.urls.map(url => ({
  loc: url.loc.replace('competitor.com', 'mysite.com'),
  // `??` rather than `||`: fall back to 0.5 only when priority is absent,
  // not when it is the valid value 0
  priority: Math.max(0.1, (url.priority ?? 0.5) - 0.1) // Slightly lower priority
}));
// Extract metadata from rendered pages for automatic discovery
const pageHtml = await $fetch('https://mysite.com/blog/post-1');
const extractedMeta = parseHtmlExtractSitemapMeta(pageHtml);
// Combine with existing sitemap data
const enrichedUrl = {
  ...extractedMeta[0], // Use discovered metadata
  // Explicit fields come after the spread so they always win over discovered
  // values; with `loc` placed first, the spread entry's own `loc` would replace it.
  loc: '/blog/post-1',
  priority: 0.8
};
XML Sitemap Support
The XML parser supports:
HTML Metadata Discovery
The HTML parser extracts:
Error Handling and Validation
Both parsers provide:
Install with Tessl CLI
npx tessl i tessl/npm-nuxtjs--sitemap