0
# Sitemap Parsing
1
2
Functionality for parsing existing XML sitemaps back into JavaScript objects for analysis and manipulation. These utilities allow you to read and process existing sitemaps from files or streams.
3
4
## Capabilities
5
6
### Sitemap Parsing Functions
7
8
High-level functions for parsing complete sitemaps into arrays of items.
9
10
```typescript { .api }
11
/**
12
* Parse XML sitemap and resolve with array of sitemap items
13
* @param xml - Readable stream containing XML sitemap data
14
* @returns Promise resolving to array of parsed sitemap items
15
*/
16
function parseSitemap(xml: Readable): Promise<SitemapItem[]>;
17
18
/**
19
* Parse XML sitemap index and resolve with array of index items
20
* @param xml - Readable stream containing XML sitemap index data
21
* @returns Promise resolving to array of parsed index items
22
*/
23
function parseSitemapIndex(xml: Readable): Promise<IndexItem[]>;
24
```
25
26
**Usage Examples:**
27
28
```typescript
29
import { parseSitemap, parseSitemapIndex } from "sitemap";
30
import { createReadStream } from "fs";
31
32
// Parse sitemap from file
33
const sitemapItems = await parseSitemap(
34
createReadStream("./sitemap.xml")
35
);
36
37
console.log("Found", sitemapItems.length, "URLs");
38
sitemapItems.forEach(item => {
39
console.log(`${item.url} - ${item.changefreq} - ${item.priority}`);
40
});
41
42
// Parse sitemap index
43
const indexItems = await parseSitemapIndex(
44
createReadStream("./sitemap-index.xml")
45
);
46
47
console.log("Found", indexItems.length, "sitemaps");
48
indexItems.forEach(item => {
49
console.log(`${item.url} - ${item.lastmod}`);
50
});
51
```
52
53
### XMLToSitemapItemStream
54
55
Transform stream for parsing XML sitemap data into individual sitemap items.
56
57
```typescript { .api }
58
/**
59
* Transform stream that converts XML sitemap data into SitemapItem objects
60
* Use this to parse existing sitemaps into config options compatible with this library
61
*/
62
class XMLToSitemapItemStream extends Transform {
63
constructor(opts?: XMLToSitemapItemStreamOptions);
64
65
/** Error handling level */
66
level: ErrorLevel;
67
68
/** Logger function for warnings and errors */
69
logger: Logger;
70
71
/** Current parsing error */
72
error: Error | null;
73
74
/** SAX stream parser */
75
saxStream: SAXStream;
76
}
77
78
interface XMLToSitemapItemStreamOptions extends TransformOptions {
79
/** Error handling level for validation */
80
level?: ErrorLevel;
81
82
/** Custom logger function or false to disable logging */
83
logger?: Logger | false;
84
}
85
86
type Logger = (
87
level: 'warn' | 'error' | 'info' | 'log',
88
...message: Parameters<Console['log']>[0]
89
) => void;
90
```
91
92
**Usage Examples:**
93
94
```typescript
95
import { XMLToSitemapItemStream, ErrorLevel, SitemapItem } from "sitemap";
96
import { createReadStream } from "fs";
97
98
// Parse with strict error handling
99
const parser = new XMLToSitemapItemStream({
100
level: ErrorLevel.THROW
101
});
102
103
const sitemapItems: SitemapItem[] = [];
104
105
createReadStream("sitemap.xml")
106
.pipe(parser)
107
.on('data', (item: SitemapItem) => {
108
sitemapItems.push(item);
109
})
110
.on('end', () => {
111
console.log("Parsed", sitemapItems.length, "items");
112
})
113
.on('error', (error) => {
114
console.error("Parse error:", error);
115
});
116
117
// Parse with custom logging
118
const customParser = new XMLToSitemapItemStream({
119
level: ErrorLevel.WARN,
120
logger: (level, ...args) => {
121
console.log(`[${level.toUpperCase()}]`, ...args);
122
}
123
});
124
```
125
126
### XMLToSitemapIndexStream
127
128
Transform stream for parsing XML sitemap index data into index items.
129
130
```typescript { .api }
131
/**
132
* Transform stream that converts XML sitemap index data into IndexItem objects
133
* Use this to parse existing sitemap indices into config options
134
*/
135
class XMLToSitemapIndexStream extends Transform {
136
constructor(opts?: XMLToSitemapIndexItemStreamOptions);
137
138
/** Error handling level */
139
level: ErrorLevel;
140
141
/** Logger function */
142
logger: Logger;
143
144
/** SAX stream parser */
145
saxStream: SAXStream;
146
}
147
148
interface XMLToSitemapIndexItemStreamOptions extends TransformOptions {
149
/** Error handling level for validation */
150
level?: ErrorLevel;
151
152
/** Custom logger function or false to disable logging */
153
logger?: Logger | false;
154
}
155
```
156
157
**Usage Examples:**
158
159
```typescript
160
import { XMLToSitemapIndexStream, IndexItem } from "sitemap";
161
import { createReadStream } from "fs";
162
163
const indexParser = new XMLToSitemapIndexStream();
164
const indexItems: IndexItem[] = [];
165
166
createReadStream("sitemap-index.xml")
167
.pipe(indexParser)
168
.on('data', (item: IndexItem) => {
169
indexItems.push(item);
170
})
171
.on('end', () => {
172
console.log("Found sitemaps:", indexItems.map(i => i.url));
173
});
174
```
175
176
### JSON Conversion Streams
177
178
Transform streams for converting parsed objects to JSON format.
179
180
```typescript { .api }
181
/**
182
* Transform stream that converts sitemap items to JSON format
183
* @param opts.lineSeparated - When true, entries are separated by newlines; when false, by commas
184
*/
185
class ObjectStreamToJSON extends Transform {
186
constructor(opts?: ObjectStreamToJSONOptions);
187
188
/** Whether to use line-separated JSON */
189
lineSeparated: boolean;
190
191
/** Whether first item has been written */
192
firstWritten: boolean;
193
}
194
195
interface ObjectStreamToJSONOptions extends TransformOptions {
196
/** Whether to separate entries by newline instead of comma */
197
lineSeparated: boolean;
198
}
199
200
/**
201
* Transform stream that converts index items to JSON format
202
*/
203
class IndexObjectStreamToJSON extends Transform {
204
constructor(opts?: IndexObjectStreamToJSONOptions);
205
206
/** Whether to use line-separated JSON */
207
lineSeparated: boolean;
208
209
/** Whether first item has been written */
210
firstWritten: boolean;
211
}
212
213
interface IndexObjectStreamToJSONOptions extends TransformOptions {
214
/** Whether to separate entries by newline instead of comma */
215
lineSeparated: boolean;
216
}
217
```
218
219
**Usage Examples:**
220
221
```typescript
222
import {
223
XMLToSitemapItemStream,
224
ObjectStreamToJSON
225
} from "sitemap";
226
import { createReadStream, createWriteStream } from "fs";
227
228
// Convert sitemap XML to JSON array
229
createReadStream("sitemap.xml")
230
.pipe(new XMLToSitemapItemStream())
231
.pipe(new ObjectStreamToJSON({ lineSeparated: false }))
232
.pipe(createWriteStream("sitemap.json"));
233
234
// Convert to line-separated JSON (JSONL)
235
createReadStream("sitemap.xml")
236
.pipe(new XMLToSitemapItemStream())
237
.pipe(new ObjectStreamToJSON({ lineSeparated: true }))
238
.pipe(createWriteStream("sitemap.jsonl"));
239
```
240
241
## Advanced Parsing Features
242
243
### Supported Sitemap Extensions
244
245
The parser supports all standard sitemap extensions:
246
247
- **Images**: `<image:image>` elements with captions, titles, geo-location
248
- **Videos**: `<video:video>` elements with thumbnails, descriptions, metadata
249
- **News**: `<news:news>` elements with publication data
250
- **Alternate Languages**: `<xhtml:link>` elements for multilingual content
251
- **Mobile**: `<mobile:mobile>` elements (legacy)
252
253
### Error Handling in Parsing
254
255
```typescript
256
import { XMLToSitemapItemStream, ErrorLevel } from "sitemap";
257
258
// Different error handling strategies
259
const silentParser = new XMLToSitemapItemStream({
260
level: ErrorLevel.SILENT // Ignore all validation errors
261
});
262
263
const warningParser = new XMLToSitemapItemStream({
264
level: ErrorLevel.WARN // Log warnings but continue
265
});
266
267
const strictParser = new XMLToSitemapItemStream({
268
level: ErrorLevel.THROW // Throw on any validation error
269
});
270
```
271
272
### Custom Validation During Parsing
273
274
```typescript
275
import { XMLToSitemapItemStream, ErrorLevel } from "sitemap";
276
277
const parser = new XMLToSitemapItemStream({
278
level: ErrorLevel.WARN,
279
logger: (level, ...args) => {
280
// Custom validation logic
281
if (level === 'warn' && typeof args[0] === 'string' && args[0].includes('unhandled')) {
282
console.warn("Found unsupported sitemap element:", ...args);
283
}
284
}
285
});
286
```
287
288
## Complete Parsing Example
289
290
```typescript
291
import {
292
parseSitemap,
293
SitemapStream,
294
streamToPromise
295
} from "sitemap";
296
import { createReadStream } from "fs";
297
298
async function processSitemap() {
299
// Parse existing sitemap
300
const items = await parseSitemap(createReadStream("input.xml"));
301
302
// Filter and modify items
303
const filteredItems = items
304
.filter(item => item.priority && item.priority > 0.5)
305
.map(item => ({
306
...item,
307
lastmod: new Date().toISOString(),
308
changefreq: "weekly" as const
309
}));
310
311
// Generate new sitemap
312
const newSitemap = new SitemapStream({
313
hostname: "https://example.com"
314
});
315
316
filteredItems.forEach(item => newSitemap.write(item));
317
newSitemap.end();
318
319
const xmlBuffer = await streamToPromise(newSitemap);
320
console.log(xmlBuffer.toString());
321
}
322
```