# Site Mapping

Discover and map website URLs using sitemaps, crawling techniques, and intelligent URL discovery.

## Core Mapping Method

```typescript { .api }
/**
 * Map a site to discover URLs (sitemap-aware).
 *
 * @param url - Root URL to map
 * @param options - Optional mapping configuration (see `MapOptions`)
 * @returns Promise resolving to a `MapData` object whose `links` array holds the discovered links
 */
map(url: string, options?: MapOptions): Promise<MapData>;
```

## Mapping Configuration

```typescript { .api }
/** Configuration accepted by `map()`. Every field is optional. */
interface MapOptions {
  /** Search/filter term applied to the discovered URLs. */
  search?: string;

  /**
   * Sitemap handling mode.
   * - `"only"`: return only URLs found in sitemap files.
   * - `"include"` / `"skip"`: presumably use the sitemap alongside other
   *   discovery, or ignore it entirely — confirm against the API reference.
   */
  sitemap?: "only" | "include" | "skip";

  /** Whether URLs on subdomains of the root URL are included. */
  includeSubdomains?: boolean;

  /** Limit on the number of results. */
  limit?: number;

  /** Operation timeout, in milliseconds. */
  timeout?: number;

  /** Integration tracking value. */
  integration?: string;

  /** Location configuration. */
  location?: LocationConfig;
}

/** Location settings referenced by `MapOptions.location`. */
interface LocationConfig {
  /** Country to use; the expected format (e.g. ISO code) is not specified here — confirm against the API reference. */
  country?: string;

  /** Languages to use, e.g. `['en', 'es', 'fr', 'de']`. */
  languages?: string[];
}
```

## Mapping Results

```typescript { .api }
/** Result of a `map()` call. */
interface MapData {
  /** The discovered links. */
  links: SearchResultWeb[];
}

/** A single discovered link. Only `url` is guaranteed to be present. */
interface SearchResultWeb {
  /** URL of the discovered page. */
  url: string;

  /** Page title, when available. */
  title?: string;

  /** Page description, when available. */
  description?: string;

  /** Category label, when available. */
  category?: string;
}
```

## Usage Examples

### Basic Site Mapping

```typescript
// Discover all URLs on a website
const mapResult = await app.map('https://example.com');

console.log(`Found ${mapResult.links.length} URLs:`);
for (const link of mapResult.links) {
  // `title` is optional on each link, so fall back to a placeholder
  // instead of printing the literal string "undefined".
  console.log(`- ${link.url}: ${link.title ?? '(untitled)'}`);
}
```

### Sitemap-Only Discovery

```typescript
// Use only sitemap.xml for URL discovery
const mapResult = await app.map('https://docs.example.com', {
  sitemap: 'only',
  limit: 500
});

console.log('URLs from sitemap:', mapResult.links);
// Returns only URLs found in sitemap.xml files
```

### Include Subdomains

```typescript
// Map main domain and all subdomains
const mapResult = await app.map('https://example.com', {
  includeSubdomains: true,
  limit: 1000,
  timeout: 60000 // 60 seconds
});

// Will discover URLs from:
// - https://example.com
// - https://www.example.com
// - https://blog.example.com
// - https://api.example.com
// etc.
```

### Filtered URL Discovery

```typescript
// Search for specific types of content
const apiDocsMap = await app.map('https://docs.example.com', {
  search: 'api',
  sitemap: 'include',
  limit: 100
});

const tutorialMap = await app.map('https://docs.example.com', {
  search: 'tutorial guide',
  limit: 50
});

console.log('API documentation URLs:', apiDocsMap.links);
console.log('Tutorial URLs:', tutorialMap.links);
```

### Large Site Mapping

```typescript
// Map a large site with high limits
const mapResult = await app.map('https://largewebsite.com', {
  sitemap: 'include',
  includeSubdomains: false,
  limit: 5000,
  timeout: 300000 // 5 minutes
});

// Organize results by their first path segment
const pathMap = new Map<string, typeof mapResult.links>();

for (const link of mapResult.links) {
  // "/docs/intro" -> "docs"; pages directly under "/" are grouped as "root"
  const pathSegment = new URL(link.url).pathname.split('/')[1] || 'root';

  // Get-or-create the bucket so no non-null assertion is needed
  const bucket = pathMap.get(pathSegment) ?? [];
  bucket.push(link);
  pathMap.set(pathSegment, bucket);
}

console.log('URLs organized by path:');
pathMap.forEach((links, path) => {
  console.log(`/${path}: ${links.length} URLs`);
});
```

### Documentation Site Mapping

```typescript
// Map documentation with categorization
const docsMap = await app.map('https://docs.example.com', {
  sitemap: 'include',
  limit: 1000
});

// Categorize documentation URLs
type DocCategory = 'api' | 'guides' | 'tutorials' | 'reference' | 'other';

const categories: Record<DocCategory, typeof docsMap.links> = {
  api: [],
  guides: [],
  tutorials: [],
  reference: [],
  other: []
};

// Ordered rules: the first keyword found as a "/keyword/" path part or
// anywhere in the title decides the category.
const rules = [
  { category: 'api', keyword: 'api' },
  { category: 'guides', keyword: 'guide' },
  { category: 'tutorials', keyword: 'tutorial' },
  { category: 'reference', keyword: 'reference' }
] as const;

for (const link of docsMap.links) {
  const url = link.url.toLowerCase();
  const title = (link.title ?? '').toLowerCase();

  const match = rules.find(
    ({ keyword }) => url.includes(`/${keyword}/`) || title.includes(keyword)
  );

  // Links matching no rule fall into "other"
  categories[match?.category ?? 'other'].push(link);
}

console.log('Documentation categories:', {
  api: categories.api.length,
  guides: categories.guides.length,
  tutorials: categories.tutorials.length,
  reference: categories.reference.length,
  other: categories.other.length
});
```

### E-commerce Site Mapping

```typescript
// Map product pages and categories
const productMap = await app.map('https://shop.example.com', {
  search: 'product category',
  includeSubdomains: false,
  limit: 2000
});

// Filter and organize e-commerce URLs
type ShopSection = 'products' | 'categories' | 'brands' | 'other';

const ecommerceUrls: Record<ShopSection, typeof productMap.links> = {
  products: [],
  categories: [],
  brands: [],
  other: []
};

// Ordered rules: the first section with a path marker present in the URL wins
const sections = [
  { section: 'products', markers: ['/product/', '/item/'] },
  { section: 'categories', markers: ['/category/', '/collection/'] },
  { section: 'brands', markers: ['/brand/', '/manufacturer/'] }
] as const;

for (const link of productMap.links) {
  const url = link.url.toLowerCase();

  const match = sections.find(({ markers }) =>
    markers.some(marker => url.includes(marker))
  );

  // URLs matching no marker fall into "other"
  ecommerceUrls[match?.section ?? 'other'].push(link);
}

console.log('E-commerce site structure:', {
  totalProducts: ecommerceUrls.products.length,
  categories: ecommerceUrls.categories.length,
  brands: ecommerceUrls.brands.length
});
```

### Multi-Language Site Mapping

```typescript
// Map a multi-language website
const allLanguagesMap = await app.map('https://international.example.com', {
  includeSubdomains: true,
  limit: 3000,
  location: {
    languages: ['en', 'es', 'fr', 'de']
  }
});

// Language codes recognised in both subdomains and paths. Using one list for
// both keeps two-letter non-language paths (e.g. /js/, /us/) from being
// reported as languages.
const knownLanguages = new Set(['en', 'es', 'fr', 'de', 'ja', 'zh']);

// Organize by language/locale
const languageUrls = new Map<string, typeof allLanguagesMap.links>();

for (const link of allLanguagesMap.links) {
  const url = new URL(link.url);

  // Candidate from subdomain (en.example.com)
  const subdomain = url.hostname.split('.')[0] ?? '';

  // Candidate from the first path segment (/en/..., or a bare /en)
  const pathLanguage = url.pathname.toLowerCase().match(/^\/([a-z]{2})(?:\/|$)/)?.[1];

  // A path language takes precedence over a subdomain language
  let language = 'unknown';
  if (pathLanguage !== undefined && knownLanguages.has(pathLanguage)) {
    language = pathLanguage;
  } else if (knownLanguages.has(subdomain)) {
    language = subdomain;
  }

  const bucket = languageUrls.get(language) ?? [];
  bucket.push(link);
  languageUrls.set(language, bucket);
}

console.log('URLs by language:');
languageUrls.forEach((links, lang) => {
  console.log(`${lang}: ${links.length} URLs`);
});
```

### Content Audit Mapping

```typescript
// Map for content audit purposes
const auditMap = await app.map('https://company.example.com', {
  sitemap: 'include',
  includeSubdomains: true,
  limit: 10000
});

// Analyze URL patterns for content audit
const analysis = {
  totalUrls: auditMap.links.length,
  httpUrls: 0,
  httpsUrls: 0,
  subdomains: new Set<string>(),
  fileTypes: new Map<string, number>(),
  pathDepths: new Map<number, number>()
};

for (const link of auditMap.links) {
  const url = new URL(link.url);

  // Protocol analysis
  if (url.protocol === 'http:') analysis.httpUrls++;
  if (url.protocol === 'https:') analysis.httpsUrls++;

  // Subdomain analysis (distinct hostnames)
  analysis.subdomains.add(url.hostname);

  // File type analysis: only the last path segment can carry an extension.
  // Splitting the whole pathname on "." would count extensionless paths such
  // as "/blog" as a file type and misread dots in directory names.
  const lastSegment = url.pathname.split('/').pop() ?? '';
  const dotIndex = lastSegment.lastIndexOf('.');
  if (dotIndex > 0) {
    const fileExtension = lastSegment.slice(dotIndex + 1).toLowerCase();
    if (fileExtension.length > 0 && fileExtension.length <= 5) {
      const count = analysis.fileTypes.get(fileExtension) ?? 0;
      analysis.fileTypes.set(fileExtension, count + 1);
    }
  }

  // Path depth analysis: number of non-empty path segments
  const depth = url.pathname.split('/').filter(segment => segment).length;
  const depthCount = analysis.pathDepths.get(depth) ?? 0;
  analysis.pathDepths.set(depth, depthCount + 1);
}

// Sum of depth * count over all depths; divided by the URL count below
const totalDepth = [...analysis.pathDepths.entries()]
  .reduce((sum, [depth, count]) => sum + depth * count, 0);

console.log('Content audit results:', {
  totalUrls: analysis.totalUrls,
  securityIssues: analysis.httpUrls > 0 ? `${analysis.httpUrls} non-HTTPS URLs` : 'None',
  uniqueSubdomains: analysis.subdomains.size,
  commonFileTypes: [...analysis.fileTypes.entries()]
    .sort(([, a], [, b]) => b - a)
    .slice(0, 5),
  // Guard against 0 / 0 (NaN) when no URLs were discovered
  averagePathDepth: analysis.totalUrls > 0 ? totalDepth / analysis.totalUrls : 0
});
```

### Competitive Analysis Mapping

```typescript
// Map competitor websites for analysis
const competitors = [
  'https://competitor1.com',
  'https://competitor2.com',
  'https://competitor3.com'
];

// Promise.allSettled keeps the successful maps even if one competitor site
// fails or times out; Promise.all would reject and discard every result.
const settled = await Promise.allSettled(
  competitors.map(async (url) => {
    const mapResult = await app.map(url, {
      sitemap: 'include',
      limit: 500,
      timeout: 30000
    });

    return {
      domain: new URL(url).hostname,
      urlCount: mapResult.links.length,
      links: mapResult.links
    };
  })
);

// Keep fulfilled results; report failures (results are in the same order as `competitors`)
const competitorMaps = settled.flatMap((result, index) => {
  if (result.status === 'fulfilled') {
    return [result.value];
  }
  console.error(`Mapping failed for ${competitors[index]}:`, result.reason);
  return [];
});

// Analyze competitor site structures
for (const { domain, urlCount, links } of competitorMaps) {
  // Count URLs per first path segment ("root" for top-level pages)
  const pathAnalysis = new Map<string, number>();

  for (const link of links) {
    const path = new URL(link.url).pathname.split('/')[1] || 'root';
    pathAnalysis.set(path, (pathAnalysis.get(path) ?? 0) + 1);
  }

  console.log(`${domain}:`, {
    totalUrls: urlCount,
    topSections: [...pathAnalysis.entries()]
      .sort(([, a], [, b]) => b - a)
      .slice(0, 5)
      .map(([path, count]) => ({ path, count }))
  });
}
```

### Error Handling and Timeouts

```typescript
try {
  const mapResult = await app.map('https://example.com', {
    sitemap: 'include',
    includeSubdomains: true,
    limit: 5000,
    timeout: 120000 // 2 minutes
  });

  console.log(`Successfully mapped ${mapResult.links.length} URLs`);

  // Check for any issues in results: `!link.title` covers both a missing
  // and an empty title.
  const untitledLinks = mapResult.links.filter(link => !link.title);

  if (untitledLinks.length > 0) {
    console.log(`${untitledLinks.length} URLs without titles found`);
  }
} catch (error) {
  console.error('Mapping failed:', error);

  // Fallback to smaller mapping operation: sitemap only, lower limit, shorter timeout
  try {
    const fallbackResult = await app.map('https://example.com', {
      sitemap: 'only',
      limit: 100,
      timeout: 30000
    });
    console.log(`Fallback mapping found ${fallbackResult.links.length} URLs`);
  } catch (fallbackError) {
    console.error('Fallback mapping also failed:', fallbackError);
  }
}
```