or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

batch.md crawling.md extraction.md index.md mapping.md monitoring.md scraping.md search.md usage.md v1-api.md

mapping.mddocs/

0

# Site Mapping

1

2

Discover and map website URLs using sitemaps, crawling techniques, and intelligent URL discovery.

3

4

## Core Mapping Method

5

6

```typescript { .api }

7

/**
 * Discovers the URLs of a site, taking its sitemap into account.
 *
 * @param url - Root URL of the site to map
 * @param options - Optional mapping configuration (see `MapOptions`)
 * @returns A promise that resolves to the discovered links
 */
map(url: string, options?: MapOptions): Promise<MapData>;

14

```

15

16

## Mapping Configuration

17

18

```typescript { .api }

19

/** Optional settings accepted by `map()`; every field may be omitted. */
interface MapOptions {
  /** Search term used to filter the discovered URLs. */
  search?: string;

  /** Sitemap handling mode: `"only"`, `"include"`, or `"skip"`. */
  sitemap?: "only" | "include" | "skip";

  /** Whether subdomains are included in the mapping. */
  includeSubdomains?: boolean;

  /** Limit on the number of results. */
  limit?: number;

  /** Operation timeout, in milliseconds. */
  timeout?: number;

  /** Integration tracking value. */
  integration?: string;

  /** Location configuration. */
  location?: LocationConfig;
}

41

42

/** Location settings that can be attached to a mapping request. */
interface LocationConfig {
  /** Country for the request (expected format is not shown here; TODO confirm). */
  country?: string;

  /** Languages for the request; presumably codes such as `'en'` (TODO confirm). */
  languages?: string[];
}

46

```

47

48

## Mapping Results

49

50

```typescript { .api }

51

/** Result of a mapping operation. */
interface MapData {
  /** The links discovered for the mapped site. */
  links: SearchResultWeb[];
}

54

55

/** A single discovered link; only `url` is guaranteed to be present. */
interface SearchResultWeb {
  /** Address of the discovered page. */
  url: string;

  /** Page title, when available. */
  title?: string;

  /** Page description, when available. */
  description?: string;

  /** Category label, when available. */
  category?: string;
}

61

```

62

63

## Usage Examples

64

65

### Basic Site Mapping

66

67

```typescript

68

// Map the site with default options and list every link that was found
const mapResult = await app.map('https://example.com');
const discoveredLinks = mapResult.links;

console.log(`Found ${discoveredLinks.length} URLs:`);
for (const link of discoveredLinks) {
  console.log(`- ${link.url}: ${link.title}`);
}

75

```

76

77

### Sitemap-Only Discovery

78

79

```typescript

80

// Restrict URL discovery to sitemap.xml
const sitemapOnlyOptions = { sitemap: 'only', limit: 500 } as const;
const mapResult = await app.map('https://docs.example.com', sitemapOnlyOptions);

// Only URLs found in sitemap.xml files are returned
console.log('URLs from sitemap:', mapResult.links);

88

```

89

90

### Include Subdomains

91

92

```typescript

93

// Map the main domain together with all of its subdomains
const SIXTY_SECONDS_MS = 60000;

const mapResult = await app.map('https://example.com', {
  includeSubdomains: true,
  limit: 1000,
  timeout: SIXTY_SECONDS_MS
});

// URLs will be discovered from hosts such as:
//   https://example.com
//   https://www.example.com
//   https://blog.example.com
//   https://api.example.com
//   ...and so on

106

```

107

108

### Filtered URL Discovery

109

110

```typescript

111

// Narrow discovery to specific kinds of content with `search`
const docsSite = 'https://docs.example.com';

const apiDocsMap = await app.map(docsSite, {
  search: 'api',
  sitemap: 'include',
  limit: 100
});
const tutorialMap = await app.map(docsSite, {
  search: 'tutorial guide',
  limit: 50
});

console.log('API documentation URLs:', apiDocsMap.links);
console.log('Tutorial URLs:', tutorialMap.links);

125

```

126

127

### Large Site Mapping

128

129

```typescript

130

// Map a large site with a high result limit and a generous timeout
const FIVE_MINUTES_MS = 300000;

const mapResult = await app.map('https://largewebsite.com', {
  sitemap: 'include',
  includeSubdomains: false,
  limit: 5000,
  timeout: FIVE_MINUTES_MS
});

// Group the links by their first path segment ("root" when there is none)
const pathMap = new Map<string, typeof mapResult.links>();

for (const link of mapResult.links) {
  const [, firstSegment] = new URL(link.url).pathname.split('/');
  const pathSegment = firstSegment || 'root';

  let bucket = pathMap.get(pathSegment);
  if (bucket === undefined) {
    bucket = [];
    pathMap.set(pathSegment, bucket);
  }
  bucket.push(link);
}

console.log('URLs organized by path:');
for (const [path, links] of pathMap) {
  console.log(`/${path}: ${links.length} URLs`);
}

155

```

156

157

### Documentation Site Mapping

158

159

```typescript

160

// Map a documentation site, then sort its pages into categories
const docsMap = await app.map('https://docs.example.com', {
  sitemap: 'include',
  limit: 1000
});

type DocLinks = typeof docsMap.links;

const categories = {
  api: [] as DocLinks,
  guides: [] as DocLinks,
  tutorials: [] as DocLinks,
  reference: [] as DocLinks,
  other: [] as DocLinks
};

// Rules are evaluated in order; the first match wins, mirroring an if/else chain.
const categoryRules = [
  { bucket: 'api', pathMarker: '/api/', titleMarker: 'api' },
  { bucket: 'guides', pathMarker: '/guide/', titleMarker: 'guide' },
  { bucket: 'tutorials', pathMarker: '/tutorial/', titleMarker: 'tutorial' },
  { bucket: 'reference', pathMarker: '/reference/', titleMarker: 'reference' }
] as const;

for (const link of docsMap.links) {
  const url = link.url.toLowerCase();
  const title = (link.title || '').toLowerCase();

  const matchedRule = categoryRules.find(
    rule => url.includes(rule.pathMarker) || title.includes(rule.titleMarker)
  );
  // Anything that matches no rule lands in "other"
  categories[matchedRule ? matchedRule.bucket : 'other'].push(link);
}

console.log('Documentation categories:', {
  api: categories.api.length,
  guides: categories.guides.length,
  tutorials: categories.tutorials.length,
  reference: categories.reference.length,
  other: categories.other.length
});

199

```

200

201

### E-commerce Site Mapping

202

203

```typescript

204

// Map a shop's product and category pages
const productMap = await app.map('https://shop.example.com', {
  search: 'product category',
  includeSubdomains: false,
  limit: 2000
});

type ShopLinks = typeof productMap.links;

// Buckets for the different kinds of e-commerce URLs
const ecommerceUrls = {
  products: [] as ShopLinks,
  categories: [] as ShopLinks,
  brands: [] as ShopLinks,
  other: [] as ShopLinks
};

// True when the URL contains at least one of the given path markers
const containsAny = (url: string, markers: readonly string[]): boolean =>
  markers.some(marker => url.includes(marker));

for (const link of productMap.links) {
  const url = link.url.toLowerCase();

  let bucket = ecommerceUrls.other;
  if (containsAny(url, ['/product/', '/item/'])) {
    bucket = ecommerceUrls.products;
  } else if (containsAny(url, ['/category/', '/collection/'])) {
    bucket = ecommerceUrls.categories;
  } else if (containsAny(url, ['/brand/', '/manufacturer/'])) {
    bucket = ecommerceUrls.brands;
  }
  bucket.push(link);
}

console.log('E-commerce site structure:', {
  totalProducts: ecommerceUrls.products.length,
  categories: ecommerceUrls.categories.length,
  brands: ecommerceUrls.brands.length
});

238

```

239

240

### Multi-Language Site Mapping

241

242

```typescript

243

// Map a multi-language website
const allLanguagesMap = await app.map('https://international.example.com', {
  includeSubdomains: true,
  limit: 3000,
  location: {
    languages: ['en', 'es', 'fr', 'de']
  }
});

// Language codes this example recognises; extend the set for other locales.
// The same list validates both subdomains and path prefixes, so unrelated
// two-letter path segments such as /go/ or /us/ are not mistaken for languages.
const knownLanguages = new Set(['en', 'es', 'fr', 'de', 'ja', 'zh']);

// Organize by language/locale
const languageUrls = new Map<string, typeof allLanguagesMap.links>();

for (const link of allLanguagesMap.links) {
  const url = new URL(link.url);
  let language = 'unknown';

  // Detect language from subdomain (en.example.com)
  const [subdomain = ''] = url.hostname.split('.');
  if (knownLanguages.has(subdomain)) {
    language = subdomain;
  }

  // Detect language from the first path segment (/en/..., or a bare /en).
  // A recognised path language takes precedence over the subdomain.
  const pathLanguage = url.pathname.match(/^\/([a-z]{2})(?:\/|$)/)?.[1];
  if (pathLanguage !== undefined && knownLanguages.has(pathLanguage)) {
    language = pathLanguage;
  }

  let bucket = languageUrls.get(language);
  if (bucket === undefined) {
    bucket = [];
    languageUrls.set(language, bucket);
  }
  bucket.push(link);
}

console.log('URLs by language:');
for (const [lang, links] of languageUrls) {
  console.log(`${lang}: ${links.length} URLs`);
}

281

```

282

283

### Content Audit Mapping

284

285

```typescript

286

// Map for content audit purposes
const auditMap = await app.map('https://company.example.com', {
  sitemap: 'include',
  includeSubdomains: true,
  limit: 10000
});

// Counters filled in while walking the discovered URLs
const analysis = {
  totalUrls: auditMap.links.length,
  httpUrls: 0,
  httpsUrls: 0,
  subdomains: new Set<string>(),
  fileTypes: new Map<string, number>(),
  pathDepths: new Map<number, number>()
};

for (const link of auditMap.links) {
  const url = new URL(link.url);

  // Protocol analysis
  if (url.protocol === 'http:') analysis.httpUrls++;
  if (url.protocol === 'https:') analysis.httpsUrls++;

  // Subdomain analysis (tracked as distinct hostnames)
  analysis.subdomains.add(url.hostname);

  // File type analysis. Only the last path segment can carry an extension,
  // and it must contain a dot that is not its first character. Splitting the
  // whole pathname on '.' would count extension-less paths such as "/blog"
  // or "/" as file types.
  const lastSegment = url.pathname.split('/').pop() ?? '';
  const dotIndex = lastSegment.lastIndexOf('.');
  if (dotIndex > 0) {
    const fileExtension = lastSegment.slice(dotIndex + 1).toLowerCase();
    if (fileExtension.length > 0 && fileExtension.length <= 5) {
      const count = analysis.fileTypes.get(fileExtension) ?? 0;
      analysis.fileTypes.set(fileExtension, count + 1);
    }
  }

  // Path depth analysis: number of non-empty path segments
  const depth = url.pathname.split('/').filter(segment => segment).length;
  const depthCount = analysis.pathDepths.get(depth) ?? 0;
  analysis.pathDepths.set(depth, depthCount + 1);
}

// Sum of depth * occurrences, used for the average below
let weightedDepthSum = 0;
for (const [depth, count] of analysis.pathDepths) {
  weightedDepthSum += depth * count;
}

console.log('Content audit results:', {
  totalUrls: analysis.totalUrls,
  securityIssues: analysis.httpUrls > 0 ? `${analysis.httpUrls} non-HTTPS URLs` : 'None',
  uniqueSubdomains: analysis.subdomains.size,
  commonFileTypes: Array.from(analysis.fileTypes.entries())
    .sort(([, a], [, b]) => b - a)
    .slice(0, 5),
  // Guard against an empty result set, which would otherwise yield NaN
  averagePathDepth: analysis.totalUrls > 0 ? weightedDepthSum / analysis.totalUrls : 0
});

336

```

337

338

### Competitive Analysis Mapping

339

340

```typescript

341

// Map several competitor websites and compare their structure
const competitors = [
  'https://competitor1.com',
  'https://competitor2.com',
  'https://competitor3.com'
];

// Maps one competitor site and summarises the result
const mapCompetitor = async (url: string) => {
  const { links } = await app.map(url, {
    sitemap: 'include',
    limit: 500,
    timeout: 30000
  });

  return {
    domain: new URL(url).hostname,
    urlCount: links.length,
    links
  };
};

// All sites are mapped concurrently; a single failure rejects the whole batch
const competitorMaps = await Promise.all(competitors.map(url => mapCompetitor(url)));

// Report the most populated top-level sections of each competitor site
for (const { domain, urlCount, links } of competitorMaps) {
  const pathAnalysis = new Map<string, number>();

  for (const link of links) {
    const path = new URL(link.url).pathname.split('/')[1] || 'root';
    pathAnalysis.set(path, (pathAnalysis.get(path) || 0) + 1);
  }

  const topSections = Array.from(pathAnalysis.entries())
    .sort(([, a], [, b]) => b - a)
    .slice(0, 5)
    .map(([path, count]) => ({ path, count }));

  console.log(`${domain}:`, {
    totalUrls: urlCount,
    topSections
  });
}

381

```

382

383

### Error Handling and Timeouts

384

385

```typescript

386

// Attempt a large mapping first; fall back to a small sitemap-only run on failure
const targetUrl = 'https://example.com';

try {
  const mapResult = await app.map(targetUrl, {
    sitemap: 'include',
    includeSubdomains: true,
    limit: 5000,
    timeout: 120000 // 2 minutes
  });

  console.log(`Successfully mapped ${mapResult.links.length} URLs`);

  // Flag results whose title is missing or empty
  const untitledCount = mapResult.links.filter(link => !link.title).length;
  if (untitledCount > 0) {
    console.log(`${untitledCount} URLs without titles found`);
  }
} catch (error) {
  console.error('Mapping failed:', error);

  // Retry with a much smaller, sitemap-only request
  try {
    const fallbackResult = await app.map(targetUrl, {
      sitemap: 'only',
      limit: 100,
      timeout: 30000
    });
    console.log(`Fallback mapping found ${fallbackResult.links.length} URLs`);
  } catch (fallbackError) {
    console.error('Fallback mapping also failed:', fallbackError);
  }
}

420

```