JavaScript SDK for Firecrawl API that enables comprehensive web scraping, crawling, and data extraction with AI-ready output formats.
—
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Pending
The risk profile of this skill
Discover and map website URLs using sitemaps, crawling techniques, and intelligent URL discovery.
/**
 * Discover the URLs of a website (sitemap-aware).
 *
 * @param url - Root URL of the site to map
 * @param options - Optional mapping configuration
 * @returns Promise that resolves to the discovered links
 */
map(url: string, options?: MapOptions): Promise<MapData>;

/** Configuration accepted by `map`; every field is optional. */
interface MapOptions {
  /** Search term used to filter the discovered URLs. */
  search?: string;
  /** Sitemap handling mode. */
  sitemap?: 'only' | 'include' | 'skip';
  /** Subdomain inclusion. */
  includeSubdomains?: boolean;
  /** Limit on the number of results. */
  limit?: number;
  /** Operation timeout, in milliseconds. */
  timeout?: number;
  /** Integration tracking. */
  integration?: string;
  /** Location configuration. */
  location?: LocationConfig;
}

/** Location configuration: a country and a list of languages. */
interface LocationConfig {
  country?: string;
  languages?: Array<string>;
}

/** Payload resolved by `map`. */
interface MapData {
  /** The discovered links. */
  links: Array<SearchResultWeb>;
}

/** One discovered link; only `url` is guaranteed to be present. */
interface SearchResultWeb {
  url: string;
  title?: string;
  description?: string;
  category?: string;
}

// Discover all URLs on a website
// Map the site without passing any options.
const mapResult = await app.map('https://example.com');
console.log(`Found ${mapResult.links.length} URLs:`);
for (const link of mapResult.links) {
  // `title` is optional on a link, so append it only when present;
  // interpolating it unconditionally would print "<url>: undefined".
  console.log(link.title ? `- ${link.url}: ${link.title}` : `- ${link.url}`);
}

// Use only sitemap.xml for URL discovery
// Sitemap-only run with the result limit set to 500.
const { links: sitemapLinks } = await app.map('https://docs.example.com', {
  limit: 500,
  sitemap: 'only',
});
console.log('URLs from sitemap:', sitemapLinks);
// Returns only URLs found in sitemap.xml files

// Map main domain and all subdomains
// Map with subdomain inclusion switched on.
const mapResult = await app.map('https://example.com', {
  timeout: 60 * 1000, // milliseconds (60 seconds)
  limit: 1000,
  includeSubdomains: true,
});
// Will discover URLs from:
// - https://example.com
// - https://www.example.com
// - https://blog.example.com
// - https://api.example.com
// etc.

// Search for specific types of content
// Both queries target the same documentation site.
const docsSiteUrl = 'https://docs.example.com';

// First query: search term "api", sitemap included, up to 100 results.
const apiDocsMap = await app.map(docsSiteUrl, {
  limit: 100,
  sitemap: 'include',
  search: 'api',
});

// Second query: search term "tutorial guide", up to 50 results.
const tutorialMap = await app.map(docsSiteUrl, {
  limit: 50,
  search: 'tutorial guide',
});

console.log('API documentation URLs:', apiDocsMap.links);
console.log('Tutorial URLs:', tutorialMap.links);

// Map a large site with high limits
// Large-site run: high limit and a five-minute timeout.
const mapResult = await app.map('https://largewebsite.com', {
  timeout: 300000, // 5 minutes
  limit: 5000,
  includeSubdomains: false,
  sitemap: 'include',
});

// Group the links by their first path segment ('root' when there is none).
const pathMap = new Map<string, typeof mapResult.links>();
for (const link of mapResult.links) {
  const [, firstSegment] = new URL(link.url).pathname.split('/');
  const groupKey = firstSegment || 'root';
  const group = pathMap.get(groupKey);
  if (group) {
    group.push(link);
  } else {
    pathMap.set(groupKey, [link]);
  }
}

console.log('URLs organized by path:');
for (const [path, links] of pathMap) {
  console.log(`/${path}: ${links.length} URLs`);
}

// Map documentation with categorization
// Map the documentation site with the sitemap included.
const docsMap = await app.map('https://docs.example.com', {
  limit: 1000,
  sitemap: 'include',
});

type DocLinks = typeof docsMap.links;

// One bucket per documentation category, plus a catch-all.
const categories = {
  api: [] as DocLinks,
  guides: [] as DocLinks,
  tutorials: [] as DocLinks,
  reference: [] as DocLinks,
  other: [] as DocLinks,
};

// Ordered rules: a link goes to the first bucket whose path marker appears in
// its URL or whose keyword appears in its title (both compared lower-cased).
const categoryRules = [
  { bucket: 'api', pathMarker: '/api/', keyword: 'api' },
  { bucket: 'guides', pathMarker: '/guide/', keyword: 'guide' },
  { bucket: 'tutorials', pathMarker: '/tutorial/', keyword: 'tutorial' },
  { bucket: 'reference', pathMarker: '/reference/', keyword: 'reference' },
] as const;

for (const link of docsMap.links) {
  const lowerUrl = link.url.toLowerCase();
  const lowerTitle = (link.title || '').toLowerCase();
  const matched = categoryRules.find(
    (rule) => lowerUrl.includes(rule.pathMarker) || lowerTitle.includes(rule.keyword)
  );
  categories[matched ? matched.bucket : 'other'].push(link);
}

console.log('Documentation categories:', {
  api: categories.api.length,
  guides: categories.guides.length,
  tutorials: categories.tutorials.length,
  reference: categories.reference.length,
  other: categories.other.length,
});

// Map product pages and categories
// Map the shop, filtering by the search term and excluding subdomains.
const productMap = await app.map('https://shop.example.com', {
  limit: 2000,
  includeSubdomains: false,
  search: 'product category',
});

type ProductLinks = typeof productMap.links;

// Buckets for the kinds of e-commerce page, plus a catch-all.
const ecommerceUrls = {
  products: [] as ProductLinks,
  categories: [] as ProductLinks,
  brands: [] as ProductLinks,
  other: [] as ProductLinks,
};

// Choose the bucket for a lower-cased URL; checks run in order, first hit wins.
const bucketFor = (lowerUrl: string): keyof typeof ecommerceUrls => {
  if (lowerUrl.includes('/product/') || lowerUrl.includes('/item/')) {
    return 'products';
  }
  if (lowerUrl.includes('/category/') || lowerUrl.includes('/collection/')) {
    return 'categories';
  }
  if (lowerUrl.includes('/brand/') || lowerUrl.includes('/manufacturer/')) {
    return 'brands';
  }
  return 'other';
};

for (const link of productMap.links) {
  ecommerceUrls[bucketFor(link.url.toLowerCase())].push(link);
}

// The 'other' bucket is collected but not part of the printed summary.
console.log('E-commerce site structure:', {
  totalProducts: ecommerceUrls.products.length,
  categories: ecommerceUrls.categories.length,
  brands: ecommerceUrls.brands.length,
});

// Map a multi-language website
// Map the site with subdomains included and a list of languages configured.
const allLanguagesMap = await app.map('https://international.example.com', {
  includeSubdomains: true,
  limit: 3000,
  location: {
    languages: ['en', 'es', 'fr', 'de']
  }
});

// Language codes recognised by BOTH detection strategies below. Using a single
// list keeps the subdomain and path checks consistent, and stops unrelated
// two-letter path sections (e.g. /us/, /tv/) from being reported as languages.
const knownLanguages = new Set(['en', 'es', 'fr', 'de', 'ja', 'zh']);

// Organize by language/locale
const languageUrls = new Map<string, typeof allLanguagesMap.links>();
for (const link of allLanguagesMap.links) {
  const url = new URL(link.url);

  // Candidate from the subdomain (en.example.com)
  const subdomain = url.hostname.split('.')[0];
  // Candidate from the first path segment (/en/..., and also a bare /en,
  // which a pattern requiring a trailing slash would miss)
  const pathLanguage = url.pathname.match(/^\/([a-z]{2})(?:\/|$)/)?.[1];

  // A path match takes precedence over a subdomain match.
  let language = 'unknown';
  if (pathLanguage && knownLanguages.has(pathLanguage)) {
    language = pathLanguage;
  } else if (knownLanguages.has(subdomain)) {
    language = subdomain;
  }

  const bucket = languageUrls.get(language);
  if (bucket) {
    bucket.push(link);
  } else {
    languageUrls.set(language, [link]);
  }
}

console.log('URLs by language:');
languageUrls.forEach((links, lang) => {
  console.log(`${lang}: ${links.length} URLs`);
});

// Map for content audit purposes
// Map the whole site (sitemap and subdomains included) for the audit.
const auditMap = await app.map('https://company.example.com', {
  sitemap: 'include',
  includeSubdomains: true,
  limit: 10000
});

// Running tallies filled in while walking the discovered links.
const analysis = {
  totalUrls: auditMap.links.length,
  httpUrls: 0,
  httpsUrls: 0,
  subdomains: new Set<string>(),
  fileTypes: new Map<string, number>(),
  pathDepths: new Map<number, number>()
};

for (const link of auditMap.links) {
  const url = new URL(link.url);

  // Protocol analysis
  if (url.protocol === 'http:') analysis.httpUrls++;
  if (url.protocol === 'https:') analysis.httpsUrls++;

  // Subdomain analysis (tracked as distinct hostnames)
  analysis.subdomains.add(url.hostname);

  // File type analysis: look for an extension only in the final path segment.
  // Splitting the whole pathname on '.' would return the entire path when it
  // contains no dot, so short paths such as "/faq" or "/" would be miscounted
  // as file types.
  const fileName = url.pathname.slice(url.pathname.lastIndexOf('/') + 1);
  const dotIndex = fileName.lastIndexOf('.');
  if (dotIndex > 0) {
    const fileExtension = fileName.slice(dotIndex + 1).toLowerCase();
    if (fileExtension && fileExtension.length <= 5) {
      const count = analysis.fileTypes.get(fileExtension) || 0;
      analysis.fileTypes.set(fileExtension, count + 1);
    }
  }

  // Path depth analysis: number of non-empty path segments
  const depth = url.pathname.split('/').filter(segment => segment).length;
  const depthCount = analysis.pathDepths.get(depth) || 0;
  analysis.pathDepths.set(depth, depthCount + 1);
}

// Sum of (depth × number of URLs at that depth) across all URLs.
const totalDepth = Array.from(analysis.pathDepths.entries())
  .reduce((sum, [depth, count]) => sum + depth * count, 0);

console.log('Content audit results:', {
  totalUrls: analysis.totalUrls,
  securityIssues: analysis.httpUrls > 0 ? `${analysis.httpUrls} non-HTTPS URLs` : 'None',
  uniqueSubdomains: analysis.subdomains.size,
  // Five most frequent extensions, most common first
  commonFileTypes: Array.from(analysis.fileTypes.entries())
    .sort(([, a], [, b]) => b - a)
    .slice(0, 5),
  // Report 0 rather than NaN when no URLs were discovered
  averagePathDepth: analysis.totalUrls > 0 ? totalDepth / analysis.totalUrls : 0
});

// Map competitor websites for analysis
const competitors = [
  'https://competitor1.com',
  'https://competitor2.com',
  'https://competitor3.com'
];

// Map all competitors concurrently. allSettled is used instead of all so that
// one site failing or timing out does not discard the results of the others.
const competitorOutcomes = await Promise.allSettled(
  competitors.map(async (url) => {
    const mapResult = await app.map(url, {
      sitemap: 'include',
      limit: 500,
      timeout: 30000
    });
    return {
      domain: new URL(url).hostname,
      urlCount: mapResult.links.length,
      links: mapResult.links
    };
  })
);

// Keep the successful results and report each failure with the site it belongs to.
const competitorMaps = competitorOutcomes.flatMap((outcome, index) => {
  if (outcome.status === 'fulfilled') {
    return [outcome.value];
  }
  console.error(`Mapping failed for ${competitors[index]}:`, outcome.reason);
  return [];
});

// Analyze competitor site structures
competitorMaps.forEach(({ domain, urlCount, links }) => {
  // Count URLs per first path segment ('root' when there is none)
  const pathAnalysis = new Map<string, number>();
  links.forEach(link => {
    const path = new URL(link.url).pathname.split('/')[1] || 'root';
    pathAnalysis.set(path, (pathAnalysis.get(path) || 0) + 1);
  });
  console.log(`${domain}:`, {
    totalUrls: urlCount,
    // Five largest sections, biggest first
    topSections: Array.from(pathAnalysis.entries())
      .sort(([, a], [, b]) => b - a)
      .slice(0, 5)
      .map(([path, count]) => ({ path, count }))
  });
});

// Error handling: attempt a large mapping run, then fall back to a smaller one
try {
  const mapResult = await app.map('https://example.com', {
    sitemap: 'include',
    includeSubdomains: true,
    limit: 5000,
    timeout: 120000 // 2 minutes
  });
  console.log(`Successfully mapped ${mapResult.links.length} URLs`);
  // Check for any issues in results: a missing or empty title is falsy,
  // so a single truthiness test covers both cases
  const problematicUrls = mapResult.links.filter(link => !link.title);
  if (problematicUrls.length > 0) {
    console.log(`${problematicUrls.length} URLs without titles found`);
  }
} catch (error) {
  console.error('Mapping failed:', error);
  // Fallback to smaller mapping operation
  try {
    const fallbackResult = await app.map('https://example.com', {
      sitemap: 'only',
      limit: 100,
      timeout: 30000
    });
    console.log(`Fallback mapping found ${fallbackResult.links.length} URLs`);
  } catch (fallbackError) {
    console.error('Fallback mapping also failed:', fallbackError);
  }
}