CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/npm-mendable--firecrawl-js

JavaScript SDK for Firecrawl API that enables comprehensive web scraping, crawling, and data extraction with AI-ready output formats.

Pending
Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Security by Snyk

Pending

The risk profile of this skill

Overview
Eval results
Files

docs/mapping.md

Site Mapping

Discover and map website URLs using sitemaps, crawling techniques, and intelligent URL discovery.

Core Mapping Method

/**
 * Map a site to discover its URLs (sitemap-aware).
 *
 * @param url - Root URL to map
 * @param options - Optional mapping configuration (see `MapOptions`)
 * @returns Promise resolving to a `MapData` object whose `links` array
 *          holds the discovered links
 */
map(url: string, options?: MapOptions): Promise<MapData>;

Mapping Configuration

/** Configuration accepted by `map()`; every field is optional. */
interface MapOptions {
  /** Search/filter term applied to the discovered URLs. */
  search?: string;
  
  /**
   * Sitemap handling mode. The usage examples describe `"only"` as using
   * only sitemap.xml for discovery; `"include"` and `"skip"` presumably
   * add or ignore sitemap data — TODO confirm against the API reference.
   */
  sitemap?: "only" | "include" | "skip";
  
  /** Whether subdomains are included in the mapping. */
  includeSubdomains?: boolean;
  
  /** Limit on the number of results. */
  limit?: number;
  
  /** Operation timeout, in milliseconds. */
  timeout?: number;
  
  /** Integration tracking value (exact semantics not shown here — TODO confirm). */
  integration?: string;
  
  /** Location configuration (see `LocationConfig`). */
  location?: LocationConfig;
}

/** Location settings supplied through `MapOptions.location`. */
interface LocationConfig {
  // Country setting; the expected format is not shown here — presumably a country code, TODO confirm
  country?: string;
  // Language codes; the usage examples pass values such as 'en', 'es', 'fr', 'de'
  languages?: string[];
}

Mapping Results

/** Result returned by `map()`. */
interface MapData {
  /** Links discovered for the mapped site. */
  links: SearchResultWeb[];
}

/** A single discovered link. Only `url` is guaranteed to be present. */
interface SearchResultWeb {
  /** Address of the discovered page. */
  url: string;
  /** Page title, when available. */
  title?: string;
  /** Page description, when available. */
  description?: string;
  /** Category label, when available (semantics not shown here — TODO confirm). */
  category?: string;
}

Usage Examples

Basic Site Mapping

// Discover all URLs on a website
const mapResult = await app.map('https://example.com');

console.log(`Found ${mapResult.links.length} URLs:`);
for (const link of mapResult.links) {
  // `title` is optional on each link, so fall back to a placeholder
  // instead of printing the literal text "undefined".
  console.log(`- ${link.url}: ${link.title ?? '(no title)'}`);
}

Sitemap-Only Discovery

// Restrict URL discovery to what the site's sitemap.xml files list
const sitemapOnlyOptions = { sitemap: 'only', limit: 500 } as const;
const mapResult = await app.map('https://docs.example.com', sitemapOnlyOptions);

// Only URLs found in sitemap.xml files are returned
const sitemapLinks = mapResult.links;
console.log('URLs from sitemap:', sitemapLinks);

Include Subdomains

// Cover the main domain together with all of its subdomains
const ONE_MINUTE_MS = 60 * 1000;

const mapResult = await app.map('https://example.com', {
  includeSubdomains: true,
  limit: 1000,
  timeout: ONE_MINUTE_MS // 60 seconds
});

// URLs will be discovered from hosts such as:
//   https://example.com
//   https://www.example.com
//   https://blog.example.com
//   https://api.example.com
//   ...and so on

Filtered URL Discovery

// Search for specific types of content on the same documentation site
const docsSite = 'https://docs.example.com';

// API-related pages, with sitemap entries included
const apiDocsMap = await app.map(docsSite, {
  search: 'api',
  sitemap: 'include',
  limit: 100
});

// Tutorial and guide pages
const tutorialMap = await app.map(docsSite, {
  search: 'tutorial guide',
  limit: 50
});

console.log('API documentation URLs:', apiDocsMap.links);
console.log('Tutorial URLs:', tutorialMap.links);

Large Site Mapping

// Map a large site with a generous limit and timeout
const mapResult = await app.map('https://largewebsite.com', {
  sitemap: 'include',
  includeSubdomains: false,
  limit: 5000,
  timeout: 300000 // 5 minutes
});

// Bucket the links by their first path segment ('root' when there is none)
const pathMap = new Map<string, typeof mapResult.links>();

for (const link of mapResult.links) {
  const [, firstSegment] = new URL(link.url).pathname.split('/');
  const bucketKey = firstSegment || 'root';
  const bucket = pathMap.get(bucketKey);

  if (bucket === undefined) {
    // First link seen for this segment: start a new bucket with it
    pathMap.set(bucketKey, [link]);
  } else {
    bucket.push(link);
  }
}

console.log('URLs organized by path:');
for (const [path, links] of pathMap) {
  console.log(`/${path}: ${links.length} URLs`);
}

Documentation Site Mapping

// Map a documentation site, then sort the links into categories
const docsMap = await app.map('https://docs.example.com', {
  sitemap: 'include',
  limit: 1000
});

type DocLinks = typeof docsMap.links;

// One bucket per documentation category
const categories = {
  api: [] as DocLinks,
  guides: [] as DocLinks,
  tutorials: [] as DocLinks,
  reference: [] as DocLinks,
  other: [] as DocLinks
};

// [bucket, URL path marker, title keyword] — checked in order, first match wins
const categoryRules: ReadonlyArray<[keyof typeof categories, string, string]> = [
  ['api', '/api/', 'api'],
  ['guides', '/guide/', 'guide'],
  ['tutorials', '/tutorial/', 'tutorial'],
  ['reference', '/reference/', 'reference']
];

for (const link of docsMap.links) {
  const lowerUrl = link.url.toLowerCase();
  const lowerTitle = (link.title || '').toLowerCase();

  const matchedRule = categoryRules.find(
    ([, pathMarker, titleKeyword]) =>
      lowerUrl.includes(pathMarker) || lowerTitle.includes(titleKeyword)
  );

  // Links that match no rule end up in 'other'
  categories[matchedRule ? matchedRule[0] : 'other'].push(link);
}

// Report how many links landed in each category
console.log(
  'Documentation categories:',
  Object.fromEntries(
    Object.entries(categories).map(([name, links]) => [name, links.length])
  )
);

E-commerce Site Mapping

// Map product and category pages of a shop
const productMap = await app.map('https://shop.example.com', {
  search: 'product category',
  includeSubdomains: false,
  limit: 2000
});

type ShopLinks = typeof productMap.links;

// Buckets for the different kinds of e-commerce URLs
const ecommerceUrls = {
  products: [] as ShopLinks,
  categories: [] as ShopLinks,
  brands: [] as ShopLinks,
  other: [] as ShopLinks
};

/** Chooses the bucket for a URL from the path markers it contains (case-insensitive). */
const bucketFor = (rawUrl: string): keyof typeof ecommerceUrls => {
  const url = rawUrl.toLowerCase();
  const containsAny = (...markers: string[]): boolean =>
    markers.some(marker => url.includes(marker));

  if (containsAny('/product/', '/item/')) return 'products';
  if (containsAny('/category/', '/collection/')) return 'categories';
  if (containsAny('/brand/', '/manufacturer/')) return 'brands';
  return 'other';
};

for (const link of productMap.links) {
  ecommerceUrls[bucketFor(link.url)].push(link);
}

console.log('E-commerce site structure:', {
  totalProducts: ecommerceUrls.products.length,
  categories: ecommerceUrls.categories.length,
  brands: ecommerceUrls.brands.length
});

Multi-Language Site Mapping

// Map a multi-language website
const allLanguagesMap = await app.map('https://international.example.com', {
  includeSubdomains: true,
  limit: 3000,
  location: {
    languages: ['en', 'es', 'fr', 'de']
  }
});

// Organize by language/locale
const languageUrls = new Map<string, typeof allLanguagesMap.links>();

// Language codes recognised when they appear as the first hostname label
// (for example en.example.com)
const subdomainLanguages = ['en', 'es', 'fr', 'de', 'ja', 'zh'];

// Two-letter first path segment. Matches "/en/..." as well as a bare "/en",
// so a language landing page without a trailing slash is not left as 'unknown'.
const pathLanguagePattern = /^\/([a-z]{2})(?:\/|$)/;

for (const link of allLanguagesMap.links) {
  const url = new URL(link.url);
  let language = 'unknown';

  // Detect language from subdomain (en.example.com)
  const subdomain = url.hostname.split('.')[0];
  if (subdomain !== undefined && subdomainLanguages.includes(subdomain)) {
    language = subdomain;
  }

  // Detect language from path (/en/, /es/, etc.); when both are present,
  // the path wins over the subdomain
  const pathLanguage = pathLanguagePattern.exec(url.pathname)?.[1];
  if (pathLanguage) {
    language = pathLanguage;
  }

  // Append to the existing bucket, or start one for a newly seen language
  const bucket = languageUrls.get(language);
  if (bucket) {
    bucket.push(link);
  } else {
    languageUrls.set(language, [link]);
  }
}

console.log('URLs by language:');
languageUrls.forEach((links, lang) => {
  console.log(`${lang}: ${links.length} URLs`);
});

Content Audit Mapping

// Map for content audit purposes
const auditMap = await app.map('https://company.example.com', {
  sitemap: 'include',
  includeSubdomains: true,
  limit: 10000
});

// Running tallies collected while walking the discovered URLs
const analysis = {
  totalUrls: auditMap.links.length,
  httpUrls: 0,
  httpsUrls: 0,
  subdomains: new Set<string>(),
  fileTypes: new Map<string, number>(),
  pathDepths: new Map<number, number>()
};

for (const link of auditMap.links) {
  const url = new URL(link.url);

  // Protocol analysis
  if (url.protocol === 'http:') analysis.httpUrls++;
  if (url.protocol === 'https:') analysis.httpsUrls++;

  // Subdomain analysis (keyed by full hostname)
  analysis.subdomains.add(url.hostname);

  // File type analysis. Only the last path segment is inspected, and only
  // when it actually contains a dot; otherwise an extensionless path such as
  // "/blog" would itself be counted as a file type.
  const lastSegment = url.pathname.split('/').pop() ?? '';
  const dotIndex = lastSegment.lastIndexOf('.');
  if (dotIndex !== -1) {
    const fileExtension = lastSegment.slice(dotIndex + 1).toLowerCase();
    // Ignore empty and over-long suffixes, which are unlikely to be extensions
    if (fileExtension && fileExtension.length <= 5) {
      const count = analysis.fileTypes.get(fileExtension) ?? 0;
      analysis.fileTypes.set(fileExtension, count + 1);
    }
  }

  // Path depth analysis: number of non-empty path segments
  const depth = url.pathname.split('/').filter(segment => segment).length;
  const depthCount = analysis.pathDepths.get(depth) ?? 0;
  analysis.pathDepths.set(depth, depthCount + 1);
}

// Sum of depth * count over every observed depth
const totalDepth = Array.from(analysis.pathDepths.entries())
  .reduce((sum, [depth, count]) => sum + depth * count, 0);

console.log('Content audit results:', {
  totalUrls: analysis.totalUrls,
  securityIssues: analysis.httpUrls > 0 ? `${analysis.httpUrls} non-HTTPS URLs` : 'None',
  uniqueSubdomains: analysis.subdomains.size,
  commonFileTypes: Array.from(analysis.fileTypes.entries())
    .sort(([, a], [, b]) => b - a)
    .slice(0, 5),
  // Guard against dividing by zero (NaN) when no URLs were discovered
  averagePathDepth: analysis.totalUrls > 0 ? totalDepth / analysis.totalUrls : 0
});

Competitive Analysis Mapping

// Competitor sites to map and compare
const competitors = [
  'https://competitor1.com',
  'https://competitor2.com',
  'https://competitor3.com'
];

/** Maps a single competitor site and summarizes what was found. */
const mapCompetitor = async (url: string) => {
  const { links } = await app.map(url, {
    sitemap: 'include',
    limit: 500,
    timeout: 30000
  });

  return {
    domain: new URL(url).hostname,
    urlCount: links.length,
    links
  };
};

// All competitors are mapped concurrently
const competitorMaps = await Promise.all(
  competitors.map(url => mapCompetitor(url))
);

// Summarize each competitor's structure by first path segment
for (const { domain, urlCount, links } of competitorMaps) {
  const pathAnalysis = new Map<string, number>();

  for (const link of links) {
    const section = new URL(link.url).pathname.split('/')[1] || 'root';
    pathAnalysis.set(section, (pathAnalysis.get(section) || 0) + 1);
  }

  // The five sections with the most URLs, largest first
  const topSections = [...pathAnalysis]
    .sort((left, right) => right[1] - left[1])
    .slice(0, 5)
    .map(([path, count]) => ({ path, count }));

  console.log(`${domain}:`, {
    totalUrls: urlCount,
    topSections
  });
}

Error Handling and Timeouts

try {
  // Primary attempt: broad mapping with a long timeout
  const fullResult = await app.map('https://example.com', {
    sitemap: 'include',
    includeSubdomains: true,
    limit: 5000,
    timeout: 120000 // 2 minutes
  });

  console.log(`Successfully mapped ${fullResult.links.length} URLs`);

  // Count links that came back with a missing or empty title
  let untitledCount = 0;
  for (const link of fullResult.links) {
    if (!link.title) {
      untitledCount += 1;
    }
  }

  if (untitledCount > 0) {
    console.log(`${untitledCount} URLs without titles found`);
  }
} catch (primaryError) {
  console.error('Mapping failed:', primaryError);

  // Retry with a much smaller, sitemap-only request
  try {
    const reducedResult = await app.map('https://example.com', {
      sitemap: 'only',
      limit: 100,
      timeout: 30000
    });
    console.log(`Fallback mapping found ${reducedResult.links.length} URLs`);
  } catch (secondaryError) {
    console.error('Fallback mapping also failed:', secondaryError);
  }
}

docs

batch.md

crawling.md

extraction.md

index.md

mapping.md

monitoring.md

scraping.md

search.md

usage.md

v1-api.md

tile.json