CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/npm-mendable--firecrawl-js

JavaScript SDK for Firecrawl API that enables comprehensive web scraping, crawling, and data extraction with AI-ready output formats.

Pending
Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Security by Snyk

Pending

The risk profile of this skill

Overview
Eval results
Files

search.mddocs/

Search

Search the web, news, and images, with advanced filtering and optional scraping of each result.

Core Search Method

/**
 * Search the web and optionally scrape each result.
 *
 * The query is passed positionally, so `req` accepts every `SearchRequest`
 * field except `query`.
 *
 * @param query - Search query string
 * @param req - Additional search options (all `SearchRequest` fields other than `query`)
 * @returns Promise resolving to structured search results, grouped by source
 *          (`web`, `news`, `images`)
 */
search(query: string, req?: Omit<SearchRequest, "query">): Promise<SearchData>;

Search Configuration

/**
 * Options for a search request.
 * Every field except `query` is optional.
 */
interface SearchRequest {
  // Search query string
  query: string;
  
  // Search sources: each entry is a bare source name or an object naming the source via `type`
  sources?: Array<"web" | "news" | "images" | { type: "web" | "news" | "images" }>;
  
  // Search categories: each entry is a bare category name or a CategoryOption object
  categories?: Array<"github" | "research" | CategoryOption>;
  
  // Result limits
  limit?: number;
  
  // Search parameters
  tbs?: string; // time-based filter, e.g. 'qdr:m' (past month) or 'qdr:y' (past year)
  location?: string; // place name, e.g. 'San Francisco, CA' or 'United States'
  ignoreInvalidURLs?: boolean; // NOTE(review): presumably drops results with invalid URLs — confirm
  timeout?: number; // milliseconds
  
  // Content scraping: when set, results can carry scraped content (e.g. markdown)
  scrapeOptions?: ScrapeOptions;
  
  // Integration tracking
  integration?: string;
}

// Object form of a category entry, accepted by SearchRequest.categories
// alongside the bare strings "github" and "research"
interface CategoryOption {
  type: "github" | "research";
}

Search Results

// Search results grouped by source; each group is optional.
// Every list may mix plain search hits with Document entries, so callers
// should narrow an entry (e.g. `'markdown' in result`) before reading
// fields that exist on only one of the two shapes.
interface SearchData {
  web?: Array<SearchResultWeb | Document>;
  news?: Array<SearchResultNews | Document>;
  images?: Array<SearchResultImages | Document>;
}

// Web search result; `url` is the only field the type requires
interface SearchResultWeb {
  url: string;
  title?: string;
  description?: string;
  category?: string;
}

// News search result; every field is optional, including `url`
interface SearchResultNews {
  title?: string;
  url?: string;
  snippet?: string;
  date?: string;
  imageUrl?: string;
  position?: number; // NOTE(review): presumably the rank within the result list — confirm
  category?: string;
}

// Image search result; every field is optional
interface SearchResultImages {
  title?: string;
  imageUrl?: string;
  imageWidth?: number; // NOTE(review): presumably pixels — confirm
  imageHeight?: number; // NOTE(review): presumably pixels — confirm
  url?: string; // NOTE(review): presumably the page hosting the image — confirm
  position?: number; // NOTE(review): presumably the rank within the result list — confirm
}

Usage Examples

Basic Web Search

// Minimal search: only the query string, no options
const { web } = await app.search('artificial intelligence trends 2024');

// `web` holds SearchResultWeb objects with url, title, description
console.log('Web results:', web);

Multi-Source Search

// One request covering the web, news, and image sources
const { web, news, images } = await app.search('climate change', {
  limit: 20,
  sources: ['web', 'news', 'images']
});

// Each group is optional, hence the optional chaining on `length`
console.log('Web results:', web?.length);
console.log('News results:', news?.length);
console.log('Image results:', images?.length);

Search with Content Scraping

// Search and scrape the content of each result
const results = await app.search('best javascript frameworks 2024', {
  sources: ['web'],
  limit: 10,
  scrapeOptions: {
    // Only markdown is read below. A bare 'json' entry carries no schema or
    // prompt to drive extraction; request JSON in the object form
    // `{ type: 'json', schema }` when structured output is needed.
    formats: ['markdown'],
    onlyMainContent: true
  }
});

// Scraped entries carry a `markdown` field; plain search hits do not
for (const result of results.web ?? []) {
  // Also require a non-empty value: the key can be present but undefined
  if ('markdown' in result && result.markdown) {
    console.log('Scraped content:', result.markdown);
  }
}

Structured Data Extraction from Search

import { z } from 'zod';

// Shape of the structured data to extract from every scraped page
const ArticleSchema = z.object({
  title: z.string(),
  mainPoints: z.array(z.string()),
  author: z.string().optional(),
  publishDate: z.string().optional(),
  conclusion: z.string().optional()
});

// Ask for a JSON extraction driven by the schema above
const { web } = await app.search('machine learning research 2024', {
  limit: 15,
  sources: ['web'],
  scrapeOptions: {
    onlyMainContent: true,
    formats: [{ type: 'json', schema: ArticleSchema }]
  }
});

// Print the extracted data for every entry that actually has some
for (const entry of web || []) {
  if (!('json' in entry) || !entry.json) continue;
  console.log('Structured article data:', entry.json);
}

GitHub Code Search

// Restrict the search to the `github` category (code repositories and projects)
const { web: githubResults } = await app.search('react typescript authentication', {
  limit: 20,
  categories: ['github']
});

// Relevant GitHub repositories and code examples
console.log('GitHub results:', githubResults);

Research-Focused Search

// Search academic and research content
const results = await app.search('quantum computing algorithms', {
  categories: ['research'],
  sources: ['web'],
  limit: 25,
  scrapeOptions: {
    formats: ['markdown'],
    onlyMainContent: true
  }
});

// Maximum number of characters of each document shown in the preview
const PREVIEW_LENGTH = 500;

// Get research papers and academic content
for (const result of results.web ?? []) {
  // Skip plain search hits and scraped entries without markdown; otherwise
  // the preview would read "undefined..."
  if (!('markdown' in result) || !result.markdown) continue;

  const { markdown } = result;
  // Only append an ellipsis when the content was actually truncated
  const preview =
    markdown.length > PREVIEW_LENGTH
      ? `${markdown.slice(0, PREVIEW_LENGTH)}...`
      : markdown;

  console.log(
    `Research content from ${result.metadata?.sourceURL ?? 'unknown source'}:`,
    preview
  );
}

News Search with Date Filtering

// News-only search with time-based filtering; each article is scraped as
// markdown plus a JSON extraction described by an inline JSON schema
const { news } = await app.search('cybersecurity breaches', {
  sources: ['news'],
  limit: 30,
  tbs: 'qdr:m', // Past month
  scrapeOptions: {
    formats: [
      'markdown',
      {
        type: 'json',
        schema: {
          type: 'object',
          properties: {
            headline: { type: 'string' },
            summary: { type: 'string' },
            source: { type: 'string' },
            publishDate: { type: 'string' },
            impact: { type: 'string' },
            affectedCompanies: { type: 'array', items: { type: 'string' } }
          }
        }
      }
    ]
  }
});

console.log('Recent cybersecurity news:', news);

Location-Based Search

// Search with geographic targeting; the scrape itself is given a
// US / English location setting
const meetupResults = await app.search('local tech meetups', {
  location: 'San Francisco, CA',
  sources: ['web'],
  limit: 15,
  scrapeOptions: {
    location: { country: 'US', languages: ['en'] },
    formats: ['markdown']
  }
});

Image Search

// Search for images with metadata
const results = await app.search('sustainable architecture designs', {
  sources: ['images'],
  limit: 50
});

// Process image results
for (const image of results.images ?? []) {
  // Entries are typed as SearchResultImages | Document, so narrow to image
  // hits before reading image-only fields
  if (!('imageUrl' in image)) continue;

  const { title, imageUrl, imageWidth, imageHeight, url } = image;
  console.log({
    title,
    imageUrl,
    // Width and height are optional; avoid printing "undefinedxundefined"
    dimensions:
      imageWidth != null && imageHeight != null
        ? `${imageWidth}x${imageHeight}`
        : 'unknown',
    sourceUrl: url
  });
}

Advanced Search Configuration

// Full-featured request: object-form sources, a category filter, a time
// window, and tuned scraping
const advancedResults = await app.search('enterprise software security', {
  // What to search
  sources: [{ type: 'web' }, { type: 'news' }],
  categories: ['research'],
  limit: 40,

  // Filtering
  tbs: 'qdr:y', // Past year
  location: 'United States',
  ignoreInvalidURLs: true,

  // Request-level timeout
  timeout: 60000, // 60 seconds

  // Options applied when scraping each result
  scrapeOptions: {
    formats: ['markdown', 'links'],
    onlyMainContent: true,
    blockAds: true,
    timeout: 30000,
    headers: { 'User-Agent': 'Research Bot 1.0' }
  }
});

Error Handling

// A failed request rejects and lands in `catch`; individual scrape failures
// are reported per result through `metadata.error`
try {
  const results = await app.search('complex search query', {
    sources: ['web', 'news'],
    limit: 100,
    scrapeOptions: {
      formats: ['markdown']
    },
    timeout: 30000
  });

  // Both requested sources can contain scraped entries, so check web and
  // news alike for failed scrapes
  for (const result of [...(results.web ?? []), ...(results.news ?? [])]) {
    if ('metadata' in result && result.metadata?.error) {
      console.log(`Failed to scrape ${result.metadata.sourceURL}: ${result.metadata.error}`);
    }
  }
} catch (error) {
  console.error('Search failed:', error);
}

Combining Search Sources

// Search across all sources and combine results
const query = 'renewable energy innovation';

// The three searches are independent of each other, so run them
// concurrently instead of awaiting them one after another
const [webResults, newsResults, imageResults] = await Promise.all([
  app.search(query, {
    sources: ['web'],
    limit: 20,
    scrapeOptions: { formats: ['markdown'] }
  }),
  app.search(query, {
    sources: ['news'],
    limit: 15,
    tbs: 'qdr:m' // Recent news only
  }),
  app.search(query, {
    sources: ['images'],
    limit: 10
  })
]);

// Process combined results; a missing group counts as zero
console.log('Comprehensive search results:', {
  webPages: webResults.web?.length ?? 0,
  newsArticles: newsResults.news?.length ?? 0,
  images: imageResults.images?.length ?? 0
});

Search Result Processing

// Scrape web and news results as markdown plus the links found on each page
const results = await app.search('artificial intelligence ethics', {
  sources: ['web', 'news'],
  limit: 30,
  scrapeOptions: {
    formats: ['markdown', 'links']
  }
});

// Process and analyze results
const processedResults = {
  // Typed explicitly; a bare `new Set()` would be inferred as Set<unknown>
  webSites: new Set<string>(),
  totalLinks: 0,
  contentLength: 0
};

for (const result of [...(results.web ?? []), ...(results.news ?? [])]) {
  if ('metadata' in result && result.metadata?.sourceURL) {
    try {
      const domain = new URL(result.metadata.sourceURL).hostname;
      processedResults.webSites.add(domain);
    } catch {
      // `new URL` throws on a malformed URL; leave that entry out of the
      // domain count rather than aborting the whole analysis
    }
  }

  if ('links' in result && result.links) {
    processedResults.totalLinks += result.links.length;
  }

  if ('markdown' in result && result.markdown) {
    processedResults.contentLength += result.markdown.length;
  }
}

console.log('Analysis:', {
  uniqueDomains: processedResults.webSites.size,
  totalLinks: processedResults.totalLinks,
  totalContentLength: processedResults.contentLength
});

docs

batch.md

crawling.md

extraction.md

index.md

mapping.md

monitoring.md

scraping.md

search.md

usage.md

v1-api.md

tile.json