tessl/npm-mendable--firecrawl-js

JavaScript SDK for Firecrawl API that enables comprehensive web scraping, crawling, and data extraction with AI-ready output formats.

—

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Security

Pending

The risk profile of this skill

Overview

Eval results

Files

Search

Name: tessl/npm-mendable--firecrawl-js
Author: tessl

Web search with optional result scraping across different sources (web, news, images) with advanced filtering.

Core Search Method

/**
 * Search the web and optionally scrape each result.
 *
 * The query is passed as its own argument; every other `SearchRequest` field
 * is supplied through `req`.
 *
 * @param query - Search query string
 * @param req - Additional search options (any `SearchRequest` field except `query`)
 * @returns Promise resolving to structured search results, grouped into the
 *   optional `web`, `news`, and `images` arrays of `SearchData`
 */
search(query: string, req?: Omit<SearchRequest, "query">): Promise<SearchData>;

Search Configuration

/**
 * Full set of search parameters.
 * The `search` method takes `query` as a separate argument and accepts the
 * remaining fields as `Omit<SearchRequest, "query">`.
 */
interface SearchRequest {
  /** Search query string. */
  query: string;
  
  // Search sources
  /** Sources to search; each entry is a source name or an object whose `type` is the source name. */
  sources?: Array<"web" | "news" | "images" | { type: "web" | "news" | "images" }>;
  
  // Search categories
  /** Categories to search; each entry is a category name or a `CategoryOption` object. */
  categories?: Array<"github" | "research" | CategoryOption>;
  
  // Result limits
  /** Result limit. NOTE(review): whether it applies per source or to the total is not shown here — confirm. */
  limit?: number;
  
  // Search parameters
  /** Time-based filter string; the usage examples pass 'qdr:m' (past month) and 'qdr:y' (past year). */
  tbs?: string;
  /** Location for the search as free text; the usage examples pass 'San Francisco, CA' and 'United States'. */
  location?: string;
  /** NOTE(review): presumably drops results whose URLs are invalid — confirm against the API reference. */
  ignoreInvalidURLs?: boolean;
  timeout?: number; // milliseconds
  
  // Content scraping
  /** Scrape settings; the usage examples read scraped fields such as `markdown` from results when this is set. */
  scrapeOptions?: ScrapeOptions;
  
  // Integration tracking
  /** NOTE(review): presumably an identifier of the calling integration — confirm. */
  integration?: string;
}

/**
 * Object form of a search category; accepted in `SearchRequest.categories`
 * alongside the bare "github" / "research" strings.
 */
interface CategoryOption {
  /** Category name. */
  type: "github" | "research";
}

Search Results

/**
 * Search results grouped by source; every group is optional.
 * Each entry is either a plain search result or a `Document`. The usage
 * examples tell the two apart with `in` checks such as `'markdown' in result`.
 */
interface SearchData {
  /** Entries from the "web" source. */
  web?: Array<SearchResultWeb | Document>;
  /** Entries from the "news" source. */
  news?: Array<SearchResultNews | Document>;
  /** Entries from the "images" source. */
  images?: Array<SearchResultImages | Document>;
}

/**
 * Web search result.
 * Only `url` is required; the remaining fields are optional.
 */
interface SearchResultWeb {
  /** Address of the matching page. */
  url: string;
  /** Title of the result. */
  title?: string;
  /** Description text of the result. */
  description?: string;
  /** NOTE(review): presumably the search category the result belongs to — confirm. */
  category?: string;
}

/**
 * News search result.
 * Every field is optional.
 */
interface SearchResultNews {
  /** Title of the result. */
  title?: string;
  /** Address of the article. */
  url?: string;
  /** Snippet text of the result. */
  snippet?: string;
  /** Date as a string. NOTE(review): the format is not shown here — confirm before parsing. */
  date?: string;
  /** Address of an image associated with the result. */
  imageUrl?: string;
  /** NOTE(review): presumably the rank of the result within the news list — confirm. */
  position?: number;
  /** NOTE(review): presumably the search category the result belongs to — confirm. */
  category?: string;
}

/**
 * Image search result.
 * Every field is optional.
 */
interface SearchResultImages {
  /** Title of the result. */
  title?: string;
  /** Address of the image. */
  imageUrl?: string;
  /** Image width. NOTE(review): presumably in pixels — confirm. */
  imageWidth?: number;
  /** Image height. NOTE(review): presumably in pixels — confirm. */
  imageHeight?: number;
  /** Source address; the image usage example logs it as `sourceUrl`. */
  url?: string;
  /** NOTE(review): presumably the rank of the result within the image list — confirm. */
  position?: number;
}

Usage Examples

Basic Web Search

// Query-only search: no options object is passed
const { web: webHits } = await app.search('artificial intelligence trends 2024');

// Logs the web entries (SearchResultWeb objects with url, title, description)
console.log('Web results:', webHits);

Multi-Source Search

// One request spanning the web, news, and image sources
const { web, news, images } = await app.search('climate change', {
  limit: 20,
  sources: ['web', 'news', 'images']
});

// Every group is optional, so a count may print as undefined
console.log('Web results:', web?.length);
console.log('News results:', news?.length);
console.log('Image results:', images?.length);

Search with Content Scraping

// Search and scrape the content of each result.
// Only the plain 'markdown' format is requested here: a JSON format has to be
// passed in object form ({ type: 'json', ... } with a schema and/or prompt),
// not as the bare 'json' string, and this example only reads markdown anyway.
const results = await app.search('best javascript frameworks 2024', {
  sources: ['web'],
  limit: 10,
  scrapeOptions: {
    formats: ['markdown'],
    onlyMainContent: true
  }
});

// Results now include full scraped content; the `in` check separates scraped
// documents from plain search results, and empty markdown is skipped
for (const result of results.web ?? []) {
  if ('markdown' in result && result.markdown) {
    console.log('Scraped content:', result.markdown);
  }
}

Structured Data Extraction from Search

import { z } from 'zod';

// Zod schema naming the fields to extract from each scraped article
const ArticleSchema = z.object({
  title: z.string(),
  author: z.string().optional(),
  publishDate: z.string().optional(),
  mainPoints: z.array(z.string()),
  conclusion: z.string().optional()
});

// Request JSON output shaped by the schema for every web hit
const extraction = await app.search('machine learning research 2024', {
  sources: ['web'],
  limit: 15,
  scrapeOptions: {
    onlyMainContent: true,
    formats: [{ type: 'json', schema: ArticleSchema }]
  }
});

// Print the structured payload of each entry that carries one
const articleHits = extraction.web || [];
for (const hit of articleHits) {
  if (!('json' in hit) || !hit.json) continue;
  console.log('Structured article data:', hit.json);
}

GitHub Code Search

// Restrict the search to the 'github' category to find repositories and projects
const { web: githubHits } = await app.search('react typescript authentication', {
  limit: 20,
  categories: ['github']
});

// Relevant GitHub repositories and code examples are read from the web group
console.log('GitHub results:', githubHits);

Research-Focused Search

// Search academic and research content, scraping each hit as markdown
const results = await app.search('quantum computing algorithms', {
  categories: ['research'],
  sources: ['web'],
  limit: 25,
  scrapeOptions: {
    formats: ['markdown'],
    onlyMainContent: true
  }
});

// Longest excerpt of each document to print
const PREVIEW_LENGTH = 500;

// Get research papers and academic content
for (const result of results.web ?? []) {
  // Skip plain search results and documents whose markdown is missing or
  // empty, so the log never shows the literal text "undefined..."
  if (!('markdown' in result) || !result.markdown) continue;

  const { markdown } = result;
  // Append the ellipsis only when the text was actually cut
  const preview = markdown.length > PREVIEW_LENGTH
    ? `${markdown.substring(0, PREVIEW_LENGTH)}...`
    : markdown;

  console.log(`Research content from ${result.metadata?.sourceURL}:`, preview);
}

News Search with Date Filtering

// JSON Schema listing the fields to extract from each news article
const newsArticleSchema = {
  type: 'object',
  properties: {
    headline: { type: 'string' },
    summary: { type: 'string' },
    source: { type: 'string' },
    publishDate: { type: 'string' },
    impact: { type: 'string' },
    affectedCompanies: {
      type: 'array',
      items: { type: 'string' }
    }
  }
};

// News-only search; the 'qdr:m' filter keeps results from the past month
const results = await app.search('cybersecurity breaches', {
  sources: ['news'],
  tbs: 'qdr:m',
  limit: 30,
  scrapeOptions: {
    formats: ['markdown', { type: 'json', schema: newsArticleSchema }]
  }
});

console.log('Recent cybersecurity news:', results.news);

Location-Based Search

// Country and language settings handed to the scraper
const scrapeLocation = {
  country: 'US',
  languages: ['en']
};

// Geographically targeted search: `location` is set on the search request,
// and a separate location object is set inside the scrape options
const meetupResults = await app.search('local tech meetups', {
  location: 'San Francisco, CA',
  sources: ['web'],
  limit: 15,
  scrapeOptions: {
    formats: ['markdown'],
    location: scrapeLocation
  }
});

Image Search

// Search for images with metadata
const results = await app.search('sustainable architecture designs', {
  sources: ['images'],
  limit: 50
});

// Process image results
for (const image of results.images ?? []) {
  // Entries are typed SearchResultImages | Document, so narrow with `in`
  // before reading image-only fields (the other examples narrow the same way)
  if (!('imageUrl' in image)) continue;

  // Width and height are optional; report dimensions only when both are known
  // instead of printing "undefinedxundefined"
  const dimensions =
    image.imageWidth != null && image.imageHeight != null
      ? `${image.imageWidth}x${image.imageHeight}`
      : undefined;

  console.log({
    title: image.title,
    imageUrl: image.imageUrl,
    dimensions,
    sourceUrl: image.url
  });
}

Advanced Search Configuration

// Timeouts in milliseconds: 60 seconds for the search, 30 seconds per scrape
const searchTimeoutMs = 60000;
const scrapeTimeoutMs = 30000;

// Custom request headers sent while scraping
const scrapeHeaders = {
  'User-Agent': 'Research Bot 1.0'
};

// Combines object-form sources, a category, a past-year time filter,
// a location, and detailed scrape settings in one request
const results = await app.search('enterprise software security', {
  sources: [{ type: 'web' }, { type: 'news' }],
  categories: ['research'],
  limit: 40,
  tbs: 'qdr:y',
  location: 'United States',
  timeout: searchTimeoutMs,
  ignoreInvalidURLs: true,
  scrapeOptions: {
    formats: ['markdown', 'links'],
    headers: scrapeHeaders,
    onlyMainContent: true,
    blockAds: true,
    timeout: scrapeTimeoutMs
  }
});

Error Handling

// A failed search call rejects and is handled by the catch block;
// failures of individual scrapes are reported inside each result's metadata.
try {
  const results = await app.search('complex search query', {
    sources: ['web', 'news'],
    limit: 100,
    scrapeOptions: {
      formats: ['markdown']
    },
    timeout: 30000
  });

  // Both requested sources are scraped, so check web and news entries
  // for failed scrapes rather than web alone
  for (const result of [...(results.web ?? []), ...(results.news ?? [])]) {
    if ('metadata' in result && result.metadata?.error) {
      console.log(`Failed to scrape ${result.metadata.sourceURL}: ${result.metadata.error}`);
    }
  }

} catch (error) {
  console.error('Search failed:', error);
}

Combining Search Sources

// Search each source with its own limit and filters, then combine results.
// The three requests do not depend on one another, so they are issued
// together and awaited as a group instead of one after another.
const [webResults, newsResults, imageResults] = await Promise.all([
  app.search('renewable energy innovation', {
    sources: ['web'],
    limit: 20,
    scrapeOptions: { formats: ['markdown'] }
  }),
  app.search('renewable energy innovation', {
    sources: ['news'],
    limit: 15,
    tbs: 'qdr:m' // Recent news only
  }),
  app.search('renewable energy innovation', {
    sources: ['images'],
    limit: 10
  })
]);

// Process combined results; a missing group counts as zero
console.log('Comprehensive search results:', {
  webPages: webResults.web?.length ?? 0,
  newsArticles: newsResults.news?.length ?? 0,
  images: imageResults.images?.length ?? 0
});

Search Result Processing

// Search web and news, scraping markdown and links for every result
const results = await app.search('artificial intelligence ethics', {
  sources: ['web', 'news'],
  limit: 30,
  scrapeOptions: {
    formats: ['markdown', 'links']
  }
});

// Running totals for the analysis; the set is typed so hostnames stay strings
const processedResults = {
  webSites: new Set<string>(),
  totalLinks: 0,
  contentLength: 0
};

for (const result of [...(results.web ?? []), ...(results.news ?? [])]) {
  if ('metadata' in result && result.metadata?.sourceURL) {
    try {
      processedResults.webSites.add(new URL(result.metadata.sourceURL).hostname);
    } catch {
      // The URL constructor throws on a malformed address; leave that entry
      // out of the domain tally and keep analysing the remaining results
    }
  }

  if ('links' in result && result.links) {
    processedResults.totalLinks += result.links.length;
  }

  if ('markdown' in result && result.markdown) {
    processedResults.contentLength += result.markdown.length;
  }
}

console.log('Analysis:', {
  uniqueDomains: processedResults.webSites.size,
  totalLinks: processedResults.totalLinks,
  totalContentLength: processedResults.contentLength
});