tessl/npm-mendable--firecrawl-js

JavaScript SDK for Firecrawl API that enables comprehensive web scraping, crawling, and data extraction with AI-ready output formats.

docs/v1-api.md

Legacy V1 API

Feature-frozen v1 API with additional capabilities including deep research, LLMs.txt generation, and WebSocket monitoring.

Accessing V1 API

// Access v1 client through unified client
const app = new Firecrawl({ apiKey: 'your-api-key' });
const v1Client = app.v1;

// Or instantiate v1 client directly
import { FirecrawlAppV1 } from '@mendable/firecrawl-js';
const v1Client = new FirecrawlAppV1({ apiKey: 'your-api-key' });

V1 Client Configuration

interface FirecrawlAppConfig {
  apiKey?: string | null;
  apiUrl?: string | null;
}

class FirecrawlAppV1 {
  constructor(config: FirecrawlAppConfig);
}

Core V1 Methods

Scraping

/**
 * Scrape a single URL with v1 API
 * @param url - URL to scrape
 * @param params - V1 scraping parameters
 * @returns Promise resolving to scrape response
 */
scrapeUrl<T extends ZodSchema, ActionsSchema extends Action[] | undefined>(
  url: string,
  params?: ScrapeParams<T, ActionsSchema>
): Promise<ScrapeResponse<z.infer<T>, ActionsSchema extends Action[] ? ActionsResult : never> | ErrorResponse>;

Search

/**
 * Search using v1 API
 * @param query - Search query
 * @param params - V1 search parameters
 * @returns Promise resolving to search results
 */
search(query: string, params?: SearchParams): Promise<SearchResponse>;

Crawling

/**
 * Crawl URL with v1 API (with polling)
 * @param url - URL to crawl
 * @param params - V1 crawl parameters
 * @param pollInterval - Polling interval in seconds
 * @param idempotencyKey - Optional idempotency key
 * @returns Promise resolving to crawl status
 */
crawlUrl(
  url: string,
  params?: CrawlParams,
  pollInterval?: number,
  idempotencyKey?: string
): Promise<CrawlStatusResponse | ErrorResponse>;

/**
 * Start async crawl without polling
 * @param url - URL to crawl
 * @param params - V1 crawl parameters
 * @param idempotencyKey - Optional idempotency key
 * @returns Promise resolving to crawl initiation response
 */
asyncCrawlUrl(
  url: string,
  params?: CrawlParams,
  idempotencyKey?: string
): Promise<CrawlResponse | ErrorResponse>;

/**
 * Check crawl status
 * @param id - Crawl job ID
 * @param getAllData - Get all pages of data
 * @param nextURL - Next URL for pagination
 * @param skip - Skip entries for pagination
 * @param limit - Limit entries returned
 * @returns Promise resolving to crawl status
 */
checkCrawlStatus(
  id?: string,
  getAllData?: boolean,
  nextURL?: string,
  skip?: number,
  limit?: number
): Promise<CrawlStatusResponse | ErrorResponse>;

/**
 * Cancel crawl job
 * @param id - Crawl job ID
 * @returns Promise resolving to cancellation response
 */
cancelCrawl(id: string): Promise<ErrorResponse>;

/**
 * Get crawl errors
 * @param id - Crawl job ID
 * @returns Promise resolving to error information
 */
checkCrawlErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse>;

/**
 * Crawl with WebSocket monitoring
 * @param url - URL to crawl
 * @param params - V1 crawl parameters
 * @param idempotencyKey - Optional idempotency key
 * @returns Promise resolving to CrawlWatcher instance
 */
crawlUrlAndWatch(
  url: string,
  params?: CrawlParams,
  idempotencyKey?: string
): Promise<CrawlWatcher>;

Batch Operations

/**
 * Batch scrape URLs with v1 API (with polling)
 * @param urls - URLs to scrape
 * @param params - V1 scrape parameters
 * @param pollInterval - Polling interval in seconds
 * @param idempotencyKey - Optional idempotency key
 * @param webhook - Optional webhook configuration
 * @param ignoreInvalidURLs - Ignore invalid URLs
 * @param maxConcurrency - Maximum concurrent requests
 * @returns Promise resolving to batch status
 */
batchScrapeUrls(
  urls: string[],
  params?: ScrapeParams,
  pollInterval?: number,
  idempotencyKey?: string,
  webhook?: CrawlParams["webhook"],
  ignoreInvalidURLs?: boolean,
  maxConcurrency?: number
): Promise<BatchScrapeStatusResponse | ErrorResponse>;

/**
 * Start async batch scrape without polling
 * @param urls - URLs to scrape
 * @param params - V1 scrape parameters
 * @param idempotencyKey - Optional idempotency key
 * @param webhook - Optional webhook configuration
 * @param ignoreInvalidURLs - Ignore invalid URLs
 * @returns Promise resolving to batch initiation response
 */
asyncBatchScrapeUrls(
  urls: string[],
  params?: ScrapeParams,
  idempotencyKey?: string,
  webhook?: CrawlParams["webhook"],
  ignoreInvalidURLs?: boolean
): Promise<BatchScrapeResponse | ErrorResponse>;

/**
 * Check batch scrape status
 * @param id - Batch job ID
 * @param getAllData - Get all pages of data
 * @param nextURL - Next URL for pagination
 * @param skip - Skip entries for pagination
 * @param limit - Limit entries returned
 * @returns Promise resolving to batch status
 */
checkBatchScrapeStatus(
  id?: string,
  getAllData?: boolean,
  nextURL?: string,
  skip?: number,
  limit?: number
): Promise<BatchScrapeStatusResponse | ErrorResponse>;

/**
 * Get batch scrape errors
 * @param id - Batch job ID
 * @returns Promise resolving to error information
 */
checkBatchScrapeErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse>;

/**
 * Batch scrape with WebSocket monitoring
 * @param urls - URLs to scrape
 * @param params - V1 scrape parameters
 * @param idempotencyKey - Optional idempotency key
 * @param webhook - Optional webhook configuration
 * @param ignoreInvalidURLs - Ignore invalid URLs
 * @returns Promise resolving to CrawlWatcher instance
 */
batchScrapeUrlsAndWatch(
  urls: string[],
  params?: ScrapeParams,
  idempotencyKey?: string,
  webhook?: CrawlParams["webhook"],
  ignoreInvalidURLs?: boolean
): Promise<CrawlWatcher>;

Data Extraction

/**
 * Extract data with v1 API (with polling)
 * @param urls - URLs to extract from
 * @param params - V1 extraction parameters
 * @returns Promise resolving to extraction results
 */
extract<T extends ZodSchema>(
  urls?: string[],
  params?: ExtractParams<T>
): Promise<ExtractResponse<z.infer<T>> | ErrorResponse>;

/**
 * Start async extraction without polling
 * @param urls - URLs to extract from
 * @param params - V1 extraction parameters
 * @param idempotencyKey - Optional idempotency key
 * @returns Promise resolving to extraction initiation response
 */
asyncExtract(
  urls: string[],
  params?: ExtractParams,
  idempotencyKey?: string
): Promise<ExtractResponse | ErrorResponse>;

/**
 * Get extraction status
 * @param jobId - Extraction job ID
 * @returns Promise resolving to extraction status
 */
getExtractStatus(jobId: string): Promise<any>;

Site Mapping

/**
 * Map URLs with v1 API
 * @param url - URL to map
 * @param params - V1 mapping parameters
 * @returns Promise resolving to mapping results
 */
mapUrl(url: string, params?: MapParams): Promise<MapResponse | ErrorResponse>;

V1 Exclusive Features

Deep Research

/**
 * Perform deep research with v1 API (with polling)
 * @param query - Research query
 * @param params - Deep research parameters
 * @param onActivity - Activity callback
 * @param onSource - Source callback
 * @returns Promise resolving to research results
 */
deepResearch(
  query: string,
  params: DeepResearchParams<ZodSchema>,
  onActivity?: (activity: {
    type: string;
    status: string;
    message: string;
    timestamp: string;
    depth: number;
  }) => void,
  onSource?: (source: {
    url: string;
    title?: string;
    description?: string;
    icon?: string;
  }) => void
): Promise<DeepResearchStatusResponse | ErrorResponse>;

/**
 * Start async deep research without polling
 * @param query - Research query
 * @param params - Deep research parameters
 * @returns Promise resolving to research initiation response
 */
asyncDeepResearch(
  query: string,
  params: DeepResearchParams<ZodSchema>
): Promise<DeepResearchResponse | ErrorResponse>;

/**
 * Check deep research status
 * @param id - Research job ID
 * @returns Promise resolving to research status
 */
checkDeepResearchStatus(id: string): Promise<DeepResearchStatusResponse | ErrorResponse>;

LLMs.txt Generation

/**
 * Generate LLMs.txt with v1 API (with polling)
 * @param url - URL to generate LLMs.txt from
 * @param params - Generation parameters
 * @returns Promise resolving to generation results
 */
generateLLMsText(
  url: string,
  params?: GenerateLLMsTextParams
): Promise<GenerateLLMsTextStatusResponse | ErrorResponse>;

/**
 * Start async LLMs.txt generation without polling
 * @param url - URL to generate LLMs.txt from
 * @param params - Generation parameters
 * @returns Promise resolving to generation initiation response
 */
asyncGenerateLLMsText(
  url: string,
  params?: GenerateLLMsTextParams
): Promise<GenerateLLMsTextResponse | ErrorResponse>;

/**
 * Check LLMs.txt generation status
 * @param id - Generation job ID
 * @returns Promise resolving to generation status
 */
checkGenerateLLMsTextStatus(id: string): Promise<GenerateLLMsTextStatusResponse | ErrorResponse>;

V1 Configuration Types

// V1 scrape parameters
interface ScrapeParams<LLMSchema extends ZodSchema, ActionsSchema extends Action[] | undefined> {
  formats?: string[];
  headers?: Record<string, string>;
  includeTags?: string[];
  excludeTags?: string[];
  onlyMainContent?: boolean;
  waitFor?: number;
  timeout?: number;
  location?: LocationConfig;
  mobile?: boolean;
  skipTlsVerification?: boolean;
  removeBase64Images?: boolean;
  blockAds?: boolean;
  proxy?: "basic" | "stealth" | "auto";
  storeInCache?: boolean;
  maxAge?: number;
  parsePDF?: boolean;
  extract?: {
    prompt?: string;
    schema?: LLMSchema;
    systemPrompt?: string;
  };
  jsonOptions?: {
    prompt?: string;
    schema?: LLMSchema;
    systemPrompt?: string;
  };
  changeTrackingOptions?: {
    prompt?: string;
    schema?: any;
    modes?: ("json" | "git-diff")[];
    tag?: string | null;
  };
  actions?: ActionsSchema;
  agent?: AgentOptions;
  zeroDataRetention?: boolean;
}

// V1 crawl parameters
interface CrawlParams {
  includePaths?: string[];
  excludePaths?: string[];
  maxDepth?: number;
  maxDiscoveryDepth?: number;
  limit?: number;
  allowBackwardLinks?: boolean;
  crawlEntireDomain?: boolean;
  allowExternalLinks?: boolean;
  ignoreSitemap?: boolean;
  scrapeOptions?: CrawlScrapeOptions;
  webhook?: string | {
    url: string;
    headers?: Record<string, string>;
    metadata?: Record<string, string>;
    events?: ("completed" | "failed" | "page" | "started")[];
  };
  deduplicateSimilarURLs?: boolean;
  ignoreQueryParameters?: boolean;
  regexOnFullURL?: boolean;
  delay?: number;
  allowSubdomains?: boolean;
  maxConcurrency?: number;
  zeroDataRetention?: boolean;
}
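A sample `CrawlParams` value wiring up the webhook object shape defined above; the endpoint URL and authorization header are placeholders:

```typescript
// Sample CrawlParams value; webhook URL and header are placeholders.
const crawlParams = {
  includePaths: ['^/docs/.*'],
  limit: 200,
  maxDepth: 3,
  webhook: {
    url: 'https://hooks.example.com/firecrawl',
    headers: { Authorization: 'Bearer <token>' },
    events: ['started', 'page', 'completed', 'failed'],
  },
  deduplicateSimilarURLs: true,
  delay: 1,
};
```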

// Deep research parameters
interface DeepResearchParams<LLMSchema extends ZodSchema> {
  maxDepth?: number; // 1-10, default 7
  timeLimit?: number; // 30-300 seconds, default 270
  maxUrls?: number; // 1-1000, default 20
  analysisPrompt?: string;
  systemPrompt?: string;
  formats?: ("markdown" | "json")[];
  jsonOptions?: {
    prompt?: string;
    schema?: LLMSchema;
    systemPrompt?: string;
  };
}

// LLMs.txt generation parameters
interface GenerateLLMsTextParams {
  maxUrls?: number; // 1-100, default 10
  showFullText?: boolean; // default false
  cache?: boolean; // default true
  __experimental_stream?: boolean;
}

V1 WebSocket Monitoring

/**
 * V1 WebSocket-based job watcher
 */
class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
  constructor(id: string, app: FirecrawlApp);
  
  // Event listeners
  on(event: 'document', listener: (e: CustomEvent<FirecrawlDocument>) => void): void;
  on(event: 'done', listener: (e: CustomEvent<{
    status: string;
    data: FirecrawlDocument[];
  }>) => void): void;
  on(event: 'error', listener: (e: CustomEvent<{
    status: string;
    data: FirecrawlDocument[];
    error: string;
  }>) => void): void;
  
  close(): void;
}

Usage Examples

V1 Deep Research

const v1Client = app.v1;

// Perform comprehensive research with real-time updates
const researchResult = await v1Client.deepResearch(
  'artificial intelligence safety research 2024',
  {
    maxDepth: 5,
    timeLimit: 180, // 3 minutes
    maxUrls: 30,
    analysisPrompt: 'Analyze the current state of AI safety research, key developments, and future challenges',
    formats: ['markdown', 'json'],
    jsonOptions: {
      schema: {
        type: 'object',
        properties: {
          keyFindings: { type: 'array', items: { type: 'string' } },
          researchers: { type: 'array', items: { type: 'string' } },
          developments: { type: 'array', items: { type: 'object' } },
          challenges: { type: 'array', items: { type: 'string' } },
          recommendations: { type: 'array', items: { type: 'string' } }
        }
      }
    }
  },
  // Activity callback
  (activity) => {
    console.log(`Research activity: ${activity.message} (depth: ${activity.depth})`);
  },
  // Source callback
  (source) => {
    console.log(`Found source: ${source.title} - ${source.url}`);
  }
);

console.log('Research completed:', researchResult.data);

V1 LLMs.txt Generation

const v1Client = app.v1;

// Generate LLMs.txt for a website
const llmsResult = await v1Client.generateLLMsText('https://docs.example.com', {
  maxUrls: 50,
  showFullText: true,
  cache: true
});

console.log('LLMs.txt generated:');
console.log(llmsResult.data.llmstxt);

if (llmsResult.data.llmsfulltxt) {
  console.log('\nFull text version available');
}

V1 Advanced Scraping with Actions

const v1Client = app.v1;

// Scrape with browser automation
const scrapeResult = await v1Client.scrapeUrl('https://app.example.com', {
  formats: ['markdown', 'json'],
  actions: [
    { type: 'wait', selector: '#content' },
    { type: 'click', selector: '.load-more-button' },
    { type: 'wait', milliseconds: 3000 },
    { type: 'screenshot', fullPage: true },
    { type: 'scrape' }
  ],
  jsonOptions: {
    prompt: 'Extract product information including names, prices, and descriptions',
    schema: {
      type: 'object',
      properties: {
        products: {
          type: 'array',
          items: {
            type: 'object',
            properties: {
              name: { type: 'string' },
              price: { type: 'number' },
              description: { type: 'string' }
            }
          }
        }
      }
    }
  }
});

console.log('Scraped data:', scrapeResult.data);
console.log('Actions performed:', scrapeResult.actions);

V1 Crawl with WebSocket Monitoring

const v1Client = app.v1;

// Start crawl with WebSocket monitoring
const watcher = await v1Client.crawlUrlAndWatch('https://example.com', {
  limit: 100,
  scrapeOptions: {
    formats: ['markdown'],
    onlyMainContent: true
  }
});

// Listen for real-time updates
watcher.addEventListener('document', (event) => {
  const document = event.detail;
  console.log(`New document: ${document.url}`);
});

watcher.addEventListener('done', (event) => {
  const { status, data } = event.detail;
  console.log(`Crawl ${status}! Got ${data.length} documents`);
  watcher.close();
});

watcher.addEventListener('error', (event) => {
  const { error } = event.detail;
  console.error(`Crawl error: ${error}`);
  watcher.close();
});

V1 Usage Analytics

const v1Client = app.v1;

// Get v1 usage information (different format from v2)
const [creditUsage, tokenUsage, queueStatus] = await Promise.all([
  v1Client.getCreditUsage(),
  v1Client.getTokenUsage(),
  v1Client.getQueueStatus()
]);

console.log('V1 Credit Usage:', {
  remaining: creditUsage.data.remaining_credits,
  plan: creditUsage.data.plan_credits,
  billingStart: creditUsage.data.billing_period_start,
  billingEnd: creditUsage.data.billing_period_end
});

console.log('V1 Token Usage:', {
  remaining: tokenUsage.data.remaining_tokens,
  plan: tokenUsage.data.plan_tokens
});

console.log('Queue Status:', queueStatus);

Migration from V1 to V2

When migrating from v1 to v2 API, note these key differences:

  1. Naming conventions: v2 uses camelCase consistently
  2. Response formats: v2 has simplified response structures
  3. Type safety: v2 provides better TypeScript integration
  4. Async patterns: v2 uses modern async/await patterns throughout
  5. V1-only features: deep research and LLMs.txt generation have no v2 equivalent
  6. WebSocket handling: v2 uses EventEmitter-based watchers

For maximum compatibility, use the unified Firecrawl client which provides both APIs.
