JavaScript SDK for Firecrawl API that enables comprehensive web scraping, crawling, and data extraction with AI-ready output formats.
Feature-frozen v1 API with additional capabilities including deep research, LLMs.txt generation, and WebSocket monitoring.
// Access v1 client through unified client
const app = new Firecrawl({ apiKey: 'your-api-key' });
const v1Client = app.v1;
// Or instantiate v1 client directly
import { FirecrawlAppV1 } from '@mendable/firecrawl-js';
const v1Client = new FirecrawlAppV1({ apiKey: 'your-api-key' });

interface FirecrawlAppConfig {
apiKey?: string | null;
apiUrl?: string | null;
}
class FirecrawlApp {
constructor(config: FirecrawlAppConfig);
}

/**
* Scrape a single URL with v1 API
* @param url - URL to scrape
* @param params - V1 scraping parameters
* @returns Promise resolving to scrape response
*/
scrapeUrl<T extends ZodSchema, ActionsSchema extends Action[] | undefined>(
url: string,
params?: ScrapeParams<T, ActionsSchema>
): Promise<ScrapeResponse<z.infer<T>, ActionsSchema extends Action[] ? ActionsResult : never> | ErrorResponse>;

/**
* Search using v1 API
* @param query - Search query
* @param params - V1 search parameters
* @returns Promise resolving to search results
*/
search(query: string, params?: SearchParams): Promise<SearchResponse>;
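
A minimal usage sketch; `limit` and `scrapeOptions` are assumed SearchParams fields, and treat the exact response fields as indicative:

const v1Client = app.v1;
// Search the web and scrape each result as markdown
const searchResults = await v1Client.search('firecrawl web scraping', {
  limit: 5,                                 // assumed SearchParams field
  scrapeOptions: { formats: ['markdown'] }  // assumed SearchParams field
});
for (const doc of searchResults.data) {
  console.log(doc.url, doc.metadata?.title);
}
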
/**
 * Crawl URL with v1 API (with polling)
* @param url - URL to crawl
* @param params - V1 crawl parameters
* @param pollInterval - Polling interval in seconds
* @param idempotencyKey - Optional idempotency key
* @returns Promise resolving to crawl status
*/
crawlUrl(
url: string,
params?: CrawlParams,
pollInterval?: number,
idempotencyKey?: string
): Promise<CrawlStatusResponse | ErrorResponse>;
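
A sketch of the blocking, polling variant; the status fields follow the CrawlStatusResponse shape referenced above:

const v1Client = app.v1;
// Crawl and block until the job finishes, polling every 5 seconds
const crawlStatus = await v1Client.crawlUrl(
  'https://example.com',
  { limit: 50, scrapeOptions: { formats: ['markdown'] } },
  5
);
if (crawlStatus.success) {
  console.log(`Crawled ${crawlStatus.completed}/${crawlStatus.total} pages`);
}
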
/**
* Start async crawl without polling
* @param url - URL to crawl
* @param params - V1 crawl parameters
* @param idempotencyKey - Optional idempotency key
* @returns Promise resolving to crawl initiation response
*/
asyncCrawlUrl(
url: string,
params?: CrawlParams,
idempotencyKey?: string
): Promise<CrawlResponse | ErrorResponse>;
/**
* Check crawl status
* @param id - Crawl job ID
* @param getAllData - Get all pages of data
* @param nextURL - Next URL for pagination
* @param skip - Skip entries for pagination
* @param limit - Limit entries returned
* @returns Promise resolving to crawl status
*/
checkCrawlStatus(
id?: string,
getAllData?: boolean,
nextURL?: string,
skip?: number,
limit?: number
): Promise<CrawlStatusResponse | ErrorResponse>;
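
The async variant pairs with checkCrawlStatus for manual control; a sketch, assuming the initiation response carries the job id:

const v1Client = app.v1;
// Kick off a crawl without blocking, then check on it later
const crawlJob = await v1Client.asyncCrawlUrl('https://example.com', { limit: 100 });
if (crawlJob.success) {
  const status = await v1Client.checkCrawlStatus(crawlJob.id, true); // getAllData = fetch every page
  if (status.success) {
    console.log(`Status: ${status.status}, ${status.completed}/${status.total} pages`);
  }
}
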
/**
* Cancel crawl job
* @param id - Crawl job ID
* @returns Promise resolving to cancellation response
*/
cancelCrawl(id: string): Promise<ErrorResponse>;
/**
* Get crawl errors
* @param id - Crawl job ID
* @returns Promise resolving to error information
*/
checkCrawlErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse>;
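
Together with cancelCrawl, this supports basic job management; a sketch with a placeholder job id:

const v1Client = app.v1;
const jobId = 'your-crawl-job-id'; // placeholder
const crawlErrors = await v1Client.checkCrawlErrors(jobId);
if ('errors' in crawlErrors) {
  console.log(`${crawlErrors.errors.length} pages failed`); // field name assumed from CrawlErrorsResponse
}
await v1Client.cancelCrawl(jobId); // per the signature above, resolves to an ErrorResponse-shaped status
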
/**
* Crawl with WebSocket monitoring
* @param url - URL to crawl
* @param params - V1 crawl parameters
* @param idempotencyKey - Optional idempotency key
* @returns Promise resolving to CrawlWatcher instance
*/
crawlUrlAndWatch(
url: string,
params?: CrawlParams,
idempotencyKey?: string
): Promise<CrawlWatcher>;

/**
* Batch scrape URLs with v1 API (with polling)
* @param urls - URLs to scrape
* @param params - V1 scrape parameters
* @param pollInterval - Polling interval in seconds
* @param idempotencyKey - Optional idempotency key
* @param webhook - Optional webhook configuration
* @param ignoreInvalidURLs - Ignore invalid URLs
* @param maxConcurrency - Maximum concurrent requests
* @returns Promise resolving to batch status
*/
batchScrapeUrls(
urls: string[],
params?: ScrapeParams,
pollInterval?: number,
idempotencyKey?: string,
webhook?: CrawlParams["webhook"],
ignoreInvalidURLs?: boolean,
maxConcurrency?: number
): Promise<BatchScrapeStatusResponse | ErrorResponse>;
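
Because the optional arguments are positional, pass undefined for any you skip; a sketch:

const v1Client = app.v1;
const batch = await v1Client.batchScrapeUrls(
  ['https://example.com/a', 'https://example.com/b'],
  { formats: ['markdown'] },
  2,         // pollInterval in seconds
  undefined, // idempotencyKey
  undefined, // webhook
  true,      // ignoreInvalidURLs
  5          // maxConcurrency
);
if (batch.success) {
  console.log(`Scraped ${batch.data.length} pages`);
}
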
/**
* Start async batch scrape without polling
* @param urls - URLs to scrape
* @param params - V1 scrape parameters
* @param idempotencyKey - Optional idempotency key
* @param webhook - Optional webhook configuration
* @param ignoreInvalidURLs - Ignore invalid URLs
* @returns Promise resolving to batch initiation response
*/
asyncBatchScrapeUrls(
urls: string[],
params?: ScrapeParams,
idempotencyKey?: string,
webhook?: CrawlParams["webhook"],
ignoreInvalidURLs?: boolean
): Promise<BatchScrapeResponse | ErrorResponse>;
/**
* Check batch scrape status
* @param id - Batch job ID
* @param getAllData - Get all pages of data
* @param nextURL - Next URL for pagination
* @param skip - Skip entries for pagination
* @param limit - Limit entries returned
* @returns Promise resolving to batch status
*/
checkBatchScrapeStatus(
id?: string,
getAllData?: boolean,
nextURL?: string,
skip?: number,
limit?: number
): Promise<BatchScrapeStatusResponse | ErrorResponse>;
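
As with crawling, the async variant plus the status check gives manual control; a sketch, assuming the initiation response carries the job id:

const v1Client = app.v1;
const batchJob = await v1Client.asyncBatchScrapeUrls(
  ['https://example.com/a', 'https://example.com/b'],
  { formats: ['markdown'] }
);
if (batchJob.success) {
  const batchStatus = await v1Client.checkBatchScrapeStatus(batchJob.id, true); // getAllData
  if (batchStatus.success) console.log(batchStatus.status);
}
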
/**
* Get batch scrape errors
* @param id - Batch job ID
* @returns Promise resolving to error information
*/
checkBatchScrapeErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse>;
/**
* Batch scrape with WebSocket monitoring
* @param urls - URLs to scrape
* @param params - V1 scrape parameters
* @param idempotencyKey - Optional idempotency key
* @param webhook - Optional webhook configuration
* @param ignoreInvalidURLs - Ignore invalid URLs
* @returns Promise resolving to CrawlWatcher instance
*/
batchScrapeUrlsAndWatch(
urls: string[],
params?: ScrapeParams,
idempotencyKey?: string,
webhook?: CrawlParams["webhook"],
ignoreInvalidURLs?: boolean
): Promise<CrawlWatcher>;

/**
* Extract data with v1 API (with polling)
* @param urls - URLs to extract from
* @param params - V1 extraction parameters
* @returns Promise resolving to extraction results
*/
extract<T extends ZodSchema>(
urls?: string[],
params?: ExtractParams<T>
): Promise<ExtractResponse<z.infer<T>> | ErrorResponse>;
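
The Zod generic means a schema types the result; a sketch using the prompt and schema fields of ExtractParams:

import { z } from 'zod';

const v1Client = app.v1;
const productSchema = z.object({
  title: z.string(),
  price: z.number()
});
const extracted = await v1Client.extract(['https://example.com/product'], {
  prompt: 'Extract the product title and price',
  schema: productSchema
});
if (extracted.success) {
  console.log(extracted.data); // typed as z.infer<typeof productSchema>
}
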
/**
* Start async extraction without polling
* @param urls - URLs to extract from
* @param params - V1 extraction parameters
* @param idempotencyKey - Optional idempotency key
* @returns Promise resolving to extraction initiation response
*/
asyncExtract(
urls: string[],
params?: ExtractParams,
idempotencyKey?: string
): Promise<ExtractResponse | ErrorResponse>;
/**
* Get extraction status
* @param jobId - Extraction job ID
* @returns Promise resolving to extraction status
*/
getExtractStatus(jobId: string): Promise<any>;
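
Since getExtractStatus returns Promise<any>, shape-check its result defensively; a sketch, assuming the initiation response carries the job id:

const v1Client = app.v1;
const extractJob = await v1Client.asyncExtract(['https://example.com'], {
  prompt: 'Summarize the page'
});
if (extractJob.success && extractJob.id) { // id field assumed on the initiation response
  const extractStatus = await v1Client.getExtractStatus(extractJob.id);
  console.log(extractStatus?.status);
}
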
/**
 * Map URLs with v1 API
* @param url - URL to map
* @param params - V1 mapping parameters
* @returns Promise resolving to mapping results
*/
mapUrl(url: string, params?: MapParams): Promise<MapResponse | ErrorResponse>;
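
A sketch; `search` and `limit` are assumed MapParams fields, and the `links` field carries the discovered URLs:

const v1Client = app.v1;
const mapResult = await v1Client.mapUrl('https://example.com', {
  search: 'docs', // filter mapped URLs by relevance to this term
  limit: 100
});
if (mapResult.success) {
  console.log(`Found ${mapResult.links?.length ?? 0} URLs`);
}
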
/**
 * Perform deep research with v1 API (with polling)
* @param query - Research query
* @param params - Deep research parameters
* @param onActivity - Activity callback
* @param onSource - Source callback
* @returns Promise resolving to research results
*/
deepResearch(
query: string,
params: DeepResearchParams<ZodSchema>,
onActivity?: (activity: {
type: string;
status: string;
message: string;
timestamp: string;
depth: number;
}) => void,
onSource?: (source: {
url: string;
title?: string;
description?: string;
icon?: string;
}) => void
): Promise<DeepResearchStatusResponse | ErrorResponse>;
/**
* Start async deep research without polling
* @param query - Research query
* @param params - Deep research parameters
* @returns Promise resolving to research initiation response
*/
asyncDeepResearch(
query: string,
params: DeepResearchParams<ZodSchema>
): Promise<DeepResearchResponse | ErrorResponse>;
/**
* Check deep research status
* @param id - Research job ID
* @returns Promise resolving to research status
*/
checkDeepResearchStatus(id: string): Promise<DeepResearchStatusResponse | ErrorResponse>;

/**
* Generate LLMs.txt with v1 API (with polling)
* @param url - URL to generate LLMs.txt from
* @param params - Generation parameters
* @returns Promise resolving to generation results
*/
generateLLMsText(
url: string,
params?: GenerateLLMsTextParams
): Promise<GenerateLLMsTextStatusResponse | ErrorResponse>;
/**
* Start async LLMs.txt generation without polling
* @param url - URL to generate LLMs.txt from
* @param params - Generation parameters
* @returns Promise resolving to generation initiation response
*/
asyncGenerateLLMsText(
url: string,
params?: GenerateLLMsTextParams
): Promise<GenerateLLMsTextResponse | ErrorResponse>;
/**
* Check LLMs.txt generation status
* @param id - Generation job ID
* @returns Promise resolving to generation status
*/
checkGenerateLLMsTextStatus(id: string): Promise<GenerateLLMsTextStatusResponse | ErrorResponse>;

// V1 scrape parameters
interface ScrapeParams<LLMSchema extends ZodSchema, ActionsSchema extends Action[] | undefined> {
formats?: string[];
headers?: Record<string, string>;
includeTags?: string[];
excludeTags?: string[];
onlyMainContent?: boolean;
waitFor?: number;
timeout?: number;
location?: LocationConfig;
mobile?: boolean;
skipTlsVerification?: boolean;
removeBase64Images?: boolean;
blockAds?: boolean;
proxy?: "basic" | "stealth" | "auto";
storeInCache?: boolean;
maxAge?: number;
parsePDF?: boolean;
extract?: {
prompt?: string;
schema?: LLMSchema;
systemPrompt?: string;
};
jsonOptions?: {
prompt?: string;
schema?: LLMSchema;
systemPrompt?: string;
};
changeTrackingOptions?: {
prompt?: string;
schema?: any;
modes?: ("json" | "git-diff")[];
tag?: string | null;
};
actions?: ActionsSchema;
agent?: AgentOptions;
zeroDataRetention?: boolean;
}
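
A sketch combining several of these options; the LocationConfig fields and the maxAge unit are assumptions:

const v1Client = app.v1;
const geoScrape = await v1Client.scrapeUrl('https://example.com', {
  formats: ['markdown'],
  onlyMainContent: true,
  location: { country: 'DE', languages: ['de'] }, // assumed LocationConfig fields
  proxy: 'auto',
  maxAge: 3600000, // accept cached results up to 1 hour old (milliseconds assumed)
  blockAds: true
});
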
// V1 crawl parameters
interface CrawlParams {
includePaths?: string[];
excludePaths?: string[];
maxDepth?: number;
maxDiscoveryDepth?: number;
limit?: number;
allowBackwardLinks?: boolean;
crawlEntireDomain?: boolean;
allowExternalLinks?: boolean;
ignoreSitemap?: boolean;
scrapeOptions?: CrawlScrapeOptions;
webhook?: string | {
url: string;
headers?: Record<string, string>;
metadata?: Record<string, string>;
events?: ("completed" | "failed" | "page" | "started")[];
};
deduplicateSimilarURLs?: boolean;
ignoreQueryParameters?: boolean;
regexOnFullURL?: boolean;
delay?: number;
allowSubdomains?: boolean;
maxConcurrency?: number;
zeroDataRetention?: boolean;
}
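
The object form of webhook pairs naturally with the async crawl variants; a sketch with a placeholder endpoint:

const v1Client = app.v1;
await v1Client.asyncCrawlUrl('https://example.com', {
  limit: 200,
  includePaths: ['blog/.*'],
  webhook: {
    url: 'https://your-server.example/firecrawl-webhook', // placeholder endpoint
    headers: { 'X-Webhook-Secret': 'your-secret' },
    events: ['started', 'page', 'completed', 'failed']
  }
});
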
// Deep research parameters
interface DeepResearchParams<LLMSchema extends ZodSchema> {
maxDepth?: number; // 1-10, default 7
timeLimit?: number; // 30-300 seconds, default 270
maxUrls?: number; // 1-1000, default 20
analysisPrompt?: string;
systemPrompt?: string;
formats?: ("markdown" | "json")[];
jsonOptions?: {
prompt?: string;
schema?: LLMSchema;
systemPrompt?: string;
};
}
// LLMs.txt generation parameters
interface GenerateLLMsTextParams {
maxUrls?: number; // 1-100, default 10
showFullText?: boolean; // default false
cache?: boolean; // default true
__experimental_stream?: boolean;
}

/**
* V1 WebSocket-based job watcher
*/
class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
constructor(id: string, app: FirecrawlApp);
// Event listeners
on(event: 'document', listener: (e: CustomEvent<FirecrawlDocument>) => void): void;
on(event: 'done', listener: (e: CustomEvent<{
status: string;
data: FirecrawlDocument[];
}>) => void): void;
on(event: 'error', listener: (e: CustomEvent<{
status: string;
data: FirecrawlDocument[];
error: string;
}>) => void): void;
close(): void;
}

const v1Client = app.v1;
// Perform comprehensive research with real-time updates
const researchResult = await v1Client.deepResearch(
'artificial intelligence safety research 2024',
{
maxDepth: 5,
timeLimit: 180, // 3 minutes
maxUrls: 30,
analysisPrompt: 'Analyze the current state of AI safety research, key developments, and future challenges',
formats: ['markdown', 'json'],
jsonOptions: {
schema: {
type: 'object',
properties: {
keyFindings: { type: 'array', items: { type: 'string' } },
researchers: { type: 'array', items: { type: 'string' } },
developments: { type: 'array', items: { type: 'object' } },
challenges: { type: 'array', items: { type: 'string' } },
recommendations: { type: 'array', items: { type: 'string' } }
}
}
}
},
// Activity callback
(activity) => {
console.log(`Research activity: ${activity.message} (depth: ${activity.depth})`);
},
// Source callback
(source) => {
console.log(`Found source: ${source.title} - ${source.url}`);
}
);
console.log('Research completed:', researchResult.data);

const v1Client = app.v1;
// Generate LLMs.txt for a website
const llmsResult = await v1Client.generateLLMsText('https://docs.example.com', {
maxUrls: 50,
showFullText: true,
cache: true
});
console.log('LLMs.txt generated:');
console.log(llmsResult.data.llmstxt);
if (llmsResult.data.llmsfulltxt) {
console.log('\nFull text version available');
}

const v1Client = app.v1;
// Scrape with browser automation
const scrapeResult = await v1Client.scrapeUrl('https://app.example.com', {
formats: ['markdown', 'json'],
actions: [
{ type: 'wait', selector: '#content' },
{ type: 'click', selector: '.load-more-button' },
{ type: 'wait', milliseconds: 3000 },
{ type: 'screenshot', fullPage: true },
{ type: 'scrape' }
],
jsonOptions: {
prompt: 'Extract product information including names, prices, and descriptions',
schema: {
type: 'object',
properties: {
products: {
type: 'array',
items: {
type: 'object',
properties: {
name: { type: 'string' },
price: { type: 'number' },
description: { type: 'string' }
}
}
}
}
}
}
});
console.log('Scraped markdown:', scrapeResult.markdown);
console.log('Extracted JSON:', scrapeResult.json);
console.log('Actions performed:', scrapeResult.actions);

const v1Client = app.v1;
// Start crawl with WebSocket monitoring
const watcher = await v1Client.crawlUrlAndWatch('https://example.com', {
limit: 100,
scrapeOptions: {
formats: ['markdown'],
onlyMainContent: true
}
});
// Listen for real-time updates
watcher.addEventListener('document', (event) => {
const document = event.detail;
console.log(`New document: ${document.url}`);
});
watcher.addEventListener('done', (event) => {
const { status, data } = event.detail;
console.log(`Crawl ${status}! Got ${data.length} documents`);
watcher.close();
});
watcher.addEventListener('error', (event) => {
const { error } = event.detail;
console.error(`Crawl error: ${error}`);
watcher.close();
});

const v1Client = app.v1;
// Get v1 usage information (different format from v2)
const [creditUsage, tokenUsage, queueStatus] = await Promise.all([
v1Client.getCreditUsage(),
v1Client.getTokenUsage(),
v1Client.getQueueStatus()
]);
console.log('V1 Credit Usage:', {
remaining: creditUsage.data.remaining_credits,
plan: creditUsage.data.plan_credits,
billingStart: creditUsage.data.billing_period_start,
billingEnd: creditUsage.data.billing_period_end
});
console.log('V1 Token Usage:', {
remaining: tokenUsage.data.remaining_tokens,
plan: tokenUsage.data.plan_tokens
});
console.log('Queue Status:', queueStatus);

When migrating from v1 to v2, keep in mind that v1 is feature-frozen: deep research, LLMs.txt generation, and WebSocket-based job monitoring are available only on the v1 surface, and the v1 usage endpoints return a different response format than v2 (snake_case fields such as remaining_credits). For maximum compatibility, use the unified Firecrawl client, which provides both APIs.
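
A minimal sketch of that pattern, following the access example at the top of this section; the import style is assumed from the package's documented usage:

import Firecrawl from '@mendable/firecrawl-js';

const app = new Firecrawl({ apiKey: 'your-api-key' });

// v1-only capabilities stay reachable under app.v1
const doc = await app.v1.scrapeUrl('https://example.com', { formats: ['markdown'] });
if (doc.success) console.log(doc.markdown);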