JavaScript SDK for the Firecrawl API that enables comprehensive web scraping, crawling, and data extraction with AI-ready output formats.
—
Best practices: Pending — whether this skill follows best practices has not yet been assessed.
Impact: Pending — no eval scenarios have been run.
Risk: Pending — the risk profile of this skill has not yet been determined.
Concurrent processing of multiple URLs with job monitoring, error handling, and webhook support.
/**
 * Start a batch scrape job for multiple URLs. The job runs asynchronously;
 * poll it with getBatchScrapeStatus, or use batchScrape to start and wait.
 * @param urls - Array of URLs to scrape
 * @param opts - Batch scraping options (scrape formats, webhook, concurrency, idempotency, ...)
 * @returns Promise resolving to the job id, its status URL, and any URLs rejected as invalid
 */
startBatchScrape(urls: string[], opts?: BatchScrapeOptions): Promise<BatchScrapeResponse>;
/**
 * Get a batch scrape job's current status plus any data scraped so far
 * (partial results are available while status is still "scraping").
 * @param jobId - Batch job identifier returned by startBatchScrape
 * @param pagination - Optional pagination configuration for fetching results
 * @returns Promise resolving to the job snapshot (status, progress counters, documents)
 */
getBatchScrapeStatus(jobId: string, pagination?: PaginationConfig): Promise<BatchScrapeJob>;
/**
 * Retrieve per-URL scraping errors and the list of URLs blocked by robots.txt
 * for a batch job (exposed as errors[] and robotsBlocked[] on the response).
 * @param jobId - Batch job identifier
 * @returns Promise resolving to the error details
 */
getBatchScrapeErrors(jobId: string): Promise<CrawlErrorsResponse>;
/**
 * Cancel a running batch scrape job.
 * @param jobId - Batch job identifier
 * @returns Promise resolving to true if the job was cancelled
 */
cancelBatchScrape(jobId: string): Promise<boolean>;
/**
 * Convenience waiter: start a batch scrape job and poll until completion.
 * @param urls - Array of URLs to scrape
 * @param opts - Batch options plus waiter controls (pollInterval/timeout —
 *               presumably milliseconds; TODO confirm units against SDK docs)
 * @returns Promise resolving to the final job snapshot
 */
batchScrape(urls: string[], opts?: BatchScrapeOptions & { pollInterval?: number; timeout?: number }): Promise<BatchScrapeJob>;

/** Options accepted when starting a batch scrape job. */
interface BatchScrapeOptions {
  // Scraping configuration applied to every URL in the batch
  options?: ScrapeOptions;
  // Webhook notifications: a plain URL or a full config (headers, metadata, events)
  webhook?: string | WebhookConfig;
  // Job management: id of an existing job to append to — presumably; confirm against API docs
  appendToId?: string;
  // Skip invalid URLs (reported via BatchScrapeResponse.invalidURLs) instead of failing the whole request
  ignoreInvalidURLs?: boolean;
  // Maximum number of URLs scraped in parallel
  maxConcurrency?: number;
  // Privacy: request zero data retention for this job
  zeroDataRetention?: boolean;
  // Idempotency: reusing the same key returns the existing job rather than creating a duplicate
  idempotencyKey?: string;
  // Integration tracking identifier
  integration?: string;
}
// Batch job initiation response
interface BatchScrapeResponse {
  // Job identifier used for status/error/cancel calls
  id: string;
  // URL for the job — presumably the status endpoint; confirm against API docs
  url: string;
  // URLs rejected as invalid (populated when ignoreInvalidURLs is set)
  invalidURLs?: string[];
}
// Batch job status and data
interface BatchScrapeJob {
status: "scraping" | "completed" | "failed" | "cancelled";
completed: number;
total: number;
creditsUsed?: number;
expiresAt?: string;
next?: string | null;
data: Document[];
}const urls = [
'https://example.com/page1',
'https://example.com/page2',
'https://example.com/page3',
'https://example.com/page4'
];
const batchJob = await app.batchScrape(urls, {
options: {
formats: ['markdown', 'links']
},
maxConcurrency: 3
});
console.log(`Scraped ${batchJob.completed} of ${batchJob.total} URLs`);
console.log(batchJob.data); // Array of scraped documentsconst urls = [
'https://news.example.com/article1',
'https://news.example.com/article2',
'https://news.example.com/article3'
];

// Start the batch job, extracting structured JSON alongside markdown.
const batchResponse = await app.startBatchScrape(urls, {
  options: {
    formats: ['markdown', {
      type: 'json',
      schema: {
        type: 'object',
        properties: {
          title: { type: 'string' },
          author: { type: 'string' },
          publishDate: { type: 'string' },
          content: { type: 'string' }
        }
      }
    }]
  },
  maxConcurrency: 2
});
console.log(`Started batch job: ${batchResponse.id}`);
if (batchResponse.invalidURLs?.length) {
  console.log('Invalid URLs detected:', batchResponse.invalidURLs);
}

// Monitor progress. Poll with a deadline so a stuck job cannot spin forever.
const deadline = Date.now() + 10 * 60 * 1000; // give up after 10 minutes
let job: BatchScrapeJob;
do {
  await new Promise(resolve => setTimeout(resolve, 3000)); // Wait 3 seconds
  job = await app.getBatchScrapeStatus(batchResponse.id);
  console.log(`Progress: ${job.completed}/${job.total} - Status: ${job.status}`);
} while (job.status === 'scraping' && Date.now() < deadline);

// BUG FIX: the loop also exits on 'failed' or 'cancelled', so only report
// success when the job actually completed.
if (job.status === 'completed') {
  console.log('Batch completed!', job.data.length, 'documents scraped');
} else {
  console.log(`Batch ended with status: ${job.status}`);
}
import { z } from 'zod';
// Example: batch-scrape product pages into schema-typed JSON using zod.
const ProductSchema = z.object({
  name: z.string(),
  price: z.number(),
  description: z.string(),
  availability: z.enum(['in-stock', 'out-of-stock', 'pre-order']),
  images: z.array(z.string()),
  rating: z.number().optional()
});

const productUrls = [
  'https://shop.example.com/products/laptop-1',
  'https://shop.example.com/products/laptop-2',
  'https://shop.example.com/products/laptop-3'
];

const productJob = await app.batchScrape(productUrls, {
  maxConcurrency: 2,
  options: {
    onlyMainContent: true,
    formats: [{ type: 'json', schema: ProductSchema }]
  }
});

// Each document.json will be typed as ProductSchema
for (const doc of productJob.data) {
  console.log('Product:', doc.json); // Typed product data
}
const urls = [
'https://api.example.com/data1',
'https://api.example.com/data2',
'https://api.example.com/data3'
];

// Example: authenticated scraping with webhook notifications on completion/failure.
const webhookJob = await app.batchScrape(urls, {
  options: {
    formats: ['json'],
    headers: { 'Authorization': 'Bearer api-token' }
  },
  webhook: {
    url: 'https://myapp.com/webhooks/batch-complete',
    headers: { 'X-API-Key': 'webhook-secret' },
    metadata: {
      'batchType': 'api-data-sync',
      'userId': '12345'
    },
    events: ['completed', 'failed']
  },
  maxConcurrency: 5
});
const urls = [
'https://example.com/valid-page',
'https://invalid-domain-xyz.com/page',
'https://example.com/another-valid-page',
'not-a-valid-url'
];

const batchResponse = await app.startBatchScrape(urls, {
  options: {
    formats: ['markdown']
  },
  ignoreInvalidURLs: true // Continue processing despite invalid URLs
});

// Check which URLs were rejected up front
if (batchResponse.invalidURLs?.length) {
  console.log('Invalid URLs that were skipped:', batchResponse.invalidURLs);
}

// Monitor and handle errors
const job = await app.getBatchScrapeStatus(batchResponse.id);
// BUG FIX: errors are relevant for 'failed' jobs too, not only 'completed'
// ones — a completed job can have partial per-URL failures, and a failed job
// definitely has errors worth inspecting.
if (job.status === 'completed' || job.status === 'failed') {
  // Get detailed error information
  const errors = await app.getBatchScrapeErrors(batchResponse.id);
  if (errors.errors.length > 0) {
    console.log('Scraping errors:');
    for (const error of errors.errors) {
      console.log(`- ${error.url}: ${error.error} (${error.code})`);
    }
  }
  if (errors.robotsBlocked.length > 0) {
    console.log('URLs blocked by robots.txt:', errors.robotsBlocked);
  }
}
const urls = [
'https://example.com/data1',
'https://example.com/data2'
];
// First request with idempotency key
const batchJob1 = await app.startBatchScrape(urls, {
options: { formats: ['markdown'] },
idempotencyKey: 'batch-operation-123'
});
// Duplicate request with same key - will return existing job
const batchJob2 = await app.startBatchScrape(urls, {
options: { formats: ['markdown'] },
idempotencyKey: 'batch-operation-123'
});
console.log(batchJob1.id === batchJob2.id); // trueconst urls = Array.from({ length: 100 }, (_, i) =>
`https://api.example.com/items/${i + 1}`
);

// Example: high-throughput batch with custom headers, privacy, and a plain webhook URL.
const bulkJob = await app.batchScrape(urls, {
  options: {
    formats: ['json', 'markdown'],
    headers: {
      'User-Agent': 'MyBot/1.0',
      'Accept': 'application/json'
    },
    timeout: 30000,
    mobile: false,
    fastMode: true,
    proxy: 'basic'
  },
  maxConcurrency: 10,
  zeroDataRetention: true,
  ignoreInvalidURLs: true,
  webhook: 'https://myapp.com/batch-webhook'
});
const urls = Array.from({ length: 1000 }, (_, i) =>
`https://catalog.example.com/item/${i + 1}`
);

// Start a large batch job, then fetch its results with auto-pagination limits.
const batchResponse = await app.startBatchScrape(urls, {
  options: { formats: ['markdown'] },
  maxConcurrency: 20
});

const job = await app.getBatchScrapeStatus(batchResponse.id, {
  autoPaginate: true,
  maxPages: 50,
  maxResults: 5000,
  maxWaitTime: 600 // 10 minutes
});
console.log(`Retrieved ${job.data.length} documents`);
// Start a large batch job
const batchResponse = await app.startBatchScrape(urls, {
  options: { formats: ['markdown'] }
});

// Cancel if needed. NOTE: an async setTimeout callback is fire-and-forget, so
// rejections must be handled inside it — otherwise a failed cancel call
// becomes an unhandled promise rejection.
setTimeout(async () => {
  try {
    const cancelled = await app.cancelBatchScrape(batchResponse.id);
    if (cancelled) {
      console.log('Batch job cancelled successfully');
    }
  } catch (err) {
    console.error('Failed to cancel batch job:', err);
  }
}, 30000); // Cancel after 30 seconds