JavaScript SDK for Firecrawl API that enables comprehensive web scraping, crawling, and data extraction with AI-ready output formats.
—
Best practices: Pending — whether this skill follows best practices has not yet been reviewed.
Impact: Pending — no eval scenarios have been run.
Risk: Pending — the risk profile of this skill has not yet been assessed.
WebSocket-based job monitoring with automatic fallback to polling for long-running crawl and batch operations.
/**
 * Create a watcher for a crawl or batch job.
 * @param jobId - Identifier of the job to monitor
 * @param opts - Optional watcher configuration
 * @returns A Watcher emitting real-time updates for the job
 */
watcher(jobId: string, opts?: WatcherOptions): Watcher;

interface WatcherOptions {
  /** Which kind of job is being monitored. */
  kind?: "crawl" | "batch";
  /** Polling interval in seconds, used when falling back from WebSocket. */
  pollInterval?: number;
  /** Overall monitoring timeout, in seconds. */
  timeout?: number;
}

/**
 * EventEmitter-based watcher for real-time job monitoring.
 * Connects over WebSocket and falls back to polling automatically.
 */
class Watcher extends EventEmitter {
  constructor(http: HttpClient, jobId: string, opts?: WatcherOptions);

  /**
   * Begin monitoring the job.
   * @returns Promise that resolves once the connection is established
   */
  start(): Promise<void>;

  /** Stop monitoring and close any underlying connections. */
  close(): void;

  // Typed overloads for the events inherited from EventEmitter.
  on(event: 'document', listener: (document: Document & { id: string }) => void): this;
  on(event: 'snapshot', listener: (snapshot: CrawlJob | BatchScrapeJob) => void): this;
  on(event: 'done', listener: (result: JobCompletionEvent) => void): this;
  on(event: 'error', listener: (error: JobErrorEvent) => void): this;
  emit(event: 'document', document: Document & { id: string }): boolean;
  emit(event: 'snapshot', snapshot: CrawlJob | BatchScrapeJob): boolean;
  emit(event: 'done', result: JobCompletionEvent): boolean;
  emit(event: 'error', error: JobErrorEvent): boolean;
}

// Job completion event
interface JobCompletionEvent {
  /** Terminal state the job ended in. */
  status: "completed" | "failed" | "cancelled";
  /** Documents collected by the job. */
  data: Document[];
  /** Identifier of the finished job. */
  id: string;
}

// Job error event
interface JobErrorEvent {
  status: "failed";
  /** Documents collected before the failure occurred. */
  data: Document[];
  /** Human-readable description of the failure. */
  error: string;
  id: string;
}

// Convenience alias for a job status snapshot (CrawlJob or BatchScrapeJob).
type JobSnapshot = CrawlJob | BatchScrapeJob;

// Start a crawl job
// Kick off the crawl job we want to observe
const crawlResponse = await app.startCrawl('https://example.com', {
  limit: 100,
  scrapeOptions: { formats: ['markdown'] }
});

// Attach a watcher for real-time monitoring of that job
const crawlWatcher = app.watcher(crawlResponse.id, {
  kind: 'crawl',
  pollInterval: 2,
  timeout: 300 // 5 minutes
});

// Every scraped page arrives as its own 'document' event
crawlWatcher.on('document', (document) => {
  console.log(`New document scraped: ${document.metadata?.sourceURL}`);
  console.log(`Content length: ${document.markdown?.length || 0} characters`);
});

// Periodic snapshots report overall job progress
crawlWatcher.on('snapshot', (snapshot) => {
  console.log(`Progress: ${snapshot.completed}/${snapshot.total} - Status: ${snapshot.status}`);
  console.log(`Credits used: ${snapshot.creditsUsed || 0}`);
});

// Fired once when the job reaches a terminal state
crawlWatcher.on('done', (result) => {
  console.log(`Crawl ${result.status}! Total documents: ${result.data.length}`);
  crawlWatcher.close();
});

// Fired if monitoring itself fails
crawlWatcher.on('error', (error) => {
  console.error(`Crawl failed: ${error.error}`);
  crawlWatcher.close();
});

// Begin monitoring
await crawlWatcher.start();

const urls = Array.from({ length: 50 }, (_, i) =>
  `https://api.example.com/items/${i + 1}`
);
// Start batch job
const batchResponse = await app.startBatchScrape(urls, {
  options: { formats: ['json'] },
  maxConcurrency: 5
});

// Monitor batch progress
const watcher = app.watcher(batchResponse.id, {
  kind: 'batch',
  pollInterval: 3,
  timeout: 600 // 10 minutes
});

let processedCount = 0;
const results: Document[] = [];

// Collect each document as it finishes, flagging per-document failures
watcher.on('document', (document) => {
  processedCount += 1;
  results.push(document);
  console.log(`Processed ${processedCount} documents`);
  if (document.metadata?.error) {
    console.log(`Error processing ${document.metadata.sourceURL}: ${document.metadata.error}`);
  }
});

// Report percentage progress on each snapshot
watcher.on('snapshot', (snapshot) => {
  const progress = Math.round((snapshot.completed / snapshot.total) * 100);
  console.log(`Batch progress: ${progress}% (${snapshot.completed}/${snapshot.total})`);
  if (snapshot.creditsUsed) {
    console.log(`Credits used so far: ${snapshot.creditsUsed}`);
  }
});

// Summarize successes and failures once the batch finishes
watcher.on('done', (result) => {
  console.log(`Batch ${result.status}!`);
  console.log(`Total processed: ${results.length}`);
  // Process all results
  const successfulResults = results.filter(doc => !doc.metadata?.error);
  const failedResults = results.filter(doc => doc.metadata?.error);
  console.log(`Successful: ${successfulResults.length}, Failed: ${failedResults.length}`);
  watcher.close();
});

watcher.on('error', (error) => {
  console.error(`Batch monitoring error: ${error.error}`);
  watcher.close();
});
await watcher.start();

/**
 * Logs per-document throughput, periodic progress snapshots, and a final
 * summary for a job observed through a Watcher.
 */
class CrawlProgressTracker {
  // Timestamp (ms) at which tracking began.
  private startTime: number;
  // Arrival time (ms) of every document; its length is the document count.
  private documentTimes: number[] = [];
  // Per-document error messages collected along the way.
  private errors: string[] = [];

  constructor(private watcher: Watcher) {
    this.startTime = Date.now();
    this.setupEventHandlers();
  }

  /** Subscribe to the watcher events this tracker reports on. */
  private setupEventHandlers() {
    this.watcher.on('document', (document) => {
      this.documentTimes.push(Date.now());
      if (document.metadata?.error) {
        this.errors.push(`${document.metadata.sourceURL}: ${document.metadata.error}`);
      }
      this.logProgress(document);
    });
    this.watcher.on('snapshot', (snapshot) => {
      this.logSnapshot(snapshot);
    });
    this.watcher.on('done', (result) => {
      this.logFinalStats(result);
    });
  }

  /** Elapsed seconds since start, clamped to >= 1 ms to avoid division by zero. */
  private elapsedSeconds(): number {
    return Math.max(Date.now() - this.startTime, 1) / 1000;
  }

  private logProgress(document: Document) {
    // Clamped elapsed time prevents an Infinity rate when the first document
    // arrives within the same millisecond as start().
    const rate = this.documentTimes.length / this.elapsedSeconds();
    console.log(`Document ${this.documentTimes.length}: ${document.metadata?.sourceURL}`);
    console.log(`Current rate: ${rate.toFixed(2)} docs/sec`);
  }

  private logSnapshot(snapshot: CrawlJob | BatchScrapeJob) {
    const elapsed = Date.now() - this.startTime;
    // Guard total === 0 so the percentage never prints NaN.
    const progress = snapshot.total > 0 ? (snapshot.completed / snapshot.total) * 100 : 0;
    const eta = snapshot.completed > 0
      ? ((snapshot.total - snapshot.completed) * elapsed / snapshot.completed) / 1000
      : 0;
    console.log(`\n--- Progress Update ---`);
    console.log(`Status: ${snapshot.status}`);
    console.log(`Progress: ${snapshot.completed}/${snapshot.total} (${progress.toFixed(1)}%)`);
    console.log(`Elapsed: ${(elapsed / 1000).toFixed(0)}s`);
    console.log(`ETA: ${eta.toFixed(0)}s`);
    console.log(`Credits: ${snapshot.creditsUsed || 0}`);
    console.log(`Errors: ${this.errors.length}`);
    console.log(`-----------------------\n`);
  }

  private logFinalStats(result: JobCompletionEvent) {
    const totalTime = Date.now() - this.startTime;
    // Same divide-by-zero guard as logProgress for sub-millisecond runs.
    const avgRate = result.data.length / Math.max(totalTime / 1000, 0.001);
    console.log(`\n=== Final Statistics ===`);
    console.log(`Status: ${result.status}`);
    console.log(`Total documents: ${result.data.length}`);
    console.log(`Total time: ${(totalTime / 1000).toFixed(1)}s`);
    console.log(`Average rate: ${avgRate.toFixed(2)} docs/sec`);
    console.log(`Total errors: ${this.errors.length}`);
    if (this.errors.length > 0) {
      console.log(`\nErrors:`);
      this.errors.slice(0, 5).forEach(error => console.log(`- ${error}`));
      if (this.errors.length > 5) {
        console.log(`... and ${this.errors.length - 5} more`);
      }
    }
    console.log(`========================\n`);
  }
}
// Usage: attach a CrawlProgressTracker to a 500-page documentation crawl
const crawlResponse = await app.startCrawl('https://docs.example.com', {
limit: 500,
scrapeOptions: { formats: ['markdown'] }
});
// watcher() called without options; the optional WatcherOptions fields are omitted
const watcher = app.watcher(crawlResponse.id);
// The tracker subscribes to the watcher's events in its constructor
const tracker = new CrawlProgressTracker(watcher);
await watcher.start();

/**
 * Monitors several crawl/batch jobs at once, rendering a combined progress
 * view to the console as snapshots arrive.
 */
class MultiJobMonitor {
  // Active watchers keyed by job id; entries are removed on done/error.
  private watchers: Map<string, Watcher> = new Map();
  // Last known progress per job (kept after completion for the summary view).
  private jobStats = new Map<string, {
    type: 'crawl' | 'batch';
    started: number;
    completed: number;
    total: number;
    status: string;
  }>();

  /** Start a crawl job and begin monitoring it. @returns the new job id */
  async addCrawlJob(url: string, options: any) {
    const response = await app.startCrawl(url, options);
    this.addWatcher(response.id, 'crawl');
    return response.id;
  }

  /** Start a batch scrape job and begin monitoring it. @returns the new job id */
  async addBatchJob(urls: string[], options: any) {
    const response = await app.startBatchScrape(urls, options);
    this.addWatcher(response.id, 'batch');
    return response.id;
  }

  /** Create, register, and start a watcher for the given job. */
  private addWatcher(jobId: string, type: 'crawl' | 'batch') {
    const watcher = app.watcher(jobId, { kind: type });
    this.jobStats.set(jobId, {
      type,
      started: Date.now(),
      completed: 0,
      total: 0,
      status: 'starting'
    });
    watcher.on('snapshot', (snapshot) => {
      const stats = this.jobStats.get(jobId)!;
      stats.completed = snapshot.completed;
      stats.total = snapshot.total;
      stats.status = snapshot.status;
      this.logAllJobs();
    });
    watcher.on('done', (result) => {
      console.log(`Job ${jobId} ${result.status}`);
      // Close the watcher to release its connection before dropping it.
      watcher.close();
      this.watchers.delete(jobId);
      if (this.watchers.size === 0) {
        console.log('All jobs completed!');
      }
    });
    watcher.on('error', (error) => {
      console.error(`Job ${jobId} error: ${error.error}`);
      watcher.close();
      this.watchers.delete(jobId);
    });
    this.watchers.set(jobId, watcher);
    // Fire-and-forget start, but surface failures instead of leaving an
    // unhandled promise rejection (a bare start() call would do exactly that).
    void watcher.start().catch((err) => {
      console.error(`Job ${jobId} failed to start:`, err);
      watcher.close();
      this.watchers.delete(jobId);
    });
  }

  /** Clear the console and print one progress section per known job. */
  private logAllJobs() {
    console.clear();
    console.log('=== Multi-Job Monitor ===');
    for (const [jobId, stats] of this.jobStats) {
      const elapsed = (Date.now() - stats.started) / 1000;
      const progress = stats.total > 0 ? (stats.completed / stats.total * 100) : 0;
      console.log(`${jobId.substring(0, 8)}... (${stats.type}): ${stats.status}`);
      console.log(` Progress: ${stats.completed}/${stats.total} (${progress.toFixed(1)}%)`);
      console.log(` Elapsed: ${elapsed.toFixed(0)}s`);
      console.log('');
    }
  }

  /** Stop all active watchers and forget them. */
  closeAll() {
    for (const watcher of this.watchers.values()) {
      watcher.close();
    }
    this.watchers.clear();
  }
}
// Usage
const monitor = new MultiJobMonitor();
// Start multiple jobs concurrently; each addXxxJob resolves with its job id
// once the job has been created and its watcher registered
await Promise.all([
monitor.addCrawlJob('https://site1.example.com', { limit: 100 }),
monitor.addCrawlJob('https://site2.example.com', { limit: 150 }),
monitor.addBatchJob([
'https://api.example.com/data1',
'https://api.example.com/data2'
], { options: { formats: ['json'] } })
]);
// Jobs are monitored automatically via their watchers' snapshot/done events
// Call monitor.closeAll() when doneclass RobustCrawlMonitor {
private maxRetries = 3;
private retryCount = 0;
async startMonitoredCrawl(url: string, options: any) {
while (this.retryCount < this.maxRetries) {
try {
const response = await app.startCrawl(url, options);
return await this.monitorWithRetry(response.id);
} catch (error) {
this.retryCount++;
console.log(`Attempt ${this.retryCount} failed:`, error);
if (this.retryCount >= this.maxRetries) {
throw new Error(`Failed after ${this.maxRetries} attempts`);
}
// Wait before retry
await new Promise(resolve => setTimeout(resolve, 5000 * this.retryCount));
}
}
}
private async monitorWithRetry(jobId: string): Promise<Document[]> {
return new Promise((resolve, reject) => {
const watcher = app.watcher(jobId, {
timeout: 300,
pollInterval: 2
});
const documents: Document[] = [];
let lastSnapshot: CrawlJob | null = null;
watcher.on('document', (document) => {
documents.push(document);
});
watcher.on('snapshot', (snapshot) => {
lastSnapshot = snapshot as CrawlJob;
console.log(`Progress: ${snapshot.completed}/${snapshot.total}`);
});
watcher.on('done', (result) => {
if (result.status === 'completed') {
resolve(documents);
} else {
reject(new Error(`Job ${result.status}: ${JSON.stringify(result)}`));
}
watcher.close();
});
watcher.on('error', (error) => {
// Check if we got partial results
if (documents.length > 0) {
console.log(`Partial success: got ${documents.length} documents before error`);
resolve(documents);
} else {
reject(new Error(error.error));
}
watcher.close();
});
watcher.start().catch(reject);
});
}
}
// Usage
const monitor = new RobustCrawlMonitor();
try {
const documents = await monitor.startMonitoredCrawl('https://example.com', {
limit: 200,
scrapeOptions: { formats: ['markdown'] }
});
console.log(`Successfully crawled ${documents.length} documents`);
} catch (error) {
console.error('Crawl failed completely:', error);
}