CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/npm-mendable--firecrawl-js

JavaScript SDK for Firecrawl API that enables comprehensive web scraping, crawling, and data extraction with AI-ready output formats.

Pending
Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Security by Snyk

Pending

The risk profile of this skill

Overview
Eval results
Files

docs/monitoring.md

Real-time Monitoring

WebSocket-based job monitoring with automatic fallback to polling for long-running crawl and batch operations.

Core Monitoring Method

/**
 * Create a watcher for a crawl or batch job.
 * The returned watcher prefers a WebSocket connection and falls back to
 * polling (see WatcherOptions.pollInterval).
 * @param jobId - Job identifier to monitor
 * @param opts - Watcher configuration options (kind, pollInterval, timeout)
 * @returns Watcher instance for real-time updates
 */
watcher(jobId: string, opts?: WatcherOptions): Watcher;

Watcher Configuration

/** Configuration accepted by `watcher()` / the Watcher constructor. */
interface WatcherOptions {
  /** Kind of job being monitored. */
  kind?: "crawl" | "batch";

  /** Seconds between status polls when running in polling-fallback mode. */
  pollInterval?: number;

  /** Overall monitoring timeout, in seconds. */
  timeout?: number;
}

Watcher Class

/**
 * EventEmitter-based watcher for real-time job monitoring.
 * Automatically handles WebSocket connection with polling fallback.
 *
 * Events:
 *  - 'document': one scraped Document (with its id) as it arrives
 *  - 'snapshot': a full job-status snapshot (CrawlJob or BatchScrapeJob)
 *  - 'done': terminal result (completed / failed / cancelled)
 *  - 'error': monitoring failure payload
 */
class Watcher extends EventEmitter {
  // NOTE(review): http client and jobId are captured at construction;
  // opts defaults are presumably applied internally — confirm in SDK source.
  constructor(http: HttpClient, jobId: string, opts?: WatcherOptions);
  
  /**
   * Start monitoring the job
   * @returns Promise that resolves when connection is established
   */
  start(): Promise<void>;
  
  /**
   * Stop monitoring and close connections
   */
  close(): void;
  
  // Typed listener overloads (inherited EventEmitter semantics).
  on(event: 'document', listener: (document: Document & { id: string }) => void): this;
  on(event: 'snapshot', listener: (snapshot: CrawlJob | BatchScrapeJob) => void): this;
  on(event: 'done', listener: (result: JobCompletionEvent) => void): this;
  on(event: 'error', listener: (error: JobErrorEvent) => void): this;
  
  // Typed emit overloads mirroring the listener signatures above.
  emit(event: 'document', document: Document & { id: string }): boolean;
  emit(event: 'snapshot', snapshot: CrawlJob | BatchScrapeJob): boolean;
  emit(event: 'done', result: JobCompletionEvent): boolean;
  emit(event: 'error', error: JobErrorEvent): boolean;
}

Event Types

/** Payload of the 'done' event, fired when a job reaches a terminal state. */
interface JobCompletionEvent {
  status: "completed" | "failed" | "cancelled";
  data: Document[];
  id: string;
}

/** Payload of the 'error' event; data may hold documents scraped before the failure. */
interface JobErrorEvent {
  status: "failed";
  data: Document[];
  error: string;
  id: string;
}

/** Union of the snapshot payloads delivered via the 'snapshot' event. */
type JobSnapshot = CrawlJob | BatchScrapeJob;

Usage Examples

Basic Crawl Monitoring

// Kick off the crawl
const crawlResponse = await app.startCrawl('https://example.com', {
  scrapeOptions: { formats: ['markdown'] },
  limit: 100
});

// Attach a real-time watcher to the new job
const watcher = app.watcher(crawlResponse.id, {
  kind: 'crawl',
  timeout: 300, // 5 minutes
  pollInterval: 2
});

// Each scraped page arrives as a 'document' event
watcher.on('document', (doc) => {
  console.log(`New document scraped: ${doc.metadata?.sourceURL}`);
  console.log(`Content length: ${doc.markdown?.length || 0} characters`);
});

// Periodic job-status snapshots
watcher.on('snapshot', (snap) => {
  console.log(`Progress: ${snap.completed}/${snap.total} - Status: ${snap.status}`);
  console.log(`Credits used: ${snap.creditsUsed || 0}`);
});

// Terminal state: success, failure, or cancellation
watcher.on('done', (result) => {
  console.log(`Crawl ${result.status}! Total documents: ${result.data.length}`);
  watcher.close();
});

// Monitoring failures
watcher.on('error', (err) => {
  console.error(`Crawl failed: ${err.error}`);
  watcher.close();
});

// Begin streaming events
await watcher.start();

Batch Job Monitoring

// Build the list of 50 item URLs to scrape
const urls = Array.from({ length: 50 }, (_, i) => `https://api.example.com/items/${i + 1}`);

// Start the batch scrape
const batchResponse = await app.startBatchScrape(urls, {
  maxConcurrency: 5,
  options: { formats: ['json'] }
});

// Watch batch progress in real time
const watcher = app.watcher(batchResponse.id, {
  kind: 'batch',
  timeout: 600, // 10 minutes
  pollInterval: 3
});

let processedCount = 0;
const results: Document[] = [];

// Collect each document and report per-document errors
watcher.on('document', (doc) => {
  processedCount++;
  results.push(doc);

  console.log(`Processed ${processedCount} documents`);

  if (doc.metadata?.error) {
    console.log(`Error processing ${doc.metadata.sourceURL}: ${doc.metadata.error}`);
  }
});

// Report percentage progress on each snapshot
watcher.on('snapshot', (snap) => {
  const progress = Math.round((snap.completed / snap.total) * 100);
  console.log(`Batch progress: ${progress}% (${snap.completed}/${snap.total})`);

  if (snap.creditsUsed) {
    console.log(`Credits used so far: ${snap.creditsUsed}`);
  }
});

// Summarize successes vs failures when the batch finishes
watcher.on('done', (result) => {
  console.log(`Batch ${result.status}!`);
  console.log(`Total processed: ${results.length}`);

  const successfulResults = results.filter((doc) => !doc.metadata?.error);
  const failedResults = results.filter((doc) => doc.metadata?.error);

  console.log(`Successful: ${successfulResults.length}, Failed: ${failedResults.length}`);

  watcher.close();
});

watcher.on('error', (err) => {
  console.error(`Batch monitoring error: ${err.error}`);
  watcher.close();
});

await watcher.start();

Advanced Monitoring with Progress Tracking

/**
 * Tracks throughput, progress, and per-document errors for a watched
 * crawl/batch job and prints periodic reports to the console.
 */
class CrawlProgressTracker {
  private startTime: number;
  // Timestamp (ms) of each 'document' event, used for rate calculation.
  private documentTimes: number[] = [];
  // Human-readable per-document/monitoring error messages.
  private errors: string[] = [];
  
  constructor(private watcher: Watcher) {
    this.startTime = Date.now();
    this.setupEventHandlers();
  }
  
  private setupEventHandlers() {
    this.watcher.on('document', (document) => {
      this.documentTimes.push(Date.now());
      
      if (document.metadata?.error) {
        this.errors.push(`${document.metadata.sourceURL}: ${document.metadata.error}`);
      }
      
      this.logProgress(document);
    });
    
    this.watcher.on('snapshot', (snapshot) => {
      this.logSnapshot(snapshot);
    });
    
    this.watcher.on('done', (result) => {
      this.logFinalStats(result);
    });
    
    // FIX: Node's EventEmitter throws (crashing the process) when an
    // 'error' event is emitted with no listener attached. Record and log
    // the error instead of letting monitoring take down the app.
    this.watcher.on('error', (event) => {
      this.errors.push(`job ${event.id}: ${event.error}`);
      console.error(`Monitoring error: ${event.error}`);
    });
  }
  
  // Log the latest document and the running docs/sec rate.
  private logProgress(document: Document) {
    const elapsed = Date.now() - this.startTime;
    // FIX: guard elapsed === 0 (same-millisecond callback), which
    // previously produced an Infinity rate.
    const rate = elapsed > 0 ? this.documentTimes.length / (elapsed / 1000) : 0;
    
    console.log(`Document ${this.documentTimes.length}: ${document.metadata?.sourceURL}`);
    console.log(`Current rate: ${rate.toFixed(2)} docs/sec`);
  }
  
  // Print a progress block with percentage, elapsed time, and a naive ETA.
  private logSnapshot(snapshot: CrawlJob | BatchScrapeJob) {
    const elapsed = Date.now() - this.startTime;
    // FIX: snapshot.total can be 0 early in a crawl; previously this
    // printed "NaN%".
    const progress = snapshot.total > 0 ? (snapshot.completed / snapshot.total) * 100 : 0;
    // ETA extrapolates from average time per completed document.
    const eta = snapshot.completed > 0 
      ? ((snapshot.total - snapshot.completed) * elapsed / snapshot.completed) / 1000
      : 0;
    
    console.log(`\n--- Progress Update ---`);
    console.log(`Status: ${snapshot.status}`);
    console.log(`Progress: ${snapshot.completed}/${snapshot.total} (${progress.toFixed(1)}%)`);
    console.log(`Elapsed: ${(elapsed / 1000).toFixed(0)}s`);
    console.log(`ETA: ${eta.toFixed(0)}s`);
    console.log(`Credits: ${snapshot.creditsUsed || 0}`);
    console.log(`Errors: ${this.errors.length}`);
    console.log(`-----------------------\n`);
  }
  
  // Print the end-of-job summary, including up to five recorded errors.
  private logFinalStats(result: JobCompletionEvent) {
    const totalTime = Date.now() - this.startTime;
    // FIX: same zero-elapsed guard as logProgress.
    const avgRate = totalTime > 0 ? result.data.length / (totalTime / 1000) : 0;
    
    console.log(`\n=== Final Statistics ===`);
    console.log(`Status: ${result.status}`);
    console.log(`Total documents: ${result.data.length}`);
    console.log(`Total time: ${(totalTime / 1000).toFixed(1)}s`);
    console.log(`Average rate: ${avgRate.toFixed(2)} docs/sec`);
    console.log(`Total errors: ${this.errors.length}`);
    
    if (this.errors.length > 0) {
      console.log(`\nErrors:`);
      this.errors.slice(0, 5).forEach(error => console.log(`- ${error}`));
      if (this.errors.length > 5) {
        console.log(`... and ${this.errors.length - 5} more`);
      }
    }
    console.log(`========================\n`);
  }
}

// Usage: crawl a docs site with the tracker attached before starting
const crawlResponse = await app.startCrawl('https://docs.example.com', {
  scrapeOptions: { formats: ['markdown'] },
  limit: 500
});

const watcher = app.watcher(crawlResponse.id);
const tracker = new CrawlProgressTracker(watcher);

await watcher.start();

Multiple Job Monitoring

/**
 * Monitors several crawl/batch jobs at once, keeping per-job progress
 * stats and printing a consolidated dashboard on every snapshot.
 */
class MultiJobMonitor {
  // Active watchers, keyed by job id; removed when a job finishes.
  private watchers: Map<string, Watcher> = new Map();
  // Progress stats per job; retained after completion for the final log.
  private jobStats = new Map<string, {
    type: 'crawl' | 'batch';
    started: number;
    completed: number;
    total: number;
    status: string;
  }>();
  
  /** Start a crawl job and begin monitoring it. Returns the job id. */
  async addCrawlJob(url: string, options: any) {
    const response = await app.startCrawl(url, options);
    this.addWatcher(response.id, 'crawl');
    return response.id;
  }
  
  /** Start a batch scrape job and begin monitoring it. Returns the job id. */
  async addBatchJob(urls: string[], options: any) {
    const response = await app.startBatchScrape(urls, options);
    this.addWatcher(response.id, 'batch');
    return response.id;
  }
  
  // Wire up a watcher for one job and register its lifecycle handlers.
  private addWatcher(jobId: string, type: 'crawl' | 'batch') {
    const watcher = app.watcher(jobId, { kind: type });
    
    this.jobStats.set(jobId, {
      type,
      started: Date.now(),
      completed: 0,
      total: 0,
      status: 'starting'
    });
    
    watcher.on('snapshot', (snapshot) => {
      const stats = this.jobStats.get(jobId)!;
      stats.completed = snapshot.completed;
      stats.total = snapshot.total;
      stats.status = snapshot.status;
      
      this.logAllJobs();
    });
    
    watcher.on('done', (result) => {
      console.log(`Job ${jobId} ${result.status}`);
      // FIX: close the watcher before discarding it so its WebSocket /
      // polling timers are released (previously it was only dropped
      // from the map, leaking the connection).
      watcher.close();
      this.watchers.delete(jobId);
      
      if (this.watchers.size === 0) {
        console.log('All jobs completed!');
      }
    });
    
    watcher.on('error', (error) => {
      console.error(`Job ${jobId} error: ${error.error}`);
      // FIX: release resources on the error path as well.
      watcher.close();
      this.watchers.delete(jobId);
    });
    
    this.watchers.set(jobId, watcher);
    // FIX: start() returns a promise; leaving it floating meant a
    // connection failure became an unhandled rejection. Surface it and
    // clean up instead.
    watcher.start().catch((err) => {
      console.error(`Job ${jobId} failed to start:`, err);
      watcher.close();
      this.watchers.delete(jobId);
    });
  }
  
  // Clear the console and print one status block per tracked job.
  private logAllJobs() {
    console.clear();
    console.log('=== Multi-Job Monitor ===');
    
    for (const [jobId, stats] of this.jobStats) {
      const elapsed = (Date.now() - stats.started) / 1000;
      const progress = stats.total > 0 ? (stats.completed / stats.total * 100) : 0;
      
      console.log(`${jobId.substring(0, 8)}... (${stats.type}): ${stats.status}`);
      console.log(`  Progress: ${stats.completed}/${stats.total} (${progress.toFixed(1)}%)`);
      console.log(`  Elapsed: ${elapsed.toFixed(0)}s`);
      console.log('');
    }
  }
  
  /** Close every remaining watcher and forget them. */
  closeAll() {
    for (const watcher of this.watchers.values()) {
      watcher.close();
    }
    this.watchers.clear();
  }
}

// Usage: run two crawls and one batch job side by side
const monitor = new MultiJobMonitor();

await Promise.all([
  monitor.addBatchJob(
    ['https://api.example.com/data1', 'https://api.example.com/data2'],
    { options: { formats: ['json'] } }
  ),
  monitor.addCrawlJob('https://site1.example.com', { limit: 100 }),
  monitor.addCrawlJob('https://site2.example.com', { limit: 150 })
]);

// Jobs will be monitored automatically
// Call monitor.closeAll() when done

Error Recovery and Retry Monitoring

/**
 * Starts a crawl and monitors it to completion, retrying the whole
 * start+monitor cycle up to maxRetries times with linear backoff.
 * Resolves with the scraped documents (possibly partial when the
 * monitor errors after some documents already arrived).
 */
class RobustCrawlMonitor {
  private maxRetries = 3;
  private retryCount = 0;
  
  async startMonitoredCrawl(url: string, options: any): Promise<Document[]> {
    // FIX: reset the counter so the instance can be reused; previously a
    // second call on the same instance found retryCount === maxRetries,
    // skipped the loop entirely, and resolved with undefined.
    this.retryCount = 0;
    
    while (this.retryCount < this.maxRetries) {
      try {
        const response = await app.startCrawl(url, options);
        return await this.monitorWithRetry(response.id);
      } catch (error) {
        this.retryCount++;
        console.log(`Attempt ${this.retryCount} failed:`, error);
        
        if (this.retryCount >= this.maxRetries) {
          throw new Error(`Failed after ${this.maxRetries} attempts`);
        }
        
        // Linear backoff: 5s, 10s, ... before the next attempt.
        await new Promise(resolve => setTimeout(resolve, 5000 * this.retryCount));
      }
    }
    // FIX: unreachable in practice, but guarantees the declared
    // Promise<Document[]> return type (the original could implicitly
    // return undefined under noImplicitReturns).
    throw new Error(`Failed after ${this.maxRetries} attempts`);
  }
  
  // Watch one job to completion; resolves with collected documents, or
  // with partial results if the monitor errors after progress was made.
  private async monitorWithRetry(jobId: string): Promise<Document[]> {
    return new Promise((resolve, reject) => {
      const watcher = app.watcher(jobId, {
        timeout: 300,
        pollInterval: 2
      });
      
      const documents: Document[] = [];
      let lastSnapshot: CrawlJob | null = null;
      
      watcher.on('document', (document) => {
        documents.push(document);
      });
      
      watcher.on('snapshot', (snapshot) => {
        lastSnapshot = snapshot as CrawlJob;
        console.log(`Progress: ${snapshot.completed}/${snapshot.total}`);
      });
      
      watcher.on('done', (result) => {
        if (result.status === 'completed') {
          resolve(documents);
        } else {
          reject(new Error(`Job ${result.status}: ${JSON.stringify(result)}`));
        }
        watcher.close();
      });
      
      watcher.on('error', (error) => {
        // Treat an error after partial progress as a (partial) success.
        if (documents.length > 0) {
          console.log(`Partial success: got ${documents.length} documents before error`);
          resolve(documents);
        } else {
          reject(new Error(error.error));
        }
        watcher.close();
      });
      
      // Propagate connection failures to the retry loop.
      watcher.start().catch(reject);
    });
  }
}

// Usage: crawl with automatic retries, logging a hard failure if all fail
const monitor = new RobustCrawlMonitor();

try {
  const documents = await monitor.startMonitoredCrawl('https://example.com', {
    scrapeOptions: { formats: ['markdown'] },
    limit: 200
  });
  console.log(`Successfully crawled ${documents.length} documents`);
} catch (error) {
  console.error('Crawl failed completely:', error);
}

docs

batch.md

crawling.md

extraction.md

index.md

mapping.md

monitoring.md

scraping.md

search.md

usage.md

v1-api.md

tile.json