CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/npm-mendable--firecrawl-js

JavaScript SDK for Firecrawl API that enables comprehensive web scraping, crawling, and data extraction with AI-ready output formats.

Pending
Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Security by Snyk

Pending

The risk profile of this skill

Overview
Eval results
Files

docs/monitoring.md

Real-time Monitoring

WebSocket-based job monitoring with automatic fallback to polling for long-running crawl and batch operations.

Core Monitoring Method

/**
 * Create a watcher for a crawl or batch job.
 * The returned watcher prefers a WebSocket connection and falls back to
 * polling (see WatcherOptions.pollInterval).
 * @param jobId - Job identifier to monitor
 * @param opts - Watcher configuration options (kind, pollInterval, timeout)
 * @returns Watcher instance for real-time updates
 */
watcher(jobId: string, opts?: WatcherOptions): Watcher;

Watcher Configuration

/** Configuration accepted by `watcher()` / the Watcher constructor. */
interface WatcherOptions {
  /** Kind of job being monitored. */
  kind?: "crawl" | "batch";

  /** Seconds between status polls when running in polling-fallback mode. */
  pollInterval?: number;

  /** Overall monitoring timeout, in seconds. */
  timeout?: number;
}

Watcher Class

/**
 * EventEmitter-based watcher for real-time job monitoring.
 * Automatically handles WebSocket connection with polling fallback.
 *
 * Events:
 *  - 'document': one scraped Document (with its id) as it arrives
 *  - 'snapshot': a full job-status snapshot (CrawlJob or BatchScrapeJob)
 *  - 'done': terminal result (completed / failed / cancelled)
 *  - 'error': monitoring failure payload
 */
class Watcher extends EventEmitter {
  // NOTE(review): http client and jobId are captured at construction;
  // opts defaults are presumably applied internally — confirm in SDK source.
  constructor(http: HttpClient, jobId: string, opts?: WatcherOptions);
  
  /**
   * Start monitoring the job
   * @returns Promise that resolves when connection is established
   */
  start(): Promise<void>;
  
  /**
   * Stop monitoring and close connections
   */
  close(): void;
  
  // Typed listener overloads (inherited EventEmitter semantics).
  on(event: 'document', listener: (document: Document & { id: string }) => void): this;
  on(event: 'snapshot', listener: (snapshot: CrawlJob | BatchScrapeJob) => void): this;
  on(event: 'done', listener: (result: JobCompletionEvent) => void): this;
  on(event: 'error', listener: (error: JobErrorEvent) => void): this;
  
  // Typed emit overloads mirroring the listener signatures above.
  emit(event: 'document', document: Document & { id: string }): boolean;
  emit(event: 'snapshot', snapshot: CrawlJob | BatchScrapeJob): boolean;
  emit(event: 'done', result: JobCompletionEvent): boolean;
  emit(event: 'error', error: JobErrorEvent): boolean;
}

Event Types

/** Payload of the 'done' event, fired when a job reaches a terminal state. */
interface JobCompletionEvent {
  status: "completed" | "failed" | "cancelled";
  data: Document[];
  id: string;
}

/** Payload of the 'error' event; data may hold documents scraped before the failure. */
interface JobErrorEvent {
  status: "failed";
  data: Document[];
  error: string;
  id: string;
}

/** Union of the snapshot payloads delivered via the 'snapshot' event. */
type JobSnapshot = CrawlJob | BatchScrapeJob;

Usage Examples

Basic Crawl Monitoring

// Kick off the crawl
const crawlResponse = await app.startCrawl('https://example.com', {
  scrapeOptions: { formats: ['markdown'] },
  limit: 100
});

// Attach a real-time watcher to the new job
const watcher = app.watcher(crawlResponse.id, {
  kind: 'crawl',
  timeout: 300, // 5 minutes
  pollInterval: 2
});

// Each scraped page arrives as a 'document' event
watcher.on('document', (doc) => {
  console.log(`New document scraped: ${doc.metadata?.sourceURL}`);
  console.log(`Content length: ${doc.markdown?.length || 0} characters`);
});

// Periodic job-status snapshots
watcher.on('snapshot', (snap) => {
  console.log(`Progress: ${snap.completed}/${snap.total} - Status: ${snap.status}`);
  console.log(`Credits used: ${snap.creditsUsed || 0}`);
});

// Terminal state: success, failure, or cancellation
watcher.on('done', (result) => {
  console.log(`Crawl ${result.status}! Total documents: ${result.data.length}`);
  watcher.close();
});

// Monitoring failures
watcher.on('error', (err) => {
  console.error(`Crawl failed: ${err.error}`);
  watcher.close();
});

// Begin streaming events
await watcher.start();

Batch Job Monitoring

// Build the list of 50 item URLs to scrape
const urls = Array.from({ length: 50 }, (_, i) => `https://api.example.com/items/${i + 1}`);

// Start the batch scrape
const batchResponse = await app.startBatchScrape(urls, {
  maxConcurrency: 5,
  options: { formats: ['json'] }
});

// Watch batch progress in real time
const watcher = app.watcher(batchResponse.id, {
  kind: 'batch',
  timeout: 600, // 10 minutes
  pollInterval: 3
});

let processedCount = 0;
const results: Document[] = [];

// Collect each document and report per-document errors
watcher.on('document', (doc) => {
  processedCount++;
  results.push(doc);

  console.log(`Processed ${processedCount} documents`);

  if (doc.metadata?.error) {
    console.log(`Error processing ${doc.metadata.sourceURL}: ${doc.metadata.error}`);
  }
});

// Report percentage progress on each snapshot
watcher.on('snapshot', (snap) => {
  const progress = Math.round((snap.completed / snap.total) * 100);
  console.log(`Batch progress: ${progress}% (${snap.completed}/${snap.total})`);

  if (snap.creditsUsed) {
    console.log(`Credits used so far: ${snap.creditsUsed}`);
  }
});

// Summarize successes vs failures when the batch finishes
watcher.on('done', (result) => {
  console.log(`Batch ${result.status}!`);
  console.log(`Total processed: ${results.length}`);

  const successfulResults = results.filter((doc) => !doc.metadata?.error);
  const failedResults = results.filter((doc) => doc.metadata?.error);

  console.log(`Successful: ${successfulResults.length}, Failed: ${failedResults.length}`);

  watcher.close();
});

watcher.on('error', (err) => {
  console.error(`Batch monitoring error: ${err.error}`);
  watcher.close();
});

await watcher.start();

Advanced Monitoring with Progress Tracking

/**
 * Tracks throughput, progress, and per-document errors for a watched
 * crawl/batch job and prints periodic reports to the console.
 */
class CrawlProgressTracker {
  private startTime: number;
  // Timestamp (ms) of each 'document' event, used for rate calculation.
  private documentTimes: number[] = [];
  // Human-readable per-document/monitoring error messages.
  private errors: string[] = [];
  
  constructor(private watcher: Watcher) {
    this.startTime = Date.now();
    this.setupEventHandlers();
  }
  
  private setupEventHandlers() {
    this.watcher.on('document', (document) => {
      this.documentTimes.push(Date.now());
      
      if (document.metadata?.error) {
        this.errors.push(`${document.metadata.sourceURL}: ${document.metadata.error}`);
      }
      
      this.logProgress(document);
    });
    
    this.watcher.on('snapshot', (snapshot) => {
      this.logSnapshot(snapshot);
    });
    
    this.watcher.on('done', (result) => {
      this.logFinalStats(result);
    });
    
    // FIX: Node's EventEmitter throws (crashing the process) when an
    // 'error' event is emitted with no listener attached. Record and log
    // the error instead of letting monitoring take down the app.
    this.watcher.on('error', (event) => {
      this.errors.push(`job ${event.id}: ${event.error}`);
      console.error(`Monitoring error: ${event.error}`);
    });
  }
  
  // Log the latest document and the running docs/sec rate.
  private logProgress(document: Document) {
    const elapsed = Date.now() - this.startTime;
    // FIX: guard elapsed === 0 (same-millisecond callback), which
    // previously produced an Infinity rate.
    const rate = elapsed > 0 ? this.documentTimes.length / (elapsed / 1000) : 0;
    
    console.log(`Document ${this.documentTimes.length}: ${document.metadata?.sourceURL}`);
    console.log(`Current rate: ${rate.toFixed(2)} docs/sec`);
  }
  
  // Print a progress block with percentage, elapsed time, and a naive ETA.
  private logSnapshot(snapshot: CrawlJob | BatchScrapeJob) {
    const elapsed = Date.now() - this.startTime;
    // FIX: snapshot.total can be 0 early in a crawl; previously this
    // printed "NaN%".
    const progress = snapshot.total > 0 ? (snapshot.completed / snapshot.total) * 100 : 0;
    // ETA extrapolates from average time per completed document.
    const eta = snapshot.completed > 0 
      ? ((snapshot.total - snapshot.completed) * elapsed / snapshot.completed) / 1000
      : 0;
    
    console.log(`\n--- Progress Update ---`);
    console.log(`Status: ${snapshot.status}`);
    console.log(`Progress: ${snapshot.completed}/${snapshot.total} (${progress.toFixed(1)}%)`);
    console.log(`Elapsed: ${(elapsed / 1000).toFixed(0)}s`);
    console.log(`ETA: ${eta.toFixed(0)}s`);
    console.log(`Credits: ${snapshot.creditsUsed || 0}`);
    console.log(`Errors: ${this.errors.length}`);
    console.log(`-----------------------\n`);
  }
  
  // Print the end-of-job summary, including up to five recorded errors.
  private logFinalStats(result: JobCompletionEvent) {
    const totalTime = Date.now() - this.startTime;
    // FIX: same zero-elapsed guard as logProgress.
    const avgRate = totalTime > 0 ? result.data.length / (totalTime / 1000) : 0;
    
    console.log(`\n=== Final Statistics ===`);
    console.log(`Status: ${result.status}`);
    console.log(`Total documents: ${result.data.length}`);
    console.log(`Total time: ${(totalTime / 1000).toFixed(1)}s`);
    console.log(`Average rate: ${avgRate.toFixed(2)} docs/sec`);
    console.log(`Total errors: ${this.errors.length}`);
    
    if (this.errors.length > 0) {
      console.log(`\nErrors:`);
      this.errors.slice(0, 5).forEach(error => console.log(`- ${error}`));
      if (this.errors.length > 5) {
        console.log(`... and ${this.errors.length - 5} more`);
      }
    }
    console.log(`========================\n`);
  }
}

// Usage: crawl a docs site with the tracker attached before starting
const crawlResponse = await app.startCrawl('https://docs.example.com', {
  scrapeOptions: { formats: ['markdown'] },
  limit: 500
});

const watcher = app.watcher(crawlResponse.id);
const tracker = new CrawlProgressTracker(watcher);

await watcher.start();

Multiple Job Monitoring

/**
 * Monitors several crawl/batch jobs at once, keeping per-job progress
 * stats and printing a consolidated dashboard on every snapshot.
 */
class MultiJobMonitor {
  // Active watchers, keyed by job id; removed when a job finishes.
  private watchers: Map<string, Watcher> = new Map();
  // Progress stats per job; retained after completion for the final log.
  private jobStats = new Map<string, {
    type: 'crawl' | 'batch';
    started: number;
    completed: number;
    total: number;
    status: string;
  }>();
  
  /** Start a crawl job and begin monitoring it. Returns the job id. */
  async addCrawlJob(url: string, options: any) {
    const response = await app.startCrawl(url, options);
    this.addWatcher(response.id, 'crawl');
    return response.id;
  }
  
  /** Start a batch scrape job and begin monitoring it. Returns the job id. */
  async addBatchJob(urls: string[], options: any) {
    const response = await app.startBatchScrape(urls, options);
    this.addWatcher(response.id, 'batch');
    return response.id;
  }
  
  // Wire up a watcher for one job and register its lifecycle handlers.
  private addWatcher(jobId: string, type: 'crawl' | 'batch') {
    const watcher = app.watcher(jobId, { kind: type });
    
    this.jobStats.set(jobId, {
      type,
      started: Date.now(),
      completed: 0,
      total: 0,
      status: 'starting'
    });
    
    watcher.on('snapshot', (snapshot) => {
      const stats = this.jobStats.get(jobId)!;
      stats.completed = snapshot.completed;
      stats.total = snapshot.total;
      stats.status = snapshot.status;
      
      this.logAllJobs();
    });
    
    watcher.on('done', (result) => {
      console.log(`Job ${jobId} ${result.status}`);
      // FIX: close the watcher before discarding it so its WebSocket /
      // polling timers are released (previously it was only dropped
      // from the map, leaking the connection).
      watcher.close();
      this.watchers.delete(jobId);
      
      if (this.watchers.size === 0) {
        console.log('All jobs completed!');
      }
    });
    
    watcher.on('error', (error) => {
      console.error(`Job ${jobId} error: ${error.error}`);
      // FIX: release resources on the error path as well.
      watcher.close();
      this.watchers.delete(jobId);
    });
    
    this.watchers.set(jobId, watcher);
    // FIX: start() returns a promise; leaving it floating meant a
    // connection failure became an unhandled rejection. Surface it and
    // clean up instead.
    watcher.start().catch((err) => {
      console.error(`Job ${jobId} failed to start:`, err);
      watcher.close();
      this.watchers.delete(jobId);
    });
  }
  
  // Clear the console and print one status block per tracked job.
  private logAllJobs() {
    console.clear();
    console.log('=== Multi-Job Monitor ===');
    
    for (const [jobId, stats] of this.jobStats) {
      const elapsed = (Date.now() - stats.started) / 1000;
      const progress = stats.total > 0 ? (stats.completed / stats.total * 100) : 0;
      
      console.log(`${jobId.substring(0, 8)}... (${stats.type}): ${stats.status}`);
      console.log(`  Progress: ${stats.completed}/${stats.total} (${progress.toFixed(1)}%)`);
      console.log(`  Elapsed: ${elapsed.toFixed(0)}s`);
      console.log('');
    }
  }
  
  /** Close every remaining watcher and forget them. */
  closeAll() {
    for (const watcher of this.watchers.values()) {
      watcher.close();
    }
    this.watchers.clear();
  }
}

// Usage: run two crawls and one batch job side by side
const monitor = new MultiJobMonitor();

await Promise.all([
  monitor.addBatchJob(
    ['https://api.example.com/data1', 'https://api.example.com/data2'],
    { options: { formats: ['json'] } }
  ),
  monitor.addCrawlJob('https://site1.example.com', { limit: 100 }),
  monitor.addCrawlJob('https://site2.example.com', { limit: 150 })
]);

// Jobs will be monitored automatically
// Call monitor.closeAll() when done

Error Recovery and Retry Monitoring

/**
 * Starts a crawl and monitors it to completion, retrying the whole
 * start+monitor cycle up to maxRetries times with linear backoff.
 * Resolves with the scraped documents (possibly partial when the
 * monitor errors after some documents already arrived).
 */
class RobustCrawlMonitor {
  private maxRetries = 3;
  private retryCount = 0;
  
  async startMonitoredCrawl(url: string, options: any): Promise<Document[]> {
    // FIX: reset the counter so the instance can be reused; previously a
    // second call on the same instance found retryCount === maxRetries,
    // skipped the loop entirely, and resolved with undefined.
    this.retryCount = 0;
    
    while (this.retryCount < this.maxRetries) {
      try {
        const response = await app.startCrawl(url, options);
        return await this.monitorWithRetry(response.id);
      } catch (error) {
        this.retryCount++;
        console.log(`Attempt ${this.retryCount} failed:`, error);
        
        if (this.retryCount >= this.maxRetries) {
          throw new Error(`Failed after ${this.maxRetries} attempts`);
        }
        
        // Linear backoff: 5s, 10s, ... before the next attempt.
        await new Promise(resolve => setTimeout(resolve, 5000 * this.retryCount));
      }
    }
    // FIX: unreachable in practice, but guarantees the declared
    // Promise<Document[]> return type (the original could implicitly
    // return undefined under noImplicitReturns).
    throw new Error(`Failed after ${this.maxRetries} attempts`);
  }
  
  // Watch one job to completion; resolves with collected documents, or
  // with partial results if the monitor errors after progress was made.
  private async monitorWithRetry(jobId: string): Promise<Document[]> {
    return new Promise((resolve, reject) => {
      const watcher = app.watcher(jobId, {
        timeout: 300,
        pollInterval: 2
      });
      
      const documents: Document[] = [];
      let lastSnapshot: CrawlJob | null = null;
      
      watcher.on('document', (document) => {
        documents.push(document);
      });
      
      watcher.on('snapshot', (snapshot) => {
        lastSnapshot = snapshot as CrawlJob;
        console.log(`Progress: ${snapshot.completed}/${snapshot.total}`);
      });
      
      watcher.on('done', (result) => {
        if (result.status === 'completed') {
          resolve(documents);
        } else {
          reject(new Error(`Job ${result.status}: ${JSON.stringify(result)}`));
        }
        watcher.close();
      });
      
      watcher.on('error', (error) => {
        // Treat an error after partial progress as a (partial) success.
        if (documents.length > 0) {
          console.log(`Partial success: got ${documents.length} documents before error`);
          resolve(documents);
        } else {
          reject(new Error(error.error));
        }
        watcher.close();
      });
      
      // Propagate connection failures to the retry loop.
      watcher.start().catch(reject);
    });
  }
}

// Usage: crawl with automatic retries, logging a hard failure if all fail
const monitor = new RobustCrawlMonitor();

try {
  const documents = await monitor.startMonitoredCrawl('https://example.com', {
    scrapeOptions: { formats: ['markdown'] },
    limit: 200
  });
  console.log(`Successfully crawled ${documents.length} documents`);
} catch (error) {
  console.error('Crawl failed completely:', error);
}

docs

batch.md

crawling.md

extraction.md

index.md

mapping.md

monitoring.md

scraping.md

search.md

usage.md

v1-api.md

tile.json