# Real-time Monitoring

WebSocket-based job monitoring with automatic fallback to polling for long-running crawl and batch operations.

## Core Monitoring Method

```typescript { .api }
/**
 * Create a watcher for a crawl or batch job
 * @param jobId - Job identifier to monitor
 * @param opts - Watcher configuration options
 * @returns Watcher instance for real-time updates
 */
watcher(jobId: string, opts?: WatcherOptions): Watcher;
```

## Watcher Configuration

```typescript { .api }
interface WatcherOptions {
  // Job type to monitor
  kind?: "crawl" | "batch";

  // Polling interval in seconds (fallback mode)
  pollInterval?: number;

  // Total timeout in seconds
  timeout?: number;
}
```

## Watcher Class

```typescript { .api }
/**
 * EventEmitter-based watcher for real-time job monitoring
 * Automatically handles WebSocket connection with polling fallback
 */
class Watcher extends EventEmitter {
  constructor(http: HttpClient, jobId: string, opts?: WatcherOptions);

  /**
   * Start monitoring the job
   * @returns Promise that resolves when connection is established
   */
  start(): Promise<void>;

  /**
   * Stop monitoring and close connections
   */
  close(): void;

  // Event methods inherited from EventEmitter
  on(event: 'document', listener: (document: Document & { id: string }) => void): this;
  on(event: 'snapshot', listener: (snapshot: CrawlJob | BatchScrapeJob) => void): this;
  on(event: 'done', listener: (result: JobCompletionEvent) => void): this;
  on(event: 'error', listener: (error: JobErrorEvent) => void): this;

  emit(event: 'document', document: Document & { id: string }): boolean;
  emit(event: 'snapshot', snapshot: CrawlJob | BatchScrapeJob): boolean;
  emit(event: 'done', result: JobCompletionEvent): boolean;
  emit(event: 'error', error: JobErrorEvent): boolean;
}
```

## Event Types

```typescript { .api }
// Job completion event
interface JobCompletionEvent {
  status: "completed" | "failed" | "cancelled";
  data: Document[];
  id: string;
}

// Job error event
interface JobErrorEvent {
  status: "failed";
  data: Document[];
  error: string;
  id: string;
}

// Job status snapshot (CrawlJob or BatchScrapeJob)
type JobSnapshot = CrawlJob | BatchScrapeJob;
```

## Usage Examples

### Basic Crawl Monitoring

```typescript
// Start a crawl job
const crawlResponse = await app.startCrawl('https://example.com', {
  limit: 100,
  scrapeOptions: { formats: ['markdown'] }
});

// Create watcher for real-time monitoring
const watcher = app.watcher(crawlResponse.id, {
  kind: 'crawl',
  pollInterval: 2,
  timeout: 300 // 5 minutes
});

// Listen for individual documents
watcher.on('document', (document) => {
  console.log(`New document scraped: ${document.metadata?.sourceURL}`);
  console.log(`Content length: ${document.markdown?.length || 0} characters`);
});

// Listen for job status updates
watcher.on('snapshot', (snapshot) => {
  console.log(`Progress: ${snapshot.completed}/${snapshot.total} - Status: ${snapshot.status}`);
  console.log(`Credits used: ${snapshot.creditsUsed || 0}`);
});

// Listen for job completion
watcher.on('done', (result) => {
  console.log(`Crawl ${result.status}! Total documents: ${result.data.length}`);
  watcher.close();
});

// Listen for errors
watcher.on('error', (error) => {
  console.error(`Crawl failed: ${error.error}`);
  watcher.close();
});

// Start monitoring
await watcher.start();
```

### Batch Job Monitoring

```typescript
const urls = Array.from({ length: 50 }, (_, i) =>
  `https://api.example.com/items/${i + 1}`
);

// Start batch job
const batchResponse = await app.startBatchScrape(urls, {
  options: { formats: ['json'] },
  maxConcurrency: 5
});

// Monitor batch progress
const watcher = app.watcher(batchResponse.id, {
  kind: 'batch',
  pollInterval: 3,
  timeout: 600 // 10 minutes
});

let processedCount = 0;
const results: Document[] = [];

watcher.on('document', (document) => {
  processedCount++;
  results.push(document);

  console.log(`Processed ${processedCount} documents`);

  if (document.metadata?.error) {
    console.log(`Error processing ${document.metadata.sourceURL}: ${document.metadata.error}`);
  }
});

watcher.on('snapshot', (snapshot) => {
  const progress = Math.round((snapshot.completed / snapshot.total) * 100);
  console.log(`Batch progress: ${progress}% (${snapshot.completed}/${snapshot.total})`);

  if (snapshot.creditsUsed) {
    console.log(`Credits used so far: ${snapshot.creditsUsed}`);
  }
});

watcher.on('done', (result) => {
  console.log(`Batch ${result.status}!`);
  console.log(`Total processed: ${results.length}`);

  // Process all results
  const successfulResults = results.filter(doc => !doc.metadata?.error);
  const failedResults = results.filter(doc => doc.metadata?.error);

  console.log(`Successful: ${successfulResults.length}, Failed: ${failedResults.length}`);

  watcher.close();
});

watcher.on('error', (error) => {
  console.error(`Batch monitoring error: ${error.error}`);
  watcher.close();
});

await watcher.start();
```

### Advanced Monitoring with Progress Tracking

```typescript
class CrawlProgressTracker {
  private startTime: number;
  private documentTimes: number[] = [];
  private errors: string[] = [];

  constructor(private watcher: Watcher) {
    this.startTime = Date.now();
    this.setupEventHandlers();
  }

  private setupEventHandlers() {
    this.watcher.on('document', (document) => {
      this.documentTimes.push(Date.now());

      if (document.metadata?.error) {
        this.errors.push(`${document.metadata.sourceURL}: ${document.metadata.error}`);
      }

      this.logProgress(document);
    });

    this.watcher.on('snapshot', (snapshot) => {
      this.logSnapshot(snapshot);
    });

    this.watcher.on('done', (result) => {
      this.logFinalStats(result);
    });
  }

  private logProgress(document: Document) {
    const elapsed = Date.now() - this.startTime;
    const rate = this.documentTimes.length / (elapsed / 1000);

    console.log(`Document ${this.documentTimes.length}: ${document.metadata?.sourceURL}`);
    console.log(`Current rate: ${rate.toFixed(2)} docs/sec`);
  }

  private logSnapshot(snapshot: CrawlJob | BatchScrapeJob) {
    const elapsed = Date.now() - this.startTime;
    const progress = (snapshot.completed / snapshot.total) * 100;
    const eta = snapshot.completed > 0
      ? ((snapshot.total - snapshot.completed) * elapsed / snapshot.completed) / 1000
      : 0;

    console.log(`\n--- Progress Update ---`);
    console.log(`Status: ${snapshot.status}`);
    console.log(`Progress: ${snapshot.completed}/${snapshot.total} (${progress.toFixed(1)}%)`);
    console.log(`Elapsed: ${(elapsed / 1000).toFixed(0)}s`);
    console.log(`ETA: ${eta.toFixed(0)}s`);
    console.log(`Credits: ${snapshot.creditsUsed || 0}`);
    console.log(`Errors: ${this.errors.length}`);
    console.log(`-----------------------\n`);
  }

  private logFinalStats(result: JobCompletionEvent) {
    const totalTime = Date.now() - this.startTime;
    const avgRate = result.data.length / (totalTime / 1000);

    console.log(`\n=== Final Statistics ===`);
    console.log(`Status: ${result.status}`);
    console.log(`Total documents: ${result.data.length}`);
    console.log(`Total time: ${(totalTime / 1000).toFixed(1)}s`);
    console.log(`Average rate: ${avgRate.toFixed(2)} docs/sec`);
    console.log(`Total errors: ${this.errors.length}`);

    if (this.errors.length > 0) {
      console.log(`\nErrors:`);
      this.errors.slice(0, 5).forEach(error => console.log(`- ${error}`));
      if (this.errors.length > 5) {
        console.log(`... and ${this.errors.length - 5} more`);
      }
    }
    console.log(`========================\n`);
  }
}

// Usage
const crawlResponse = await app.startCrawl('https://docs.example.com', {
  limit: 500,
  scrapeOptions: { formats: ['markdown'] }
});

const watcher = app.watcher(crawlResponse.id);
const tracker = new CrawlProgressTracker(watcher);

await watcher.start();
```

### Multiple Job Monitoring

```typescript
class MultiJobMonitor {
  private watchers: Map<string, Watcher> = new Map();
  private jobStats = new Map<string, {
    type: 'crawl' | 'batch';
    started: number;
    completed: number;
    total: number;
    status: string;
  }>();

  async addCrawlJob(url: string, options: any) {
    const response = await app.startCrawl(url, options);
    this.addWatcher(response.id, 'crawl');
    return response.id;
  }

  async addBatchJob(urls: string[], options: any) {
    const response = await app.startBatchScrape(urls, options);
    this.addWatcher(response.id, 'batch');
    return response.id;
  }

  private addWatcher(jobId: string, type: 'crawl' | 'batch') {
    const watcher = app.watcher(jobId, { kind: type });

    this.jobStats.set(jobId, {
      type,
      started: Date.now(),
      completed: 0,
      total: 0,
      status: 'starting'
    });

    watcher.on('snapshot', (snapshot) => {
      const stats = this.jobStats.get(jobId)!;
      stats.completed = snapshot.completed;
      stats.total = snapshot.total;
      stats.status = snapshot.status;

      this.logAllJobs();
    });

    watcher.on('done', (result) => {
      console.log(`Job ${jobId} ${result.status}`);
      this.watchers.delete(jobId);

      if (this.watchers.size === 0) {
        console.log('All jobs completed!');
      }
    });

    watcher.on('error', (error) => {
      console.error(`Job ${jobId} error: ${error.error}`);
      this.watchers.delete(jobId);
    });

    this.watchers.set(jobId, watcher);
    watcher.start();
  }

  private logAllJobs() {
    console.clear();
    console.log('=== Multi-Job Monitor ===');

    for (const [jobId, stats] of this.jobStats) {
      const elapsed = (Date.now() - stats.started) / 1000;
      const progress = stats.total > 0 ? (stats.completed / stats.total * 100) : 0;

      console.log(`${jobId.substring(0, 8)}... (${stats.type}): ${stats.status}`);
      console.log(`  Progress: ${stats.completed}/${stats.total} (${progress.toFixed(1)}%)`);
      console.log(`  Elapsed: ${elapsed.toFixed(0)}s`);
      console.log('');
    }
  }

  closeAll() {
    for (const watcher of this.watchers.values()) {
      watcher.close();
    }
    this.watchers.clear();
  }
}

// Usage
const monitor = new MultiJobMonitor();

// Start multiple jobs concurrently
await Promise.all([
  monitor.addCrawlJob('https://site1.example.com', { limit: 100 }),
  monitor.addCrawlJob('https://site2.example.com', { limit: 150 }),
  monitor.addBatchJob([
    'https://api.example.com/data1',
    'https://api.example.com/data2'
  ], { options: { formats: ['json'] } })
]);

// Jobs will be monitored automatically
// Call monitor.closeAll() when done
```

### Error Recovery and Retry Monitoring

```typescript
class RobustCrawlMonitor {
  private maxRetries = 3;
  private retryCount = 0;

  async startMonitoredCrawl(url: string, options: any) {
    while (this.retryCount < this.maxRetries) {
      try {
        const response = await app.startCrawl(url, options);
        return await this.monitorWithRetry(response.id);
      } catch (error) {
        this.retryCount++;
        console.log(`Attempt ${this.retryCount} failed:`, error);

        if (this.retryCount >= this.maxRetries) {
          throw new Error(`Failed after ${this.maxRetries} attempts`);
        }

        // Wait before retry
        await new Promise(resolve => setTimeout(resolve, 5000 * this.retryCount));
      }
    }
  }

  private async monitorWithRetry(jobId: string): Promise<Document[]> {
    return new Promise((resolve, reject) => {
      const watcher = app.watcher(jobId, {
        timeout: 300,
        pollInterval: 2
      });

      const documents: Document[] = [];
      let lastSnapshot: CrawlJob | null = null;

      watcher.on('document', (document) => {
        documents.push(document);
      });

      watcher.on('snapshot', (snapshot) => {
        lastSnapshot = snapshot as CrawlJob;
        console.log(`Progress: ${snapshot.completed}/${snapshot.total}`);
      });

      watcher.on('done', (result) => {
        if (result.status === 'completed') {
          resolve(documents);
        } else {
          reject(new Error(`Job ${result.status}: ${JSON.stringify(result)}`));
        }
        watcher.close();
      });

      watcher.on('error', (error) => {
        // Check if we got partial results
        if (documents.length > 0) {
          console.log(`Partial success: got ${documents.length} documents before error`);
          resolve(documents);
        } else {
          reject(new Error(error.error));
        }
        watcher.close();
      });

      watcher.start().catch(reject);
    });
  }
}

// Usage
const monitor = new RobustCrawlMonitor();

try {
  const documents = await monitor.startMonitoredCrawl('https://example.com', {
    limit: 200,
    scrapeOptions: { formats: ['markdown'] }
  });

  console.log(`Successfully crawled ${documents.length} documents`);
} catch (error) {
  console.error('Crawl failed completely:', error);
}
```