# Real-time Monitoring

WebSocket-based job monitoring with automatic fallback to polling for long-running crawl and batch operations.

## Core Monitoring Method

```typescript { .api }
/**
 * Create a watcher for a crawl or batch job
 * @param jobId - Job identifier to monitor
 * @param opts - Watcher configuration options
 * @returns Watcher instance for real-time updates
 */
watcher(jobId: string, opts?: WatcherOptions): Watcher;
```
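
For example, a watcher with default options can be created from a job ID and driven through its lifecycle. A minimal sketch, assuming `app` is an initialized client and `jobId` came from a previously started crawl or batch job:

```typescript
const watcher = app.watcher(jobId); // default WatcherOptions

// Register event listeners here, then begin monitoring
await watcher.start();

// ...later: stop monitoring and close any open connections
watcher.close();
```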

## Watcher Configuration

```typescript { .api }
interface WatcherOptions {
  // Job type to monitor
  kind?: "crawl" | "batch";

  // Polling interval in seconds (fallback mode)
  pollInterval?: number;

  // Total timeout in seconds
  timeout?: number;
}
```
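
All three fields are optional. A sketch of passing explicit options; the values are illustrative, not documented defaults:

```typescript
const watcher = app.watcher(jobId, {
  kind: "crawl",    // the job being watched is a crawl
  pollInterval: 2,  // poll every 2 seconds when the WebSocket is unavailable
  timeout: 120,     // stop monitoring after 2 minutes
});
```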

## Watcher Class

```typescript { .api }
/**
 * EventEmitter-based watcher for real-time job monitoring
 * Automatically handles WebSocket connection with polling fallback
 */
class Watcher extends EventEmitter {
  constructor(http: HttpClient, jobId: string, opts?: WatcherOptions);

  /**
   * Start monitoring the job
   * @returns Promise that resolves when connection is established
   */
  start(): Promise<void>;

  /**
   * Stop monitoring and close connections
   */
  close(): void;

  // Event methods inherited from EventEmitter
  on(event: 'document', listener: (document: Document & { id: string }) => void): this;
  on(event: 'snapshot', listener: (snapshot: CrawlJob | BatchScrapeJob) => void): this;
  on(event: 'done', listener: (result: JobCompletionEvent) => void): this;
  on(event: 'error', listener: (error: JobErrorEvent) => void): this;

  emit(event: 'document', document: Document & { id: string }): boolean;
  emit(event: 'snapshot', snapshot: CrawlJob | BatchScrapeJob): boolean;
  emit(event: 'done', result: JobCompletionEvent): boolean;
  emit(event: 'error', error: JobErrorEvent): boolean;
}
```
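
Because `Watcher` extends Node's `EventEmitter`, the standard `EventEmitter` utilities apply. For example, `events.once` can turn the terminal event into an awaitable value; a sketch, assuming a started `watcher` (note that `once` rejects if an `'error'` event fires first):

```typescript
import { once } from 'node:events';

await watcher.start();

// Resolves with the listener arguments of the first 'done' event
const [result] = await once(watcher, 'done');
console.log(`Job finished with status: ${result.status}`);

watcher.close();
```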

## Event Types

```typescript { .api }
// Job completion event
interface JobCompletionEvent {
  status: "completed" | "failed" | "cancelled";
  data: Document[];
  id: string;
}

// Job error event
interface JobErrorEvent {
  status: "failed";
  data: Document[];
  error: string;
  id: string;
}

// Job status snapshot (CrawlJob or BatchScrapeJob)
type JobSnapshot = CrawlJob | BatchScrapeJob;
```
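
Since `JobCompletionEvent.status` can be `"failed"` or `"cancelled"` as well as `"completed"`, a `done` handler should branch on the status rather than assume success. A sketch, assuming an active `watcher`:

```typescript
watcher.on('done', (result: JobCompletionEvent) => {
  if (result.status === 'completed') {
    console.log(`Job ${result.id} finished with ${result.data.length} documents`);
  } else {
    // "failed" or "cancelled"; result.data holds the documents received so far (assumption)
    console.warn(`Job ${result.id} ended with status ${result.status} after ${result.data.length} documents`);
  }
  watcher.close();
});
```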

## Usage Examples

### Basic Crawl Monitoring

```typescript
// Start a crawl job
const crawlResponse = await app.startCrawl('https://example.com', {
  limit: 100,
  scrapeOptions: { formats: ['markdown'] }
});

// Create watcher for real-time monitoring
const watcher = app.watcher(crawlResponse.id, {
  kind: 'crawl',
  pollInterval: 2,
  timeout: 300 // 5 minutes
});

// Listen for individual documents
watcher.on('document', (document) => {
  console.log(`New document scraped: ${document.metadata?.sourceURL}`);
  console.log(`Content length: ${document.markdown?.length || 0} characters`);
});

// Listen for job status updates
watcher.on('snapshot', (snapshot) => {
  console.log(`Progress: ${snapshot.completed}/${snapshot.total} - Status: ${snapshot.status}`);
  console.log(`Credits used: ${snapshot.creditsUsed || 0}`);
});

// Listen for job completion
watcher.on('done', (result) => {
  console.log(`Crawl ${result.status}! Total documents: ${result.data.length}`);
  watcher.close();
});

// Listen for errors
watcher.on('error', (error) => {
  console.error(`Crawl failed: ${error.error}`);
  watcher.close();
});

// Start monitoring
await watcher.start();
```

### Batch Job Monitoring

```typescript
const urls = Array.from({ length: 50 }, (_, i) =>
  `https://api.example.com/items/${i + 1}`
);

// Start batch job
const batchResponse = await app.startBatchScrape(urls, {
  options: { formats: ['json'] },
  maxConcurrency: 5
});

// Monitor batch progress
const watcher = app.watcher(batchResponse.id, {
  kind: 'batch',
  pollInterval: 3,
  timeout: 600 // 10 minutes
});

let processedCount = 0;
const results: Document[] = [];

watcher.on('document', (document) => {
  processedCount++;
  results.push(document);

  console.log(`Processed ${processedCount} documents`);

  if (document.metadata?.error) {
    console.log(`Error processing ${document.metadata.sourceURL}: ${document.metadata.error}`);
  }
});

watcher.on('snapshot', (snapshot) => {
  const progress = Math.round((snapshot.completed / snapshot.total) * 100);
  console.log(`Batch progress: ${progress}% (${snapshot.completed}/${snapshot.total})`);

  if (snapshot.creditsUsed) {
    console.log(`Credits used so far: ${snapshot.creditsUsed}`);
  }
});

watcher.on('done', (result) => {
  console.log(`Batch ${result.status}!`);
  console.log(`Total processed: ${results.length}`);

  // Process all results
  const successfulResults = results.filter(doc => !doc.metadata?.error);
  const failedResults = results.filter(doc => doc.metadata?.error);

  console.log(`Successful: ${successfulResults.length}, Failed: ${failedResults.length}`);

  watcher.close();
});

watcher.on('error', (error) => {
  console.error(`Batch monitoring error: ${error.error}`);
  watcher.close();
});

await watcher.start();
```

### Advanced Monitoring with Progress Tracking

```typescript
class CrawlProgressTracker {
  private startTime: number;
  private documentTimes: number[] = [];
  private errors: string[] = [];

  constructor(private watcher: Watcher) {
    this.startTime = Date.now();
    this.setupEventHandlers();
  }

  private setupEventHandlers() {
    this.watcher.on('document', (document) => {
      this.documentTimes.push(Date.now());

      if (document.metadata?.error) {
        this.errors.push(`${document.metadata.sourceURL}: ${document.metadata.error}`);
      }

      this.logProgress(document);
    });

    this.watcher.on('snapshot', (snapshot) => {
      this.logSnapshot(snapshot);
    });

    this.watcher.on('done', (result) => {
      this.logFinalStats(result);
    });
  }

  private logProgress(document: Document) {
    const elapsed = Date.now() - this.startTime;
    const rate = this.documentTimes.length / (elapsed / 1000);

    console.log(`Document ${this.documentTimes.length}: ${document.metadata?.sourceURL}`);
    console.log(`Current rate: ${rate.toFixed(2)} docs/sec`);
  }

  private logSnapshot(snapshot: CrawlJob | BatchScrapeJob) {
    const elapsed = Date.now() - this.startTime;
    const progress = (snapshot.completed / snapshot.total) * 100;
    const eta = snapshot.completed > 0
      ? ((snapshot.total - snapshot.completed) * elapsed / snapshot.completed) / 1000
      : 0;

    console.log(`\n--- Progress Update ---`);
    console.log(`Status: ${snapshot.status}`);
    console.log(`Progress: ${snapshot.completed}/${snapshot.total} (${progress.toFixed(1)}%)`);
    console.log(`Elapsed: ${(elapsed / 1000).toFixed(0)}s`);
    console.log(`ETA: ${eta.toFixed(0)}s`);
    console.log(`Credits: ${snapshot.creditsUsed || 0}`);
    console.log(`Errors: ${this.errors.length}`);
    console.log(`-----------------------\n`);
  }

  private logFinalStats(result: JobCompletionEvent) {
    const totalTime = Date.now() - this.startTime;
    const avgRate = result.data.length / (totalTime / 1000);

    console.log(`\n=== Final Statistics ===`);
    console.log(`Status: ${result.status}`);
    console.log(`Total documents: ${result.data.length}`);
    console.log(`Total time: ${(totalTime / 1000).toFixed(1)}s`);
    console.log(`Average rate: ${avgRate.toFixed(2)} docs/sec`);
    console.log(`Total errors: ${this.errors.length}`);

    if (this.errors.length > 0) {
      console.log(`\nErrors:`);
      this.errors.slice(0, 5).forEach(error => console.log(`- ${error}`));
      if (this.errors.length > 5) {
        console.log(`... and ${this.errors.length - 5} more`);
      }
    }
    console.log(`========================\n`);
  }
}

// Usage
const crawlResponse = await app.startCrawl('https://docs.example.com', {
  limit: 500,
  scrapeOptions: { formats: ['markdown'] }
});

const watcher = app.watcher(crawlResponse.id);
const tracker = new CrawlProgressTracker(watcher);

await watcher.start();
```

### Multiple Job Monitoring

```typescript
class MultiJobMonitor {
  private watchers: Map<string, Watcher> = new Map();
  private jobStats = new Map<string, {
    type: 'crawl' | 'batch';
    started: number;
    completed: number;
    total: number;
    status: string;
  }>();

  async addCrawlJob(url: string, options: any) {
    const response = await app.startCrawl(url, options);
    this.addWatcher(response.id, 'crawl');
    return response.id;
  }

  async addBatchJob(urls: string[], options: any) {
    const response = await app.startBatchScrape(urls, options);
    this.addWatcher(response.id, 'batch');
    return response.id;
  }

  private addWatcher(jobId: string, type: 'crawl' | 'batch') {
    const watcher = app.watcher(jobId, { kind: type });

    this.jobStats.set(jobId, {
      type,
      started: Date.now(),
      completed: 0,
      total: 0,
      status: 'starting'
    });

    watcher.on('snapshot', (snapshot) => {
      const stats = this.jobStats.get(jobId)!;
      stats.completed = snapshot.completed;
      stats.total = snapshot.total;
      stats.status = snapshot.status;

      this.logAllJobs();
    });

    watcher.on('done', (result) => {
      console.log(`Job ${jobId} ${result.status}`);
      this.watchers.delete(jobId);

      if (this.watchers.size === 0) {
        console.log('All jobs completed!');
      }
    });

    watcher.on('error', (error) => {
      console.error(`Job ${jobId} error: ${error.error}`);
      this.watchers.delete(jobId);
    });

    this.watchers.set(jobId, watcher);
    // Fire-and-forget start; catch so a failed start does not become an unhandled rejection
    watcher.start().catch((err) => console.error(`Failed to start watcher for job ${jobId}:`, err));
  }

  private logAllJobs() {
    console.clear();
    console.log('=== Multi-Job Monitor ===');

    for (const [jobId, stats] of this.jobStats) {
      const elapsed = (Date.now() - stats.started) / 1000;
      const progress = stats.total > 0 ? (stats.completed / stats.total * 100) : 0;

      console.log(`${jobId.substring(0, 8)}... (${stats.type}): ${stats.status}`);
      console.log(`  Progress: ${stats.completed}/${stats.total} (${progress.toFixed(1)}%)`);
      console.log(`  Elapsed: ${elapsed.toFixed(0)}s`);
      console.log('');
    }
  }

  closeAll() {
    for (const watcher of this.watchers.values()) {
      watcher.close();
    }
    this.watchers.clear();
  }
}

// Usage
const monitor = new MultiJobMonitor();

// Start multiple jobs concurrently
await Promise.all([
  monitor.addCrawlJob('https://site1.example.com', { limit: 100 }),
  monitor.addCrawlJob('https://site2.example.com', { limit: 150 }),
  monitor.addBatchJob([
    'https://api.example.com/data1',
    'https://api.example.com/data2'
  ], { options: { formats: ['json'] } })
]);

// Jobs will be monitored automatically
// Call monitor.closeAll() when done
```

### Error Recovery and Retry Monitoring

```typescript
class RobustCrawlMonitor {
  private maxRetries = 3;
  private retryCount = 0;

  async startMonitoredCrawl(url: string, options: any) {
    while (this.retryCount < this.maxRetries) {
      try {
        const response = await app.startCrawl(url, options);
        return await this.monitorWithRetry(response.id);
      } catch (error) {
        this.retryCount++;
        console.log(`Attempt ${this.retryCount} failed:`, error);

        if (this.retryCount >= this.maxRetries) {
          throw new Error(`Failed after ${this.maxRetries} attempts`);
        }

        // Wait before retry
        await new Promise(resolve => setTimeout(resolve, 5000 * this.retryCount));
      }
    }
  }

  private async monitorWithRetry(jobId: string): Promise<Document[]> {
    return new Promise((resolve, reject) => {
      const watcher = app.watcher(jobId, {
        timeout: 300,
        pollInterval: 2
      });

      const documents: Document[] = [];
      let lastSnapshot: CrawlJob | null = null;

      watcher.on('document', (document) => {
        documents.push(document);
      });

      watcher.on('snapshot', (snapshot) => {
        lastSnapshot = snapshot as CrawlJob;
        console.log(`Progress: ${snapshot.completed}/${snapshot.total}`);
      });

      watcher.on('done', (result) => {
        if (result.status === 'completed') {
          resolve(documents);
        } else {
          reject(new Error(`Job ${result.status}: ${JSON.stringify(result)}`));
        }
        watcher.close();
      });

      watcher.on('error', (error) => {
        // Check if we got partial results
        if (documents.length > 0) {
          console.log(`Partial success: got ${documents.length} documents before error`);
          resolve(documents);
        } else {
          reject(new Error(error.error));
        }
        watcher.close();
      });

      watcher.start().catch(reject);
    });
  }
}

// Usage
const monitor = new RobustCrawlMonitor();

try {
  const documents = await monitor.startMonitoredCrawl('https://example.com', {
    limit: 200,
    scrapeOptions: { formats: ['markdown'] }
  });

  console.log(`Successfully crawled ${documents.length} documents`);
} catch (error) {
  console.error('Crawl failed completely:', error);
}
```