# Batch Operations

Concurrent processing of multiple URLs with job monitoring, error handling, and webhook support.

## Core Batch Methods

```typescript { .api }
/**
 * Start a batch scrape job for multiple URLs
 * @param urls - Array of URLs to scrape
 * @param opts - Batch scraping options
 * @returns Promise resolving to job ID and invalid URLs
 */
startBatchScrape(urls: string[], opts?: BatchScrapeOptions): Promise<BatchScrapeResponse>;

/**
 * Get batch scrape job status and partial data
 * @param jobId - Batch job identifier
 * @param pagination - Pagination configuration for results
 * @returns Promise resolving to job status and data
 */
getBatchScrapeStatus(jobId: string, pagination?: PaginationConfig): Promise<BatchScrapeJob>;

/**
 * Retrieve batch scrape errors and robots.txt blocks
 * @param jobId - Batch job identifier
 * @returns Promise resolving to error details
 */
getBatchScrapeErrors(jobId: string): Promise<CrawlErrorsResponse>;

/**
 * Cancel a running batch scrape job
 * @param jobId - Batch job identifier
 * @returns Promise resolving to true if cancelled
 */
cancelBatchScrape(jobId: string): Promise<boolean>;

/**
 * Convenience waiter: start batch scrape and poll until completion
 * @param urls - Array of URLs to scrape
 * @param opts - Batch options plus waiter controls
 * @returns Promise resolving to final job snapshot
 */
batchScrape(urls: string[], opts?: BatchScrapeOptions & { pollInterval?: number; timeout?: number }): Promise<BatchScrapeJob>;
```

## Batch Configuration

```typescript { .api }
interface BatchScrapeOptions {
  // Scraping configuration for all URLs
  options?: ScrapeOptions;

  // Webhook notifications
  webhook?: string | WebhookConfig;

  // Job management
  appendToId?: string;
  ignoreInvalidURLs?: boolean;
  maxConcurrency?: number;

  // Privacy
  zeroDataRetention?: boolean;

  // Idempotency
  idempotencyKey?: string;

  // Integration tracking
  integration?: string;
}
```

## Response Types

```typescript { .api }
// Batch job initiation response
interface BatchScrapeResponse {
  id: string;
  url: string;
  invalidURLs?: string[];
}

// Batch job status and data
interface BatchScrapeJob {
  status: "scraping" | "completed" | "failed" | "cancelled";
  completed: number;
  total: number;
  creditsUsed?: number;
  expiresAt?: string;
  next?: string | null;
  data: Document[];
}
```

## Usage Examples

### Basic Batch Scraping

```typescript
const urls = [
  'https://example.com/page1',
  'https://example.com/page2',
  'https://example.com/page3',
  'https://example.com/page4'
];

const batchJob = await app.batchScrape(urls, {
  options: {
    formats: ['markdown', 'links']
  },
  maxConcurrency: 3
});

console.log(`Scraped ${batchJob.completed} of ${batchJob.total} URLs`);
console.log(batchJob.data); // Array of scraped documents
```

### Async Batch with Status Monitoring

```typescript
const urls = [
  'https://news.example.com/article1',
  'https://news.example.com/article2',
  'https://news.example.com/article3'
];

// Start batch job
const batchResponse = await app.startBatchScrape(urls, {
  options: {
    formats: ['markdown', {
      type: 'json',
      schema: {
        type: 'object',
        properties: {
          title: { type: 'string' },
          author: { type: 'string' },
          publishDate: { type: 'string' },
          content: { type: 'string' }
        }
      }
    }]
  },
  maxConcurrency: 2
});

console.log(`Started batch job: ${batchResponse.id}`);
if (batchResponse.invalidURLs?.length) {
  console.log('Invalid URLs detected:', batchResponse.invalidURLs);
}

// Monitor progress
let job: BatchScrapeJob;
do {
  await new Promise(resolve => setTimeout(resolve, 3000)); // Wait 3 seconds
  job = await app.getBatchScrapeStatus(batchResponse.id);
  console.log(`Progress: ${job.completed}/${job.total} - Status: ${job.status}`);
} while (job.status === 'scraping');

console.log('Batch completed!', job.data.length, 'documents scraped');
```

### Structured Data Extraction

```typescript
import { z } from 'zod';

const ProductSchema = z.object({
  name: z.string(),
  price: z.number(),
  description: z.string(),
  availability: z.enum(['in-stock', 'out-of-stock', 'pre-order']),
  images: z.array(z.string()),
  rating: z.number().optional()
});

const productUrls = [
  'https://shop.example.com/products/laptop-1',
  'https://shop.example.com/products/laptop-2',
  'https://shop.example.com/products/laptop-3'
];

const batchJob = await app.batchScrape(productUrls, {
  options: {
    formats: [{
      type: 'json',
      schema: ProductSchema
    }],
    onlyMainContent: true
  },
  maxConcurrency: 2
});

// Each document.json will be typed as ProductSchema
for (const doc of batchJob.data) {
  console.log('Product:', doc.json); // Typed product data
}
```

### Batch with Webhooks

```typescript
const urls = [
  'https://api.example.com/data1',
  'https://api.example.com/data2',
  'https://api.example.com/data3'
];

const batchJob = await app.batchScrape(urls, {
  options: {
    formats: ['json'],
    headers: {
      'Authorization': 'Bearer api-token'
    }
  },
  webhook: {
    url: 'https://myapp.com/webhooks/batch-complete',
    headers: {
      'X-API-Key': 'webhook-secret'
    },
    metadata: {
      'batchType': 'api-data-sync',
      'userId': '12345'
    },
    events: ['completed', 'failed']
  },
  maxConcurrency: 5
});
```

### Error Handling and Invalid URLs

```typescript
const urls = [
  'https://example.com/valid-page',
  'https://invalid-domain-xyz.com/page',
  'https://example.com/another-valid-page',
  'not-a-valid-url'
];

const batchResponse = await app.startBatchScrape(urls, {
  options: {
    formats: ['markdown']
  },
  ignoreInvalidURLs: true // Continue processing despite invalid URLs
});

// Check which URLs were invalid
if (batchResponse.invalidURLs?.length) {
  console.log('Invalid URLs that were skipped:', batchResponse.invalidURLs);
}

// Monitor and handle errors
const job = await app.getBatchScrapeStatus(batchResponse.id);
if (job.status === 'completed') {
  // Get detailed error information
  const errors = await app.getBatchScrapeErrors(batchResponse.id);

  if (errors.errors.length > 0) {
    console.log('Scraping errors:');
    errors.errors.forEach(error => {
      console.log(`- ${error.url}: ${error.error} (${error.code})`);
    });
  }

  if (errors.robotsBlocked.length > 0) {
    console.log('URLs blocked by robots.txt:', errors.robotsBlocked);
  }
}
```

### Idempotent Operations

```typescript
const urls = [
  'https://example.com/data1',
  'https://example.com/data2'
];

// First request with idempotency key
const batchJob1 = await app.startBatchScrape(urls, {
  options: { formats: ['markdown'] },
  idempotencyKey: 'batch-operation-123'
});

// Duplicate request with same key - will return existing job
const batchJob2 = await app.startBatchScrape(urls, {
  options: { formats: ['markdown'] },
  idempotencyKey: 'batch-operation-123'
});

console.log(batchJob1.id === batchJob2.id); // true
```

### Advanced Batch Configuration

```typescript
const urls = Array.from({ length: 100 }, (_, i) =>
  `https://api.example.com/items/${i + 1}`
);

const batchJob = await app.batchScrape(urls, {
  options: {
    formats: ['json', 'markdown'],
    headers: {
      'User-Agent': 'MyBot/1.0',
      'Accept': 'application/json'
    },
    timeout: 30000,
    mobile: false,
    fastMode: true,
    proxy: 'basic'
  },
  maxConcurrency: 10,
  zeroDataRetention: true,
  ignoreInvalidURLs: true,
  webhook: 'https://myapp.com/batch-webhook'
});
```

### Pagination with Large Results

```typescript
const urls = Array.from({ length: 1000 }, (_, i) =>
  `https://catalog.example.com/item/${i + 1}`
);

// Start large batch job
const batchResponse = await app.startBatchScrape(urls, {
  options: { formats: ['markdown'] },
  maxConcurrency: 20
});

// Get results with pagination
const job = await app.getBatchScrapeStatus(batchResponse.id, {
  autoPaginate: true,
  maxPages: 50,
  maxResults: 5000,
  maxWaitTime: 600 // 10 minutes
});

console.log(`Retrieved ${job.data.length} documents`);
```

### Canceling Batch Jobs

```typescript
// Start a large batch job
const batchResponse = await app.startBatchScrape(urls, {
  options: { formats: ['markdown'] }
});

// Cancel if needed
setTimeout(async () => {
  const cancelled = await app.cancelBatchScrape(batchResponse.id);
  if (cancelled) {
    console.log('Batch job cancelled successfully');
  }
}, 30000); // Cancel after 30 seconds
```