# Legacy V1 API

Complete v1 API support for backward compatibility with existing implementations. The v1 API provides the original Firecrawl functionality with its own set of methods, types, and patterns.

## Capabilities

### V1 Synchronous Client

Legacy synchronous client for v1 API operations with traditional method signatures and response formats.

```python { .api }
class V1FirecrawlApp:
    """Legacy v1 synchronous client"""

    def __init__(self, api_key: str = None, api_url: str = None):
        """
        Initialize v1 client.

        Parameters:
        - api_key: str, Firecrawl API key
        - api_url: str, optional custom API URL
        """

    def scrape_url(self, url: str, params: Optional[dict] = None) -> dict:
        """
        Scrape a single URL using v1 API.

        Parameters:
        - url: str, target URL to scrape
        - params: dict, optional scraping parameters

        Returns:
        - dict: v1 scrape response format
        """

    def crawl_url(self, url: str, params: Optional[dict] = None, poll_interval: int = 2) -> dict:
        """
        Crawl a website using v1 API with automatic polling.

        Parameters:
        - url: str, target URL to crawl
        - params: dict, optional crawling parameters
        - poll_interval: int, polling interval in seconds

        Returns:
        - dict: v1 crawl response with all results
        """

    def async_crawl_url(self, url: str, params: Optional[dict] = None) -> dict:
        """
        Start async crawl job using v1 API.

        Parameters:
        - url: str, target URL to crawl
        - params: dict, optional crawling parameters

        Returns:
        - dict: v1 job response with job ID
        """

    def check_crawl_status(self, job_id: str) -> dict:
        """
        Check v1 crawl job status.

        Parameters:
        - job_id: str, job ID from async_crawl_url

        Returns:
        - dict: v1 job status response
        """

    def check_crawl_errors(self, job_id: str) -> dict:
        """
        Check v1 crawl job errors.

        Parameters:
        - job_id: str, job ID from async_crawl_url

        Returns:
        - dict: v1 job error information
        """

    def cancel_crawl(self, job_id: str) -> dict:
        """
        Cancel a v1 crawl job.

        Parameters:
        - job_id: str, job ID to cancel

        Returns:
        - dict: v1 cancellation response
        """

    def crawl_url_and_watch(self, url: str, params: Optional[dict] = None, poll_interval: int = 2) -> dict:
        """
        Crawl URL with real-time progress monitoring.

        Parameters:
        - url: str, target URL to crawl
        - params: dict, optional crawling parameters
        - poll_interval: int, polling interval in seconds

        Returns:
        - dict: v1 crawl response with progress tracking
        """
```
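
The job-management helpers (`check_crawl_errors`, `cancel_crawl`) follow the same dict-based pattern as the other v1 calls. A minimal sketch, assuming the error payload exposes an `errors` list and the cancellation response carries a `status` field (neither shape is guaranteed here):

```python
from firecrawl import V1FirecrawlApp

app = V1FirecrawlApp(api_key="your-api-key")

# Start a crawl job without blocking
job = app.async_crawl_url("https://example.com", {"crawlerOptions": {"limit": 25}})
job_id = job["jobId"]

# Inspect any per-page errors collected so far
# (assumes the v1 error payload contains an "errors" list)
errors = app.check_crawl_errors(job_id)
for err in errors.get("errors", []):
    print(f"Error: {err}")

# Abort the job if it is no longer needed
# (assumes the cancellation response carries a "status" field)
cancelled = app.cancel_crawl(job_id)
print(f"Cancellation status: {cancelled.get('status')}")
```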

### V1 Asynchronous Client

Legacy asynchronous client providing async versions of all v1 operations.

```python { .api }
class AsyncV1FirecrawlApp:
    """Legacy v1 asynchronous client"""

    def __init__(self, api_key: str = None, api_url: str = None):
        """Initialize async v1 client"""

    async def scrape_url(self, url: str, params: Optional[dict] = None) -> dict:
        """Async version of v1 scrape_url"""

    async def crawl_url(self, url: str, params: Optional[dict] = None, poll_interval: int = 2) -> dict:
        """Async version of v1 crawl_url with polling"""

    async def async_crawl_url(self, url: str, params: Optional[dict] = None) -> dict:
        """Async version of v1 async_crawl_url"""

    async def check_crawl_status(self, job_id: str) -> dict:
        """Async version of v1 check_crawl_status"""
```
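
For long crawls, the async client can also start a job and poll it cooperatively instead of blocking in `crawl_url`. A minimal sketch, assuming the same `jobId` and `status` fields used by the v1 responses elsewhere in this document:

```python
import asyncio
from firecrawl import AsyncV1FirecrawlApp

async def poll_crawl() -> dict:
    app = AsyncV1FirecrawlApp(api_key="your-api-key")

    # Kick off the job without waiting for it to finish
    job = await app.async_crawl_url("https://example.com", {"crawlerOptions": {"limit": 25}})
    job_id = job["jobId"]

    # Poll until the job reaches a terminal state
    while True:
        status = await app.check_crawl_status(job_id)
        if status["status"] in ("completed", "failed"):
            return status
        await asyncio.sleep(2)

result = asyncio.run(poll_crawl())
print(f"Final status: {result['status']}")
```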

### V1 Extended Operations

Additional v1 operations for mapping, extraction, research, and batch processing.

```python { .api }
# V1FirecrawlApp additional methods
def map_url(self, url: str) -> dict:
    """
    Generate URL map using v1 API.

    Parameters:
    - url: str, target website URL

    Returns:
    - dict: v1 map response
    """

def extract(self, data: dict, schema: dict, prompt: Optional[str] = None) -> dict:
    """
    Extract structured data using v1 API.

    Parameters:
    - data: dict, input data for extraction
    - schema: dict, extraction schema
    - prompt: str, optional extraction prompt

    Returns:
    - dict: v1 extraction response
    """

def deep_research(self, query: str, max_articles: int = 5) -> dict:
    """
    Perform deep research using v1 API.

    Parameters:
    - query: str, research query
    - max_articles: int, maximum articles to analyze

    Returns:
    - dict: v1 research response
    """

def generate_llms_text(self, data: dict, prompt: str) -> dict:
    """
    Generate text using LLM via v1 API.

    Parameters:
    - data: dict, input data
    - prompt: str, generation prompt

    Returns:
    - dict: v1 text generation response
    """

def batch_scrape_urls(self, urls: List[str], params: Optional[dict] = None) -> dict:
    """
    Batch scrape URLs using v1 API.

    Parameters:
    - urls: List[str], list of URLs to scrape
    - params: dict, optional scraping parameters

    Returns:
    - dict: v1 batch scrape response
    """

def async_batch_scrape_urls(self, urls: List[str], params: Optional[dict] = None) -> dict:
    """
    Start async batch scrape using v1 API.

    Parameters:
    - urls: List[str], list of URLs to scrape
    - params: dict, optional scraping parameters

    Returns:
    - dict: v1 batch job response
    """

def batch_scrape_urls_and_watch(self, urls: List[str], params: Optional[dict] = None, poll_interval: int = 2) -> dict:
    """
    Batch scrape URLs with real-time progress monitoring.

    Parameters:
    - urls: List[str], list of URLs to scrape
    - params: dict, optional scraping parameters
    - poll_interval: int, polling interval in seconds

    Returns:
    - dict: v1 batch scrape response with progress tracking
    """

def check_batch_scrape_status(self, job_id: str) -> dict:
    """
    Check v1 batch scrape job status.

    Parameters:
    - job_id: str, job ID from async_batch_scrape_urls

    Returns:
    - dict: v1 batch job status response
    """

def check_batch_scrape_errors(self, job_id: str) -> dict:
    """
    Check v1 batch scrape job errors.

    Parameters:
    - job_id: str, job ID from async_batch_scrape_urls

    Returns:
    - dict: v1 batch job error information
    """
```
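
The non-blocking batch path combines well with `map_url`. A short sketch, assuming the map response exposes discovered URLs under a `links` key and the batch job response reuses the `jobId`/`status` shape of the crawl endpoints (both are assumptions, not confirmed shapes):

```python
import time
from firecrawl import V1FirecrawlApp

app = V1FirecrawlApp(api_key="your-api-key")

# Discover URLs first, then scrape them as a background batch job
# (assumes the map response exposes discovered URLs under "links")
site_map = app.map_url("https://example.com")
urls = site_map.get("links", [])[:10]

job = app.async_batch_scrape_urls(urls, {"pageOptions": {"includeMarkdown": True}})
job_id = job["jobId"]

# Poll the batch job until it reaches a terminal state
while True:
    status = app.check_batch_scrape_status(job_id)
    if status["status"] in ("completed", "failed"):
        break
    time.sleep(2)

if status["status"] == "failed":
    print(app.check_batch_scrape_errors(job_id))
```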

## Usage Examples

### Basic V1 Operations

```python
from firecrawl import V1FirecrawlApp

# Initialize v1 client
app = V1FirecrawlApp(api_key="your-api-key")

# V1 scraping
result = app.scrape_url("https://example.com", {
    "pageOptions": {
        "includeHtml": True,
        "includeMarkdown": True,
        "includeRawHtml": False,
        "screenshot": False
    }
})

print(f"Content: {result['data']['content']}")
print(f"HTML: {result['data']['html']}")

# V1 crawling with polling
crawl_result = app.crawl_url("https://example.com", {
    "crawlerOptions": {
        "maxDepth": 2,
        "limit": 50
    },
    "pageOptions": {
        "includeMarkdown": True
    }
})

print(f"Crawled {len(crawl_result['data'])} pages")
```

### V1 Async Operations

```python
from firecrawl import V1FirecrawlApp
import time

app = V1FirecrawlApp(api_key="your-api-key")

# Start async crawl
job_response = app.async_crawl_url("https://example.com", {
    "crawlerOptions": {"limit": 100}
})

job_id = job_response['jobId']
print(f"Started job: {job_id}")

# Poll for completion
while True:
    status = app.check_crawl_status(job_id)
    print(f"Status: {status['status']}")

    if status['status'] == 'completed':
        print(f"Crawl completed with {len(status['data'])} pages")
        break
    elif status['status'] == 'failed':
        print("Crawl failed")
        break

    time.sleep(5)
```

### V1 Data Extraction

```python
from firecrawl import V1FirecrawlApp

app = V1FirecrawlApp(api_key="your-api-key")

# First scrape the page
scrape_result = app.scrape_url("https://store.example.com/product/123")

# Extract structured data
extraction_schema = {
    "type": "object",
    "properties": {
        "product_name": {"type": "string"},
        "price": {"type": "number"},
        "availability": {"type": "string"},
        "features": {
            "type": "array",
            "items": {"type": "string"}
        }
    }
}

extracted = app.extract(
    data=scrape_result['data'],
    schema=extraction_schema,
    prompt="Extract product information from this e-commerce page"
)

print(f"Product: {extracted['data']['product_name']}")
print(f"Price: ${extracted['data']['price']}")
```

### V1 Batch Operations

```python
from firecrawl import V1FirecrawlApp

app = V1FirecrawlApp(api_key="your-api-key")

# Batch scrape multiple URLs
urls = [
    "https://example1.com",
    "https://example2.com",
    "https://example3.com"
]

batch_result = app.batch_scrape_urls(urls, {
    "pageOptions": {
        "includeMarkdown": True,
        "includeHtml": False
    }
})

for item in batch_result['data']:
    print(f"URL: {item['metadata']['sourceURL']}")
    print(f"Title: {item['metadata'].get('title', 'No title')}")
    print(f"Content length: {len(item['content'])}")
    print("---")
```

### V1 Research Operations

```python
from firecrawl import V1FirecrawlApp

app = V1FirecrawlApp(api_key="your-api-key")

# Deep research on a topic
research_result = app.deep_research(
    query="latest developments in artificial intelligence",
    max_articles=10
)

print(f"Research summary: {research_result['data']['summary']}")
print(f"Sources analyzed: {len(research_result['data']['sources'])}")

for source in research_result['data']['sources']:
    print(f"- {source['title']}: {source['url']}")

# Generate text based on data
text_result = app.generate_llms_text(
    data=research_result['data'],
    prompt="Write a brief executive summary of the AI developments"
)

print(f"Generated summary: {text_result['data']['text']}")
```

## V1 Types and Configuration

### V1 Configuration Types

```python { .api }
class V1JsonConfig:
    """V1 JSON configuration"""
    include_html: bool
    include_markdown: bool
    include_raw_html: bool
    include_links: bool

class V1ScrapeOptions:
    """V1 scraping options"""
    formats: List[str]  # ["markdown", "html", "rawHtml", "content", "links", "screenshot"]
    headers: Optional[dict]
    include_tags: Optional[List[str]]
    exclude_tags: Optional[List[str]]
    only_main_content: Optional[bool]
    wait_for: Optional[int]

class V1ChangeTrackingOptions:
    """V1 change tracking configuration"""
    include_html: bool
    xpath: Optional[str]
    css_selector: Optional[str]

class V1CrawlOptions:
    """V1 crawling options"""
    includes: Optional[List[str]]
    excludes: Optional[List[str]]
    generate_img_alt_text: Optional[bool]
    return_only_urls: Optional[bool]
    max_depth: Optional[int]
    mode: Optional[str]  # "fast", "default"
    ignore_sitemap: Optional[bool]
    limit: Optional[int]
    allow_backward_crawling: Optional[bool]
    allow_external_content_links: Optional[bool]

class V1ExtractOptions:
    """V1 extraction options"""
    mode: Optional[str]  # "llm-extraction", "llm-extraction-from-raw-html"
    extraction_prompt: Optional[str]
    extraction_schema: Optional[dict]
```
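
These option classes mirror the dict parameters used throughout the examples; the snake_case fields appear to correspond to the camelCase keys the v1 endpoints accept. A hedged sketch of that mapping (the helper below and the field-to-key correspondence are illustrative assumptions, not part of the SDK):

```python
def crawl_options_to_params(max_depth=None, limit=None, ignore_sitemap=None, mode=None) -> dict:
    """Illustrative helper: build a v1 crawlerOptions dict from V1CrawlOptions-style fields."""
    crawler_options = {}
    if max_depth is not None:
        crawler_options["maxDepth"] = max_depth          # assumed camelCase key
    if limit is not None:
        crawler_options["limit"] = limit
    if ignore_sitemap is not None:
        crawler_options["ignoreSitemap"] = ignore_sitemap  # assumed camelCase key
    if mode is not None:
        crawler_options["mode"] = mode
    return {"crawlerOptions": crawler_options}

params = crawl_options_to_params(max_depth=2, limit=50)
# -> {"crawlerOptions": {"maxDepth": 2, "limit": 50}}
```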

### V1 Response Types

```python { .api }
class V1Document:
    """V1 document structure"""
    content: str
    html: Optional[str]
    raw_html: Optional[str]
    markdown: Optional[str]
    metadata: V1DocumentMetadata

class V1DocumentMetadata:
    """V1 document metadata"""
    title: Optional[str]
    description: Optional[str]
    language: Optional[str]
    source_url: str
    page_status_code: Optional[int]
    page_error: Optional[str]

class V1ScrapeResponse:
    """V1 scrape response"""
    success: bool
    data: V1Document

class V1CrawlResponse:
    """V1 crawl response"""
    success: bool
    data: List[V1Document]

class V1CrawlJobStatus:
    """V1 crawl job status"""
    status: str  # "active", "paused", "completed", "failed"
    job_id: str
    current: Optional[int]
    total: Optional[int]
    data: Optional[List[V1Document]]
    partial_data: Optional[List[V1Document]]
```
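
A small sketch of reading crawl progress out of a status payload, assuming the dict response exposes the same `current`, `total`, and `partial_data` fields that `V1CrawlJobStatus` documents (the exact key names in the raw dict are an assumption):

```python
from firecrawl import V1FirecrawlApp

app = V1FirecrawlApp(api_key="your-api-key")

job = app.async_crawl_url("https://example.com", {"crawlerOptions": {"limit": 100}})
status = app.check_crawl_status(job["jobId"])

# Report progress using the documented status fields (assumed key names)
current = status.get("current", 0)
total = status.get("total") or 0
print(f"{status['status']}: {current}/{total} pages")

# Pages collected so far, even while the job is still active
for doc in status.get("partial_data") or []:
    print(doc["metadata"]["sourceURL"])
```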

## Unified Client V1 Access

Access the v1 API through the unified client:

```python
from firecrawl import Firecrawl

# Main client defaults to v2
app = Firecrawl(api_key="your-api-key")

# Access v1 API via .v1 property
v1_result = app.v1.scrape_url("https://example.com")
v1_crawl = app.v1.crawl_url("https://example.com", {"crawlerOptions": {"limit": 10}})

# V1 methods are available on the .v1 proxy
print(f"V1 scrape result: {v1_result['data']['content']}")
```

## Migration from V1 to V2

### Key Differences

```python
# V1 style
from firecrawl import V1FirecrawlApp
app = V1FirecrawlApp(api_key="key")
result = app.scrape_url("https://example.com", {
    "pageOptions": {"includeMarkdown": True}
})

# V2 style
from firecrawl import Firecrawl, ScrapeOptions
app = Firecrawl(api_key="key")
result = app.scrape("https://example.com", ScrapeOptions(formats=["markdown"]))

# Unified client (recommended)
from firecrawl import Firecrawl
app = Firecrawl(api_key="key")

# Use v2 by default
v2_result = app.scrape("https://example.com")

# Use v1 when needed
v1_result = app.v1.scrape_url("https://example.com")
```
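
When migrating call sites incrementally, it can help to translate the old `pageOptions` dict into the v2 format list. A hedged sketch, using only the flag names and format names shown elsewhere in this document (the helper itself is illustrative, not part of the SDK):

```python
def page_options_to_formats(page_options: dict) -> list:
    """Illustrative helper: map v1 pageOptions flags to v2 format names."""
    mapping = {
        "includeMarkdown": "markdown",
        "includeHtml": "html",
        "includeRawHtml": "rawHtml",
        "screenshot": "screenshot",
    }
    return [fmt for key, fmt in mapping.items() if page_options.get(key)]

formats = page_options_to_formats({"includeMarkdown": True, "includeHtml": True})
# -> ["markdown", "html"]
```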

## Async V1 Usage

```python
import asyncio
from firecrawl import AsyncV1FirecrawlApp

async def v1_async_example():
    app = AsyncV1FirecrawlApp(api_key="your-api-key")

    # Async v1 scraping
    result = await app.scrape_url("https://example.com")

    # Async v1 crawling
    crawl_result = await app.crawl_url("https://example.com", {
        "crawlerOptions": {"limit": 50}
    })

    # Async v1 batch scraping
    batch_result = await app.batch_scrape_urls([
        "https://example1.com",
        "https://example2.com"
    ])

asyncio.run(v1_async_example())
```