# Legacy V1 API

Complete v1 API support for backward compatibility with existing implementations. The v1 API provides the original Firecrawl functionality with its own set of methods, types, and patterns.

## Capabilities

### V1 Synchronous Client

Legacy synchronous client for v1 API operations with traditional method signatures and response formats.

```python { .api }
class V1FirecrawlApp:
    """Legacy v1 synchronous client"""

    def __init__(self, api_key: str = None, api_url: str = None):
        """
        Initialize v1 client.

        Parameters:
        - api_key: str, Firecrawl API key
        - api_url: str, optional custom API URL
        """

    def scrape_url(self, url: str, params: Optional[dict] = None) -> dict:
        """
        Scrape a single URL using v1 API.

        Parameters:
        - url: str, target URL to scrape
        - params: dict, optional scraping parameters

        Returns:
        - dict: v1 scrape response format
        """

    def crawl_url(self, url: str, params: Optional[dict] = None, poll_interval: int = 2) -> dict:
        """
        Crawl a website using v1 API with automatic polling.

        Parameters:
        - url: str, target URL to crawl
        - params: dict, optional crawling parameters
        - poll_interval: int, polling interval in seconds

        Returns:
        - dict: v1 crawl response with all results
        """

    def async_crawl_url(self, url: str, params: Optional[dict] = None) -> dict:
        """
        Start async crawl job using v1 API.

        Parameters:
        - url: str, target URL to crawl
        - params: dict, optional crawling parameters

        Returns:
        - dict: v1 job response with job ID
        """

    def check_crawl_status(self, job_id: str) -> dict:
        """
        Check v1 crawl job status.

        Parameters:
        - job_id: str, job ID from async_crawl_url

        Returns:
        - dict: v1 job status response
        """

    def check_crawl_errors(self, job_id: str) -> dict:
        """
        Check v1 crawl job errors.

        Parameters:
        - job_id: str, job ID from async_crawl_url

        Returns:
        - dict: v1 job error information
        """

    def cancel_crawl(self, job_id: str) -> dict:
        """
        Cancel a v1 crawl job.

        Parameters:
        - job_id: str, job ID to cancel

        Returns:
        - dict: v1 cancellation response
        """

    def crawl_url_and_watch(self, url: str, params: Optional[dict] = None, poll_interval: int = 2) -> dict:
        """
        Crawl URL with real-time progress monitoring.

        Parameters:
        - url: str, target URL to crawl
        - params: dict, optional crawling parameters
        - poll_interval: int, polling interval in seconds

        Returns:
        - dict: v1 crawl response with progress tracking
        """
```
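
The job-management helpers (`check_crawl_errors`, `cancel_crawl`) follow the same dict-based pattern as the other v1 calls. A minimal sketch, assuming the error payload exposes an `errors` list and the cancellation response carries a `status` field (neither shape is guaranteed here):

```python
from firecrawl import V1FirecrawlApp

app = V1FirecrawlApp(api_key="your-api-key")

# Start a crawl job without blocking
job = app.async_crawl_url("https://example.com", {"crawlerOptions": {"limit": 25}})
job_id = job["jobId"]

# Inspect any per-page errors collected so far
# (assumes the v1 error payload contains an "errors" list)
errors = app.check_crawl_errors(job_id)
for err in errors.get("errors", []):
    print(f"Error: {err}")

# Abort the job if it is no longer needed
# (assumes the cancellation response carries a "status" field)
cancelled = app.cancel_crawl(job_id)
print(f"Cancellation status: {cancelled.get('status')}")
```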

### V1 Asynchronous Client

Legacy asynchronous client providing async versions of all v1 operations.

```python { .api }
class AsyncV1FirecrawlApp:
    """Legacy v1 asynchronous client"""

    def __init__(self, api_key: str = None, api_url: str = None):
        """Initialize async v1 client"""

    async def scrape_url(self, url: str, params: Optional[dict] = None) -> dict:
        """Async version of v1 scrape_url"""

    async def crawl_url(self, url: str, params: Optional[dict] = None, poll_interval: int = 2) -> dict:
        """Async version of v1 crawl_url with polling"""

    async def async_crawl_url(self, url: str, params: Optional[dict] = None) -> dict:
        """Async version of v1 async_crawl_url"""

    async def check_crawl_status(self, job_id: str) -> dict:
        """Async version of v1 check_crawl_status"""
```
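
For long crawls, the async client can also start a job and poll it cooperatively instead of blocking in `crawl_url`. A minimal sketch, assuming the same `jobId` and `status` fields used by the v1 responses elsewhere in this document:

```python
import asyncio
from firecrawl import AsyncV1FirecrawlApp

async def poll_crawl() -> dict:
    app = AsyncV1FirecrawlApp(api_key="your-api-key")

    # Kick off the job without waiting for it to finish
    job = await app.async_crawl_url("https://example.com", {"crawlerOptions": {"limit": 25}})
    job_id = job["jobId"]

    # Poll until the job reaches a terminal state
    while True:
        status = await app.check_crawl_status(job_id)
        if status["status"] in ("completed", "failed"):
            return status
        await asyncio.sleep(2)

result = asyncio.run(poll_crawl())
print(f"Final status: {result['status']}")
```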

### V1 Extended Operations

Additional v1 operations for mapping, extraction, research, and batch processing.

```python { .api }
# V1FirecrawlApp additional methods
def map_url(self, url: str) -> dict:
    """
    Generate URL map using v1 API.

    Parameters:
    - url: str, target website URL

    Returns:
    - dict: v1 map response
    """

def extract(self, data: dict, schema: dict, prompt: Optional[str] = None) -> dict:
    """
    Extract structured data using v1 API.

    Parameters:
    - data: dict, input data for extraction
    - schema: dict, extraction schema
    - prompt: str, optional extraction prompt

    Returns:
    - dict: v1 extraction response
    """

def deep_research(self, query: str, max_articles: int = 5) -> dict:
    """
    Perform deep research using v1 API.

    Parameters:
    - query: str, research query
    - max_articles: int, maximum articles to analyze

    Returns:
    - dict: v1 research response
    """

def generate_llms_text(self, data: dict, prompt: str) -> dict:
    """
    Generate text using LLM via v1 API.

    Parameters:
    - data: dict, input data
    - prompt: str, generation prompt

    Returns:
    - dict: v1 text generation response
    """

def batch_scrape_urls(self, urls: List[str], params: Optional[dict] = None) -> dict:
    """
    Batch scrape URLs using v1 API.

    Parameters:
    - urls: List[str], list of URLs to scrape
    - params: dict, optional scraping parameters

    Returns:
    - dict: v1 batch scrape response
    """

def async_batch_scrape_urls(self, urls: List[str], params: Optional[dict] = None) -> dict:
    """
    Start async batch scrape using v1 API.

    Parameters:
    - urls: List[str], list of URLs to scrape
    - params: dict, optional scraping parameters

    Returns:
    - dict: v1 batch job response
    """

def batch_scrape_urls_and_watch(self, urls: List[str], params: Optional[dict] = None, poll_interval: int = 2) -> dict:
    """
    Batch scrape URLs with real-time progress monitoring.

    Parameters:
    - urls: List[str], list of URLs to scrape
    - params: dict, optional scraping parameters
    - poll_interval: int, polling interval in seconds

    Returns:
    - dict: v1 batch scrape response with progress tracking
    """

def check_batch_scrape_status(self, job_id: str) -> dict:
    """
    Check v1 batch scrape job status.

    Parameters:
    - job_id: str, job ID from async_batch_scrape_urls

    Returns:
    - dict: v1 batch job status response
    """

def check_batch_scrape_errors(self, job_id: str) -> dict:
    """
    Check v1 batch scrape job errors.

    Parameters:
    - job_id: str, job ID from async_batch_scrape_urls

    Returns:
    - dict: v1 batch job error information
    """
```
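
The non-blocking batch path combines well with `map_url`. A short sketch, assuming the map response exposes discovered URLs under a `links` key and the batch job response reuses the `jobId`/`status` shape of the crawl endpoints (both are assumptions, not confirmed shapes):

```python
import time
from firecrawl import V1FirecrawlApp

app = V1FirecrawlApp(api_key="your-api-key")

# Discover URLs first, then scrape them as a background batch job
# (assumes the map response exposes discovered URLs under "links")
site_map = app.map_url("https://example.com")
urls = site_map.get("links", [])[:10]

job = app.async_batch_scrape_urls(urls, {"pageOptions": {"includeMarkdown": True}})
job_id = job["jobId"]

# Poll the batch job until it reaches a terminal state
while True:
    status = app.check_batch_scrape_status(job_id)
    if status["status"] in ("completed", "failed"):
        break
    time.sleep(2)

if status["status"] == "failed":
    print(app.check_batch_scrape_errors(job_id))
```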

## Usage Examples

### Basic V1 Operations

```python
from firecrawl import V1FirecrawlApp

# Initialize v1 client
app = V1FirecrawlApp(api_key="your-api-key")

# V1 scraping
result = app.scrape_url("https://example.com", {
    "pageOptions": {
        "includeHtml": True,
        "includeMarkdown": True,
        "includeRawHtml": False,
        "screenshot": False
    }
})

print(f"Content: {result['data']['content']}")
print(f"HTML: {result['data']['html']}")

# V1 crawling with polling
crawl_result = app.crawl_url("https://example.com", {
    "crawlerOptions": {
        "maxDepth": 2,
        "limit": 50
    },
    "pageOptions": {
        "includeMarkdown": True
    }
})

print(f"Crawled {len(crawl_result['data'])} pages")
```

### V1 Async Operations

```python
from firecrawl import V1FirecrawlApp
import time

app = V1FirecrawlApp(api_key="your-api-key")

# Start async crawl
job_response = app.async_crawl_url("https://example.com", {
    "crawlerOptions": {"limit": 100}
})

job_id = job_response['jobId']
print(f"Started job: {job_id}")

# Poll for completion
while True:
    status = app.check_crawl_status(job_id)
    print(f"Status: {status['status']}")

    if status['status'] == 'completed':
        print(f"Crawl completed with {len(status['data'])} pages")
        break
    elif status['status'] == 'failed':
        print("Crawl failed")
        break

    time.sleep(5)
```

### V1 Data Extraction

```python
from firecrawl import V1FirecrawlApp

app = V1FirecrawlApp(api_key="your-api-key")

# First scrape the page
scrape_result = app.scrape_url("https://store.example.com/product/123")

# Extract structured data
extraction_schema = {
    "type": "object",
    "properties": {
        "product_name": {"type": "string"},
        "price": {"type": "number"},
        "availability": {"type": "string"},
        "features": {
            "type": "array",
            "items": {"type": "string"}
        }
    }
}

extracted = app.extract(
    data=scrape_result['data'],
    schema=extraction_schema,
    prompt="Extract product information from this e-commerce page"
)

print(f"Product: {extracted['data']['product_name']}")
print(f"Price: ${extracted['data']['price']}")
```

### V1 Batch Operations

```python
from firecrawl import V1FirecrawlApp

app = V1FirecrawlApp(api_key="your-api-key")

# Batch scrape multiple URLs
urls = [
    "https://example1.com",
    "https://example2.com",
    "https://example3.com"
]

batch_result = app.batch_scrape_urls(urls, {
    "pageOptions": {
        "includeMarkdown": True,
        "includeHtml": False
    }
})

for item in batch_result['data']:
    print(f"URL: {item['metadata']['sourceURL']}")
    print(f"Title: {item['metadata'].get('title', 'No title')}")
    print(f"Content length: {len(item['content'])}")
    print("---")
```

### V1 Research Operations

```python
from firecrawl import V1FirecrawlApp

app = V1FirecrawlApp(api_key="your-api-key")

# Deep research on a topic
research_result = app.deep_research(
    query="latest developments in artificial intelligence",
    max_articles=10
)

print(f"Research summary: {research_result['data']['summary']}")
print(f"Sources analyzed: {len(research_result['data']['sources'])}")

for source in research_result['data']['sources']:
    print(f"- {source['title']}: {source['url']}")

# Generate text based on data
text_result = app.generate_llms_text(
    data=research_result['data'],
    prompt="Write a brief executive summary of the AI developments"
)

print(f"Generated summary: {text_result['data']['text']}")
```

## V1 Types and Configuration

### V1 Configuration Types

```python { .api }
class V1JsonConfig:
    """V1 JSON configuration"""
    include_html: bool
    include_markdown: bool
    include_raw_html: bool
    include_links: bool

class V1ScrapeOptions:
    """V1 scraping options"""
    formats: List[str]  # ["markdown", "html", "rawHtml", "content", "links", "screenshot"]
    headers: Optional[dict]
    include_tags: Optional[List[str]]
    exclude_tags: Optional[List[str]]
    only_main_content: Optional[bool]
    wait_for: Optional[int]

class V1ChangeTrackingOptions:
    """V1 change tracking configuration"""
    include_html: bool
    xpath: Optional[str]
    css_selector: Optional[str]

class V1CrawlOptions:
    """V1 crawling options"""
    includes: Optional[List[str]]
    excludes: Optional[List[str]]
    generate_img_alt_text: Optional[bool]
    return_only_urls: Optional[bool]
    max_depth: Optional[int]
    mode: Optional[str]  # "fast", "default"
    ignore_sitemap: Optional[bool]
    limit: Optional[int]
    allow_backward_crawling: Optional[bool]
    allow_external_content_links: Optional[bool]

class V1ExtractOptions:
    """V1 extraction options"""
    mode: Optional[str]  # "llm-extraction", "llm-extraction-from-raw-html"
    extraction_prompt: Optional[str]
    extraction_schema: Optional[dict]
```
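
These option classes mirror the dict parameters used throughout the examples; the snake_case fields appear to correspond to the camelCase keys the v1 endpoints accept. A hedged sketch of that mapping (the helper below and the field-to-key correspondence are illustrative assumptions, not part of the SDK):

```python
def crawl_options_to_params(max_depth=None, limit=None, ignore_sitemap=None, mode=None) -> dict:
    """Illustrative helper: build a v1 crawlerOptions dict from V1CrawlOptions-style fields."""
    crawler_options = {}
    if max_depth is not None:
        crawler_options["maxDepth"] = max_depth          # assumed camelCase key
    if limit is not None:
        crawler_options["limit"] = limit
    if ignore_sitemap is not None:
        crawler_options["ignoreSitemap"] = ignore_sitemap  # assumed camelCase key
    if mode is not None:
        crawler_options["mode"] = mode
    return {"crawlerOptions": crawler_options}

params = crawl_options_to_params(max_depth=2, limit=50)
# -> {"crawlerOptions": {"maxDepth": 2, "limit": 50}}
```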

### V1 Response Types

```python { .api }
class V1Document:
    """V1 document structure"""
    content: str
    html: Optional[str]
    raw_html: Optional[str]
    markdown: Optional[str]
    metadata: V1DocumentMetadata

class V1DocumentMetadata:
    """V1 document metadata"""
    title: Optional[str]
    description: Optional[str]
    language: Optional[str]
    source_url: str
    page_status_code: Optional[int]
    page_error: Optional[str]

class V1ScrapeResponse:
    """V1 scrape response"""
    success: bool
    data: V1Document

class V1CrawlResponse:
    """V1 crawl response"""
    success: bool
    data: List[V1Document]

class V1CrawlJobStatus:
    """V1 crawl job status"""
    status: str  # "active", "paused", "completed", "failed"
    job_id: str
    current: Optional[int]
    total: Optional[int]
    data: Optional[List[V1Document]]
    partial_data: Optional[List[V1Document]]
```
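
A small sketch of reading crawl progress out of a status payload, assuming the dict response exposes the same `current`, `total`, and `partial_data` fields that `V1CrawlJobStatus` documents (the exact key names in the raw dict are an assumption):

```python
from firecrawl import V1FirecrawlApp

app = V1FirecrawlApp(api_key="your-api-key")

job = app.async_crawl_url("https://example.com", {"crawlerOptions": {"limit": 100}})
status = app.check_crawl_status(job["jobId"])

# Report progress using the documented status fields (assumed key names)
current = status.get("current", 0)
total = status.get("total") or 0
print(f"{status['status']}: {current}/{total} pages")

# Pages collected so far, even while the job is still active
for doc in status.get("partial_data") or []:
    print(doc["metadata"]["sourceURL"])
```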

## Unified Client V1 Access

Access the v1 API through the unified client:

```python
from firecrawl import Firecrawl

# Main client defaults to v2
app = Firecrawl(api_key="your-api-key")

# Access v1 API via .v1 property
v1_result = app.v1.scrape_url("https://example.com")
v1_crawl = app.v1.crawl_url("https://example.com", {"crawlerOptions": {"limit": 10}})

# V1 methods are available on the .v1 proxy
print(f"V1 scrape result: {v1_result['data']['content']}")
```

## Migration from V1 to V2

### Key Differences

```python
# V1 style
from firecrawl import V1FirecrawlApp
app = V1FirecrawlApp(api_key="key")
result = app.scrape_url("https://example.com", {
    "pageOptions": {"includeMarkdown": True}
})

# V2 style
from firecrawl import Firecrawl, ScrapeOptions
app = Firecrawl(api_key="key")
result = app.scrape("https://example.com", ScrapeOptions(formats=["markdown"]))

# Unified client (recommended)
from firecrawl import Firecrawl
app = Firecrawl(api_key="key")

# Use v2 by default
v2_result = app.scrape("https://example.com")

# Use v1 when needed
v1_result = app.v1.scrape_url("https://example.com")
```
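
When migrating call sites incrementally, it can help to translate the old `pageOptions` dict into the v2 format list. A hedged sketch, using only the flag names and format names shown elsewhere in this document (the helper itself is illustrative, not part of the SDK):

```python
def page_options_to_formats(page_options: dict) -> list:
    """Illustrative helper: map v1 pageOptions flags to v2 format names."""
    mapping = {
        "includeMarkdown": "markdown",
        "includeHtml": "html",
        "includeRawHtml": "rawHtml",
        "screenshot": "screenshot",
    }
    return [fmt for key, fmt in mapping.items() if page_options.get(key)]

formats = page_options_to_formats({"includeMarkdown": True, "includeHtml": True})
# -> ["markdown", "html"]
```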

## Async V1 Usage

```python
import asyncio
from firecrawl import AsyncV1FirecrawlApp

async def v1_async_example():
    app = AsyncV1FirecrawlApp(api_key="your-api-key")

    # Async v1 scraping
    result = await app.scrape_url("https://example.com")

    # Async v1 crawling
    crawl_result = await app.crawl_url("https://example.com", {
        "crawlerOptions": {"limit": 50}
    })

    # Async v1 batch scraping
    batch_result = await app.batch_scrape_urls([
        "https://example1.com",
        "https://example2.com"
    ])

asyncio.run(v1_async_example())
```