Python SDK for Firecrawl API that enables web scraping, crawling, and content extraction with LLM-optimized output formats
—
Quality: Pending — Does it follow best practices?
Impact: Pending — No eval scenarios have been run.
Complete v1 API support for backward compatibility with existing implementations. The v1 API provides the original Firecrawl functionality with its own set of methods, types, and patterns.
Legacy synchronous client for v1 API operations with traditional method signatures and response formats.
class V1FirecrawlApp:
    """Legacy v1 synchronous client."""

    def __init__(self, api_key: Optional[str] = None, api_url: Optional[str] = None):
        """
        Initialize v1 client.

        Parameters:
        - api_key: str, Firecrawl API key
        - api_url: str, optional custom API URL
        """

    def scrape_url(self, url: str, params: Optional[dict] = None) -> dict:
        """
        Scrape a single URL using v1 API.

        Parameters:
        - url: str, target URL to scrape
        - params: dict, optional scraping parameters

        Returns:
        - dict: v1 scrape response format
        """

    def crawl_url(self, url: str, params: Optional[dict] = None, poll_interval: int = 2) -> dict:
        """
        Crawl a website using v1 API with automatic polling.

        Parameters:
        - url: str, target URL to crawl
        - params: dict, optional crawling parameters
        - poll_interval: int, polling interval in seconds

        Returns:
        - dict: v1 crawl response with all results
        """

    def async_crawl_url(self, url: str, params: Optional[dict] = None) -> dict:
        """
        Start async crawl job using v1 API.

        Parameters:
        - url: str, target URL to crawl
        - params: dict, optional crawling parameters

        Returns:
        - dict: v1 job response with job ID
        """

    def check_crawl_status(self, job_id: str) -> dict:
        """
        Check v1 crawl job status.

        Parameters:
        - job_id: str, job ID from async_crawl_url

        Returns:
        - dict: v1 job status response
        """

    def check_crawl_errors(self, job_id: str) -> dict:
        """
        Check v1 crawl job errors.

        Parameters:
        - job_id: str, job ID from async_crawl_url

        Returns:
        - dict: v1 job error information
        """

    def cancel_crawl(self, job_id: str) -> dict:
        """
        Cancel a v1 crawl job.

        Parameters:
        - job_id: str, job ID to cancel

        Returns:
        - dict: v1 cancellation response
        """

    def crawl_url_and_watch(self, url: str, params: Optional[dict] = None, poll_interval: int = 2) -> dict:
        """
        Crawl URL with real-time progress monitoring.

        Parameters:
        - url: str, target URL to crawl
        - params: dict, optional crawling parameters
        - poll_interval: int, polling interval in seconds

        Returns:
        - dict: v1 crawl response with progress tracking
        """

# Legacy asynchronous client providing async versions of all v1 operations.
class AsyncV1FirecrawlApp:
    """Legacy v1 asynchronous client."""

    def __init__(self, api_key: Optional[str] = None, api_url: Optional[str] = None):
        """Initialize async v1 client."""

    async def scrape_url(self, url: str, params: Optional[dict] = None) -> dict:
        """Async version of v1 scrape_url."""

    async def crawl_url(self, url: str, params: Optional[dict] = None, poll_interval: int = 2) -> dict:
        """Async version of v1 crawl_url with polling."""

    async def async_crawl_url(self, url: str, params: Optional[dict] = None) -> dict:
        """Async version of v1 async_crawl_url."""

    async def check_crawl_status(self, job_id: str) -> dict:
        """Async version of v1 check_crawl_status."""

# Additional v1 operations for mapping, extraction, research, and batch processing.
# V1FirecrawlApp additional methods
# NOTE(review): these defs are listed with `self` — they are instance methods of
# V1FirecrawlApp shown separately in the spec, not free functions.
def map_url(self, url: str) -> dict:
    """
    Generate URL map using v1 API.

    Parameters:
    - url: str, target website URL

    Returns:
    - dict: v1 map response
    """

def extract(self, data: dict, schema: dict, prompt: Optional[str] = None) -> dict:
    """
    Extract structured data using v1 API.

    Parameters:
    - data: dict, input data for extraction
    - schema: dict, extraction schema
    - prompt: str, optional extraction prompt

    Returns:
    - dict: v1 extraction response
    """

def deep_research(self, query: str, max_articles: int = 5) -> dict:
    """
    Perform deep research using v1 API.

    Parameters:
    - query: str, research query
    - max_articles: int, maximum articles to analyze

    Returns:
    - dict: v1 research response
    """

def generate_llms_text(self, data: dict, prompt: str) -> dict:
    """
    Generate text using LLM via v1 API.

    Parameters:
    - data: dict, input data
    - prompt: str, generation prompt

    Returns:
    - dict: v1 text generation response
    """

def batch_scrape_urls(self, urls: List[str], params: Optional[dict] = None) -> dict:
    """
    Batch scrape URLs using v1 API.

    Parameters:
    - urls: List[str], list of URLs to scrape
    - params: dict, optional scraping parameters

    Returns:
    - dict: v1 batch scrape response
    """

def async_batch_scrape_urls(self, urls: List[str], params: Optional[dict] = None) -> dict:
    """
    Start async batch scrape using v1 API.

    Parameters:
    - urls: List[str], list of URLs to scrape
    - params: dict, optional scraping parameters

    Returns:
    - dict: v1 batch job response
    """

def batch_scrape_urls_and_watch(self, urls: List[str], params: Optional[dict] = None, poll_interval: int = 2) -> dict:
    """
    Batch scrape URLs with real-time progress monitoring.

    Parameters:
    - urls: List[str], list of URLs to scrape
    - params: dict, optional scraping parameters
    - poll_interval: int, polling interval in seconds

    Returns:
    - dict: v1 batch scrape response with progress tracking
    """

def check_batch_scrape_status(self, job_id: str) -> dict:
    """
    Check v1 batch scrape job status.

    Parameters:
    - job_id: str, job ID from async_batch_scrape_urls

    Returns:
    - dict: v1 batch job status response
    """

def check_batch_scrape_errors(self, job_id: str) -> dict:
    """
    Check v1 batch scrape job errors.

    Parameters:
    - job_id: str, job ID from async_batch_scrape_urls

    Returns:
    - dict: v1 batch job error information
    """

from firecrawl import V1FirecrawlApp
# Initialize v1 client
app = V1FirecrawlApp(api_key="your-api-key")

# V1 scraping
result = app.scrape_url("https://example.com", {
    "pageOptions": {
        "includeHtml": True,
        "includeMarkdown": True,
        "includeRawHtml": False,
        "screenshot": False
    }
})
print(f"Content: {result['data']['content']}")
print(f"HTML: {result['data']['html']}")

# V1 crawling with polling
crawl_result = app.crawl_url("https://example.com", {
    "crawlerOptions": {
        "maxDepth": 2,
        "limit": 50
    },
    "pageOptions": {
        "includeMarkdown": True
    }
})
print(f"Crawled {len(crawl_result['data'])} pages")

from firecrawl import V1FirecrawlApp
import time

app = V1FirecrawlApp(api_key="your-api-key")

# Start async crawl
job_response = app.async_crawl_url("https://example.com", {
    "crawlerOptions": {"limit": 100}
})
job_id = job_response['jobId']
print(f"Started job: {job_id}")

# Poll until the job reaches a terminal state ('completed' or 'failed')
while True:
    status = app.check_crawl_status(job_id)
    print(f"Status: {status['status']}")
    if status['status'] == 'completed':
        print(f"Crawl completed with {len(status['data'])} pages")
        break
    elif status['status'] == 'failed':
        print("Crawl failed")
        break
    time.sleep(5)

from firecrawl import V1FirecrawlApp
app = V1FirecrawlApp(api_key="your-api-key")

# First scrape the page
scrape_result = app.scrape_url("https://store.example.com/product/123")

# Extract structured data using a JSON-Schema-style description
extraction_schema = {
    "type": "object",
    "properties": {
        "product_name": {"type": "string"},
        "price": {"type": "number"},
        "availability": {"type": "string"},
        "features": {
            "type": "array",
            "items": {"type": "string"}
        }
    }
}
extracted = app.extract(
    data=scrape_result['data'],
    schema=extraction_schema,
    prompt="Extract product information from this e-commerce page"
)
print(f"Product: {extracted['data']['product_name']}")
print(f"Price: ${extracted['data']['price']}")

from firecrawl import V1FirecrawlApp
app = V1FirecrawlApp(api_key="your-api-key")

# Batch scrape multiple URLs
urls = [
    "https://example1.com",
    "https://example2.com",
    "https://example3.com"
]
batch_result = app.batch_scrape_urls(urls, {
    "pageOptions": {
        "includeMarkdown": True,
        "includeHtml": False
    }
})
for item in batch_result['data']:
    print(f"URL: {item['metadata']['sourceURL']}")
    print(f"Title: {item['metadata'].get('title', 'No title')}")
    print(f"Content length: {len(item['content'])}")
    print("---")

from firecrawl import V1FirecrawlApp
app = V1FirecrawlApp(api_key="your-api-key")

# Deep research on a topic
research_result = app.deep_research(
    query="latest developments in artificial intelligence",
    max_articles=10
)
print(f"Research summary: {research_result['data']['summary']}")
print(f"Sources analyzed: {len(research_result['data']['sources'])}")
for source in research_result['data']['sources']:
    print(f"- {source['title']}: {source['url']}")

# Generate text based on data
text_result = app.generate_llms_text(
    data=research_result['data'],
    prompt="Write a brief executive summary of the AI developments"
)
print(f"Generated summary: {text_result['data']['text']}")


class V1JsonConfig:
    """V1 JSON configuration."""
    include_html: bool
    include_markdown: bool
    include_raw_html: bool
    include_links: bool


class V1ScrapeOptions:
    """V1 scraping options."""
    formats: List[str]  # ["markdown", "html", "rawHtml", "content", "links", "screenshot"]
    headers: Optional[dict]
    include_tags: Optional[List[str]]
    exclude_tags: Optional[List[str]]
    only_main_content: Optional[bool]
    wait_for: Optional[int]


class V1ChangeTrackingOptions:
    """V1 change tracking configuration."""
    include_html: bool
    xpath: Optional[str]
    css_selector: Optional[str]


class V1CrawlOptions:
    """V1 crawling options."""
    includes: Optional[List[str]]
    excludes: Optional[List[str]]
    generate_img_alt_text: Optional[bool]
    return_only_urls: Optional[bool]
    max_depth: Optional[int]
    mode: Optional[str]  # "fast", "default"
    ignore_sitemap: Optional[bool]
    limit: Optional[int]
    allow_backward_crawling: Optional[bool]
    allow_external_content_links: Optional[bool]


class V1ExtractOptions:
    """V1 extraction options."""
    mode: Optional[str]  # "llm-extraction", "llm-extraction-from-raw-html"
    extraction_prompt: Optional[str]
    extraction_schema: Optional[dict]


class V1Document:
    """V1 document structure."""
    content: str
    html: Optional[str]
    raw_html: Optional[str]
    markdown: Optional[str]
    # String annotation: V1DocumentMetadata is defined below, and class-body
    # annotations are evaluated eagerly without `from __future__ import annotations`.
    metadata: "V1DocumentMetadata"


class V1DocumentMetadata:
    """V1 document metadata."""
    title: Optional[str]
    description: Optional[str]
    language: Optional[str]
    source_url: str
    page_status_code: Optional[int]
    page_error: Optional[str]


class V1ScrapeResponse:
    """V1 scrape response."""
    success: bool
    data: V1Document


class V1CrawlResponse:
    """V1 crawl response."""
    success: bool
    data: List[V1Document]


class V1CrawlJobStatus:
    """V1 crawl job status."""
    status: str  # "active", "paused", "completed", "failed"
    job_id: str
    current: Optional[int]
    total: Optional[int]
    data: Optional[List[V1Document]]
    partial_data: Optional[List[V1Document]]

# Access v1 API through the unified client:
from firecrawl import Firecrawl

# Main client defaults to v2
app = Firecrawl(api_key="your-api-key")

# Access v1 API via .v1 property
v1_result = app.v1.scrape_url("https://example.com")
v1_crawl = app.v1.crawl_url("https://example.com", {"crawlerOptions": {"limit": 10}})

# V1 methods are available on the .v1 proxy
print(f"V1 scrape result: {v1_result['data']['content']}")

# V1 style
from firecrawl import V1FirecrawlApp

app = V1FirecrawlApp(api_key="key")
result = app.scrape_url("https://example.com", {
    "pageOptions": {"includeMarkdown": True}
})

# V2 style
from firecrawl import Firecrawl, ScrapeOptions

app = Firecrawl(api_key="key")
result = app.scrape("https://example.com", ScrapeOptions(formats=["markdown"]))

# Unified client (recommended)
from firecrawl import Firecrawl

app = Firecrawl(api_key="key")

# Use v2 by default
v2_result = app.scrape("https://example.com")

# Use v1 when needed
v1_result = app.v1.scrape_url("https://example.com")

import asyncio
from firecrawl import AsyncV1FirecrawlApp


async def v1_async_example():
    """Demonstrate async v1 scraping, crawling, and batch scraping."""
    app = AsyncV1FirecrawlApp(api_key="your-api-key")

    # Async v1 scraping
    result = await app.scrape_url("https://example.com")

    # Async v1 crawling
    crawl_result = await app.crawl_url("https://example.com", {
        "crawlerOptions": {"limit": 50}
    })

    # Async v1 batch scraping
    batch_result = await app.batch_scrape_urls([
        "https://example1.com",
        "https://example2.com"
    ])

asyncio.run(v1_async_example())

# Install with Tessl CLI
npx tessl i tessl/pypi-firecrawl-py