Python SDK for Firecrawl API that enables web scraping, crawling, and content extraction with LLM-optimized output formats
Website crawling functionality for discovering and processing multiple pages across a site. Supports both complete crawling with result polling and asynchronous job-based crawling for large sites.
Crawl a website and return complete results, automatically polling until the job finishes. Best for smaller sites or when you need everything in one call.
def crawl(url: str, options: Optional[CrawlOptions] = None) -> CrawlResponse:
    """
    Crawl a website and return complete results.

    Parameters:
    - url: str, target website URL to crawl
    - options: CrawlOptions, optional crawling configuration

    Returns:
    - CrawlResponse: complete crawl results with all discovered pages
    """

Start a crawl job and manage it asynchronously, ideal for large websites or when you need to track progress.
def start_crawl(url: str, options: Optional[CrawlOptions] = None) -> str:
    """
    Start a crawl job asynchronously.

    Parameters:
    - url: str, target website URL to crawl
    - options: CrawlOptions, optional crawling configuration

    Returns:
    - str: crawl job ID for status tracking
    """

def get_crawl_status(crawl_id: str) -> CrawlJobStatus:
    """
    Get status of a running crawl job.

    Parameters:
    - crawl_id: str, crawl job ID from start_crawl

    Returns:
    - CrawlJobStatus: current status and progress information
    """

def cancel_crawl(crawl_id: str) -> dict:
    """
    Cancel a running crawl job.

    Parameters:
    - crawl_id: str, crawl job ID to cancel

    Returns:
    - dict: cancellation confirmation
    """
# Async clients only
async def wait_crawl(job_id: str, poll_interval: int = 2, timeout: Optional[int] = None) -> CrawlResponse:
    """
    Wait for crawl completion with automatic polling (AsyncFirecrawl only).

    Parameters:
    - job_id: str, crawl job ID to wait for
    - poll_interval: int, polling interval in seconds (default: 2)
    - timeout: Optional[int], maximum wait time in seconds

    Returns:
    - CrawlResponse: completed crawl results
    """
Manage and monitor crawl jobs with error handling and active job tracking.

def get_crawl_errors(crawl_id: str) -> dict:
    """
    Get errors from a crawl job.

    Parameters:
    - crawl_id: str, crawl job ID

    Returns:
    - dict: error information and details
    """

def get_active_crawls() -> List[dict]:
    """
    Get list of active crawl jobs.

    Returns:
    - List[dict]: list of active crawl job information
    """
def crawl_params_preview(options: CrawlOptions) -> dict:
    """
    Preview crawl parameters and estimated scope.

    Parameters:
    - options: CrawlOptions, crawling configuration to preview

    Returns:
    - dict: preview information including estimated pages and cost
    """

from firecrawl import Firecrawl, CrawlOptions, ScrapeOptions
app = Firecrawl(api_key="your-api-key")

# Simple crawl
result = app.crawl("https://example.com")
print(f"Crawled {len(result.data)} pages")

# Crawl with options
scrape_options = ScrapeOptions(formats=["markdown"])
crawl_options = CrawlOptions(
    limit=50,
    max_depth=3,
    allowed_domains=["example.com"],
    scrape_options=scrape_options
)
result = app.crawl("https://example.com", crawl_options)

from firecrawl import Firecrawl, CrawlOptions
import time
app = Firecrawl(api_key="your-api-key")
# Start crawl job
crawl_id = app.start_crawl("https://example.com", CrawlOptions(limit=100))
print(f"Started crawl job: {crawl_id}")

# Monitor progress
while True:
    status = app.get_crawl_status(crawl_id)
    print(f"Status: {status.status}")
    print(f"Completed: {status.completed}/{status.total}")
    if status.status in ["completed", "failed", "cancelled"]:
        break
    time.sleep(10)

# Get final results
if status.status == "completed":
    print(f"Crawl completed with {len(status.data)} pages")
else:
    # Check for errors
    errors = app.get_crawl_errors(crawl_id)
    print(f"Crawl failed: {errors}")

from firecrawl import Firecrawl, CrawlOptions, ScrapeOptions
app = Firecrawl(api_key="your-api-key")
# Advanced crawl configuration
scrape_options = ScrapeOptions(
    formats=["markdown", "html"],
    include_tags=["article", "main", "content"],
    exclude_tags=["nav", "footer", "aside"],
    wait_for=2000
)
crawl_options = CrawlOptions(
    limit=200,
    max_depth=4,
    allowed_domains=["example.com", "blog.example.com"],
    ignored_paths=["/admin", "/api", "/search"],
    scrape_options=scrape_options
)

# Preview crawl scope
preview = app.crawl_params_preview(crawl_options)
print(f"Estimated pages: {preview.get('estimated_pages')}")
print(f"Estimated cost: {preview.get('estimated_credits')}")

# Start crawl
crawl_id = app.start_crawl("https://example.com", crawl_options)
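The CrawlOptions reference below also lists a webhook field. A sketch of firing a crawl and relying on the webhook instead of polling, assuming the field takes a plain URL string that receives the completion notification:

from firecrawl import Firecrawl, CrawlOptions

app = Firecrawl(api_key="your-api-key")

# Assumption: Firecrawl notifies this URL when the job finishes,
# so no status-polling loop is needed on our side.
options = CrawlOptions(
    limit=100,
    webhook="https://example.com/hooks/firecrawl"
)
crawl_id = app.start_crawl("https://example.com", options)
print(f"Started crawl {crawl_id}; completion will be announced via webhook")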
"""Configuration options for crawling operations"""
limit: Optional[int] # Maximum pages to crawl (default: 5000)
max_depth: Optional[int] # Maximum crawl depth (default: unlimited)
allowed_domains: Optional[List[str]] # Domains to crawl
ignored_paths: Optional[List[str]] # Paths to ignore
include_paths: Optional[List[str]] # Paths to include
scrape_options: Optional[ScrapeOptions] # Options for individual page scraping
webhook: Optional[str] # Webhook URL for job completion notification
class CrawlResponse:
"""Response from crawl operation"""
success: bool
data: List[Document]
class CrawlJobStatus:
"""Status information for crawl job"""
status: str # "pending", "running", "completed", "failed", "cancelled"
job_id: str
total: int # Total pages to crawl
completed: int # Pages completed
data: Optional[List[Document]] # Results (available when completed)
class JobStatusType:
"""Enumeration of job status types"""
PENDING = "pending"
RUNNING = "running"
COMPLETED = "completed"
FAILED = "failed"
CANCELLED = "cancelled"All crawling operations have async equivalents:
import asyncio
from firecrawl import AsyncFirecrawl
async def crawl_async():
    app = AsyncFirecrawl(api_key="your-api-key")

    # Async complete crawl
    result = await app.crawl("https://example.com")

    # Async job management
    crawl_id = await app.start_crawl("https://example.com")
    status = await app.get_crawl_status(crawl_id)

    # Wait for completion (async-specific method)
    final_result = await app.wait_crawl(crawl_id)

asyncio.run(crawl_async())

Install with Tessl CLI
npx tessl i tessl/pypi-firecrawl-py