Python SDK for the Firecrawl API that enables web scraping, crawling, and content extraction with LLM-optimized output formats
npx @tessl/cli install tessl/pypi-firecrawl-py@4.3.0

A comprehensive Python SDK for the Firecrawl API that enables web scraping, crawling, and content extraction with output formatted for use with language models (LLMs). The SDK offers both synchronous and asynchronous clients for scraping, crawling, and monitoring operations, with support for multiple output formats including markdown and HTML.
pip install firecrawl-py

from firecrawl import Firecrawl, AsyncFirecrawl

Legacy compatibility (aliases):
from firecrawl import FirecrawlApp, AsyncFirecrawlApp

Version-specific access:
from firecrawl import V1FirecrawlApp, AsyncV1FirecrawlApp

Monitoring:
from firecrawl import Watcher, AsyncWatcher

Basic usage:

from firecrawl import Firecrawl
# Initialize client
app = Firecrawl(api_key="your-api-key")
# Scrape a single URL
result = app.scrape("https://example.com")
print(result)
# Search the web
search_results = app.search("latest AI developments")
print(search_results)
# Crawl a website
crawl_result = app.crawl("https://example.com", limit=100)
print(crawl_result)

Async usage:
import asyncio
from firecrawl import AsyncFirecrawl
async def main():
    app = AsyncFirecrawl(api_key="your-api-key")

    # Async scraping
    result = await app.scrape("https://example.com")
    print(result)
asyncio.run(main())

The firecrawl-py SDK provides a unified interface with dual API version support:
Firecrawl and AsyncFirecrawl expose the v2 API by default, with v1 access available via the .v1 property.

Essential web scraping functionality including single URL scraping, web search, and site mapping. These operations provide immediate results with comprehensive format options.
def scrape(url: str, *, formats: Optional[List[str]] = None, **kwargs) -> Document
def search(query: str, *, sources: Optional[List[str]] = None, **kwargs) -> SearchData
def map(url: str, **kwargs) -> MapData
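For illustration, a minimal sketch of these operations; markdown and HTML are the formats mentioned above, while the "web" source value is an assumption:

from firecrawl import Firecrawl

app = Firecrawl(api_key="your-api-key")

# Scrape with explicit output formats (markdown and HTML are described above as supported)
doc = app.scrape("https://example.com", formats=["markdown", "html"])
print(doc.metadata)

# Search the web; restricting sources to "web" is an assumption about valid source names
results = app.search("firecrawl sdk", sources=["web"])

# Map a site to discover its URLs
site_map = app.map("https://example.com")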
Website crawling functionality for discovering and processing multiple pages from a website. Supports both complete crawling with result polling and asynchronous job-based crawling for large sites.

def crawl(url: str, options: Optional[CrawlOptions] = None) -> CrawlResponse
def start_crawl(url: str, options: Optional[CrawlOptions] = None) -> str
def get_crawl_status(crawl_id: str) -> CrawlJobStatus
def cancel_crawl(crawl_id: str) -> dict
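A minimal sketch of job-based crawling using the calls above; the status attribute and its values are assumptions about the CrawlJobStatus object:

import time
from firecrawl import Firecrawl

app = Firecrawl(api_key="your-api-key")

# Blocking crawl that polls until completion
crawl_result = app.crawl("https://example.com")

# Or start a job yourself and poll its status
job_id = app.start_crawl("https://example.com")
while True:
    status = app.get_crawl_status(job_id)
    # The "status" attribute name and its values are assumptions
    if getattr(status, "status", None) in ("completed", "failed", "cancelled"):
        break
    time.sleep(5)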
Batch operations for processing multiple URLs efficiently. Includes both batch scraping with full result polling and asynchronous job management for large-scale operations.

def batch_scrape(urls: List[str], options: Optional[ScrapeOptions] = None) -> BatchScrapeResponse
def start_batch_scrape(urls: List[str], options: Optional[ScrapeOptions] = None) -> str
def get_batch_scrape_status(batch_id: str) -> BatchScrapeJobStatus
def cancel_batch_scrape(batch_id: str) -> dict
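A sketch of batch scraping per the signatures above; the ScrapeOptions fields come from the type listing below, and the import path for ScrapeOptions is an assumption:

from firecrawl import Firecrawl
from firecrawl import ScrapeOptions  # import path is an assumption

app = Firecrawl(api_key="your-api-key")
urls = ["https://example.com/a", "https://example.com/b"]
options = ScrapeOptions(formats=["markdown"], wait_for=1000)

# Blocking call that polls until every URL has been processed
batch = app.batch_scrape(urls, options=options)

# Or manage the job yourself
batch_id = app.start_batch_scrape(urls, options=options)
status = app.get_batch_scrape_status(batch_id)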
AI-powered structured data extraction using custom schemas. Supports both immediate extraction with result polling and asynchronous job-based extraction for complex data processing.

def extract(url: str, schema: dict, options: Optional[ExtractOptions] = None) -> ExtractResponse
def start_extract(url: str, schema: dict, options: Optional[ExtractOptions] = None) -> str
def get_extract_status(extract_id: str) -> ExtractJobStatus
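A sketch of schema-based extraction per the signatures above; the schema contents are illustrative:

from firecrawl import Firecrawl

app = Firecrawl(api_key="your-api-key")

# JSON Schema describing the structured data to pull from the page
schema = {
    "type": "object",
    "properties": {"title": {"type": "string"}, "price": {"type": "number"}},
    "required": ["title"],
}

# Immediate extraction with polling handled by the SDK
result = app.extract("https://example.com/product", schema)

# Or run extraction as a job and check on it later
extract_id = app.start_extract("https://example.com/product", schema)
status = app.get_extract_status(extract_id)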
Real-time job monitoring using WebSocket connections for tracking long-running operations. Provides both synchronous and asynchronous monitoring interfaces.

class Watcher:
    def watch(self, job_id: str, job_type: str) -> Iterator[dict]

class AsyncWatcher:
    def watch(self, job_id: str, job_type: str) -> AsyncIterator[dict]
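A sketch of monitoring a crawl with Watcher; the constructor arguments and the "crawl" job_type value are assumptions, since only the watch signature is documented above:

from firecrawl import Firecrawl, Watcher

app = Firecrawl(api_key="your-api-key")
job_id = app.start_crawl("https://example.com")

# Constructor arguments are an assumption; Watcher may instead take an API key or client config
watcher = Watcher(app)

# Iterate over status updates pushed over the WebSocket connection
for update in watcher.watch(job_id, "crawl"):
    print(update)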
Account usage monitoring including credit usage, token consumption, concurrency limits, and job queue status tracking. Includes both current usage and historical usage data.

def get_credit_usage() -> CreditUsage
def get_token_usage() -> TokenUsage
def get_credit_usage_historical(by_api_key: bool = False) -> CreditUsageHistoricalResponse
def get_token_usage_historical(by_api_key: bool = False) -> TokenUsageHistoricalResponse
def get_concurrency() -> ConcurrencyInfo
def get_queue_status() -> QueueStatus
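A sketch of the account monitoring calls above; the returned objects are printed whole since their attributes are not part of this listing:

from firecrawl import Firecrawl

app = Firecrawl(api_key="your-api-key")

print(app.get_credit_usage())
print(app.get_token_usage())
print(app.get_concurrency())
print(app.get_queue_status())

# Historical usage, optionally broken down by API key
print(app.get_credit_usage_historical(by_api_key=True))
print(app.get_token_usage_historical())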
Complete v1 API support for backward compatibility with existing implementations. Includes all v1-specific operations and data types.

class V1FirecrawlApp:
    def scrape_url(self, url: str, params: Optional[dict] = None) -> dict
    def crawl_url(self, url: str, params: Optional[dict] = None) -> dict
    def extract(self, data: dict, schema: dict, prompt: Optional[str] = None) -> dict
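A sketch of the v1 compatibility surface; whether the .v1 property exposes the same methods as V1FirecrawlApp, and the V1FirecrawlApp constructor arguments, are assumptions:

from firecrawl import Firecrawl, V1FirecrawlApp

# Via the .v1 property on the v2 client (assumed to expose the v1 methods)
app = Firecrawl(api_key="your-api-key")
result = app.v1.scrape_url("https://example.com", params={"formats": ["markdown"]})

# Or via the standalone v1 client (constructor argument is an assumption)
v1_app = V1FirecrawlApp(api_key="your-api-key")
result = v1_app.scrape_url("https://example.com", params={"formats": ["markdown"]})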
Core type definitions used across the API:

class Document:
"""Main document result structure"""
url: str
content: str
metadata: dict
class ScrapeOptions:
"""Configuration options for scraping operations"""
formats: Optional[List[str]]
include_tags: Optional[List[str]]
exclude_tags: Optional[List[str]]
wait_for: Optional[int]
screenshot: Optional[bool]
class CrawlOptions:
    """Configuration options for crawling operations"""
    limit: Optional[int]
    max_depth: Optional[int]
    allowed_domains: Optional[List[str]]
    ignored_paths: Optional[List[str]]
    scrape_options: Optional[ScrapeOptions]

class SearchOptions:
    """Configuration options for search operations"""
    limit: Optional[int]
    search_type: Optional[str]
    language: Optional[str]
    country: Optional[str]

class PaginationConfig:
    """Configuration for paginated requests"""
    auto_paginate: Optional[bool]
    max_pages: Optional[int]
    max_results: Optional[int]
    max_wait_time: Optional[int]
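A hypothetical sketch of how PaginationConfig might be applied when fetching large job results; the get_crawl_status signature above does not list a pagination parameter, so the keyword used here is an assumption:

from firecrawl import Firecrawl
from firecrawl import PaginationConfig  # import path is an assumption

app = Firecrawl(api_key="your-api-key")
job_id = app.start_crawl("https://example.com")

# Cap automatic pagination when collecting results; the keyword name is hypothetical
pagination = PaginationConfig(auto_paginate=True, max_pages=5, max_results=500)
status = app.get_crawl_status(job_id, pagination_config=pagination)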
class CreditUsageHistoricalResponse:
    """Historical credit usage data"""
    data: List[CreditUsageHistoricalPeriod]

class CreditUsageHistoricalPeriod:
    """Credit usage for a specific period"""
    period_start: str
    period_end: str
    credits_used: int
    credits_remaining: int

class TokenUsageHistoricalResponse:
    """Historical token usage data"""
    data: List[TokenUsageHistoricalPeriod]

class TokenUsageHistoricalPeriod:
    """Token usage for a specific period"""
    period_start: str
    period_end: str
    tokens_used: int
    tokens_remaining: int
class Location:
    """Geographic location configuration"""
    country: Optional[str]
    languages: Optional[List[str]]

class Viewport:
    """Browser viewport configuration"""
    width: int
    height: int

class WebhookConfig:
    """Webhook configuration for job notifications"""
    url: str
    headers: Optional[Dict[str, str]]
    metadata: Optional[Dict[str, Any]]
    events: Optional[List[str]]
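A sketch of configuring webhook notifications for a crawl job; the event names are illustrative, and passing the webhook to start_crawl is an assumption, since the signature above does not list such a parameter:

from firecrawl import Firecrawl
from firecrawl import WebhookConfig  # import path is an assumption

app = Firecrawl(api_key="your-api-key")

webhook = WebhookConfig(
    url="https://example.com/hooks/firecrawl",
    headers={"Authorization": "Bearer my-hook-token"},
    events=["completed", "failed"],  # event names are illustrative
)

# Passing the webhook when starting the crawl is an assumption about the API surface
job_id = app.start_crawl("https://example.com", webhook=webhook)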
# Action Types for browser automation
class WaitAction:
    """Wait action for browser automation"""
    type: Literal["wait"]
    milliseconds: int

class ScreenshotAction:
    """Screenshot action for browser automation"""
    type: Literal["screenshot"]
    full_page: Optional[bool]

class ClickAction:
    """Click action for browser automation"""
    type: Literal["click"]
    selector: str

class WriteAction:
    """Write action for browser automation"""
    type: Literal["write"]
    text: str

class PressAction:
    """Press key action for browser automation"""
    type: Literal["press"]
    key: str

class ScrollAction:
    """Scroll action for browser automation"""
    type: Literal["scroll"]
    x: Optional[int]
    y: Optional[int]

class ScrapeAction:
    """Scrape action for browser automation"""
    type: Literal["scrape"]

class ExecuteJavascriptAction:
    """Execute JavaScript action for browser automation"""
    type: Literal["execute_javascript"]
    code: str

class PDFAction:
    """PDF action for browser automation"""
    type: Literal["pdf"]
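A sketch of composing browser automation actions from the types above; the import paths and the actions keyword on scrape are assumptions, since the scrape signature earlier only lists formats:

from firecrawl import Firecrawl
from firecrawl import WaitAction, ClickAction, ScreenshotAction  # import paths are assumptions

app = Firecrawl(api_key="your-api-key")

actions = [
    WaitAction(type="wait", milliseconds=1000),
    ClickAction(type="click", selector="#load-more"),
    ScreenshotAction(type="screenshot", full_page=True),
]

# Passing actions to scrape is an assumption about the API surface
doc = app.scrape("https://example.com", actions=actions)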
# Format Types for advanced output formatting
class JsonFormat:
    """JSON format configuration"""
    type: Literal["json"]
    schema: Optional[Dict[str, Any]]
    prompt: Optional[str]

class ChangeTrackingFormat:
    """Change tracking format configuration"""
    type: Literal["change_tracking"]
    threshold: Optional[float]

class ScreenshotFormat:
    """Screenshot format configuration"""
    type: Literal["screenshot"]
    full_page: Optional[bool]
    viewport: Optional[Viewport]

class AttributesFormat:
    """Attributes format configuration"""
    type: Literal["attributes"]
    selectors: List[AttributeSelector]

class AttributeSelector:
    """Attribute selector for extraction"""
    selector: str
    attribute: str

class PDFParser:
    """PDF parser configuration"""
    type: Literal["pdf"]
    max_pages: Optional[int]
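A sketch of requesting structured output with the format objects above; the import paths, and mixing plain format names with format objects in the formats list, are assumptions:

from firecrawl import Firecrawl
from firecrawl import JsonFormat, ScreenshotFormat, Viewport  # import paths are assumptions

app = Firecrawl(api_key="your-api-key")

json_format = JsonFormat(
    type="json",
    schema={"type": "object", "properties": {"title": {"type": "string"}}},
    prompt="Extract the page title",
)
screenshot_format = ScreenshotFormat(
    type="screenshot",
    full_page=False,
    viewport=Viewport(width=1280, height=800),
)

# Mixing string and object formats in one request is an assumption
doc = app.scrape("https://example.com", formats=["markdown", json_format, screenshot_format])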