Apify API client for Python providing access to web scraping and automation platform resources
npx @tessl/cli install tessl/pypi-apify-client@2.0.0

A comprehensive Python client library for the Apify web scraping and automation platform. Provides convenient access to all Apify API resources including Actors, datasets, key-value stores, request queues, and platform management, with both synchronous and asynchronous support.
pip install apify-client

from apify_client import ApifyClient

For asynchronous operations:
from apify_client import ApifyClientAsync

from apify_client import ApifyClient
# Quick-start example: run an Actor, read its results, and prepare a
# key-value store for persisting a summary.

# Initialize client with API token
client = ApifyClient('your-api-token')

# Run an Actor and get results
run = client.actor('john-doe/web-scraper').call(run_input={'startUrls': ['https://example.com']})

# Access the default dataset
dataset = client.dataset(run['defaultDatasetId'])
items = dataset.list_items()
print(f"Scraped {items.count} items")
for item in items.items:
    # The loop body must be indented; without it the example is a syntax error.
    print(item)

# Store data in key-value store
kvs = client.key_value_stores().get_or_create(name='my-storage')
client.key_value_store(kvs['id']).set_record('results', {'status': 'completed', 'itemCount': items.count})

The Apify Client follows a hierarchical resource-based architecture:
- Main clients (ApifyClient, ApifyClientAsync) managing authentication and base configuration
- Resource clients (e.g. ActorClient, DatasetClient) for specific platform resources
- Collection clients (e.g. ActorCollectionClient) providing listing and creation capabilities
- Specialized clients such as LogClient for streaming logs and RequestQueueClient for queue operations

All operations support both synchronous and asynchronous execution patterns, with consistent method signatures and error handling across the entire API surface.
Main client classes for accessing the Apify API with authentication and configuration options.
class ApifyClient:
def __init__(
self,
token: str | None = None,
*,
api_url: str | None = None,
max_retries: int | None = 8,
min_delay_between_retries_millis: int | None = 500,
timeout_secs: int | None = 360,
) -> None: ...
class ApifyClientAsync:
def __init__(
self,
token: str | None = None,
*,
api_url: str | None = None,
max_retries: int | None = 8,
min_delay_between_retries_millis: int | None = 500,
timeout_secs: int | None = 360,
) -> None: ...

Comprehensive Actor lifecycle management including creation, configuration, execution, builds, versions, and environment variables.
def actor(self, actor_id: str) -> ActorClient: ...
def actors(self) -> ActorCollectionClient: ...

Access to Apify's data storage systems including datasets for structured data and key-value stores for arbitrary data storage.
def dataset(self, dataset_id: str) -> DatasetClient: ...
def datasets(self) -> DatasetCollectionClient: ...
def key_value_store(self, key_value_store_id: str) -> KeyValueStoreClient: ...
def key_value_stores(self) -> KeyValueStoreCollectionClient: ...

Request queue operations for managing crawling workflows and Actor communication.
def request_queue(self, request_queue_id: str, *, client_key: str | None = None) -> RequestQueueClient: ...
def request_queues(self) -> RequestQueueCollectionClient: ...

Actor run lifecycle management including execution monitoring, result access, and advanced operations.
def run(self, run_id: str) -> RunClient: ...
def runs(self) -> RunCollectionClient: ...

Actor build operations including triggering builds, monitoring progress, and accessing build artifacts.
def build(self, build_id: str) -> BuildClient: ...
def builds(self) -> BuildCollectionClient: ...

Task creation and management for reusable Actor configurations and scheduled executions.
def task(self, task_id: str) -> TaskClient: ...
def tasks(self) -> TaskCollectionClient: ...

Schedule management for automated Actor and task executions with cron-like functionality.
def schedule(self, schedule_id: str) -> ScheduleClient: ...
def schedules(self) -> ScheduleCollectionClient: ...

Webhook configuration and management for real-time notifications and integrations.
def webhook(self, webhook_id: str) -> WebhookClient: ...
def webhooks(self) -> WebhookCollectionClient: ...
def webhook_dispatch(self, webhook_dispatch_id: str) -> WebhookDispatchClient: ...
def webhook_dispatches(self) -> WebhookDispatchCollectionClient: ...

User account information, usage monitoring, and account limit management.
def user(self, user_id: str | None = None) -> UserClient: ...

Log access and streaming for Actor runs and builds with real-time monitoring capabilities.
def log(self, build_or_run_id: str) -> LogClient: ...

Access to the Apify Store for discovering and using public Actors.
def store(self) -> StoreCollectionClient: ...

from typing import Any, Generic, TypeVar
JSONSerializable = str | int | float | bool | None | dict[str, Any] | list[Any]
# Type of the objects held by a ListPage.
T = TypeVar('T')


class ListPage(Generic[T]):
    """A single page of items returned from a list() method."""

    items: list[T]
    """List of returned objects on this page."""

    count: int
    """Count of the returned objects on this page."""

    offset: int
    """The offset of the first object specified in the API call."""

    limit: int
    """The limit on the number of returned objects specified in the API call."""

    total: int
    """Total number of objects matching the API call criteria."""

    desc: bool
    """Whether the listing is descending or not."""

    def __init__(self, data: dict) -> None:
        """Create the page from ``data``, a dict.

        Signature stub only: how ``data`` maps onto the attributes above is
        not shown here.
        """
        ...
# External types from apify_shared package
from apify_shared.consts import (
StorageGeneralAccess, # Enum for storage access levels
ActorJobStatus, # Enum for Actor job statuses
MetaOrigin, # Enum for run origins
ActorSourceType, # Enum for Actor source types
WebhookEventType, # Enum for webhook event types
RunGeneralAccess # Enum for run access levels
)

class ApifyClientError(Exception):
"""Base class for errors specific to the Apify API Client."""
class ApifyApiError(ApifyClientError):
    """Error specific to requests to the Apify API."""

    def __init__(self, response, attempt: int, method: str = 'GET') -> None:
        """Build the error from an API response.

        Signature stub only; the implementation is not shown here.

        Args:
            response: The response object the error is built from (its type
                is not shown here).
            attempt: Attempt number associated with the request; the exact
                counting convention is not shown here.
            method: HTTP method name; defaults to ``'GET'``.
        """
        ...

    # Public attributes of the error. Their meanings are inferred from the
    # names only -- verify against the implementation before relying on them.
    message: str | None
    type: str | None
    data: dict[str, str]
    name: str
    status_code: int
    attempt: int
    http_method: str
class InvalidResponseBodyError(ApifyClientError):
"""Error caused by the response body failing to be parsed."""
def __init__(self, response) -> None: ...
name: str
code: str
response: Any

DEFAULT_API_URL: str = 'https://api.apify.com'
DEFAULT_TIMEOUT: int = 360 # seconds
API_VERSION: str = 'v2'
__version__: str # Package version