Apify API client for Python providing access to web scraping and automation platform resources
Registry status:
- Quality: Pending — "Does it follow best practices?" has not yet been assessed.
- Impact: Pending — no eval scenarios have been run.
Access to Apify's data storage systems including datasets for structured data and key-value stores for arbitrary data storage. These storage systems provide persistent, scalable data management for Actor runs and general use.
Dataset management for structured data storage with support for multiple formats and streaming access.
class DatasetClient:
def get(self) -> dict | None:
"""Get dataset information."""
def update(self, *, name: str | None = None, general_access: StorageGeneralAccess | None = None) -> dict:
"""Update dataset configuration.
Args:
name: Dataset name
general_access: Storage access level (from apify_shared.consts)
"""
def delete(self) -> None:
"""Delete dataset."""
def list_items(self, **kwargs) -> ListPage:
"""List dataset items with filtering and pagination.
Args:
offset (int, optional): Starting offset
limit (int, optional): Maximum items to return
desc (bool, optional): Sort in descending order
fields (list[str], optional): Fields to include
omit (list[str], optional): Fields to exclude
format (str, optional): Response format ('json', 'csv', 'xlsx', etc.)
clean (bool, optional): Clean items before return
**kwargs: Additional filtering parameters
"""
def iterate_items(self, **kwargs) -> Iterator[dict]:
"""Iterate over all dataset items.
Args:
offset (int, optional): Starting offset
limit (int, optional): Maximum items to iterate
**kwargs: Additional parameters passed to list_items
"""
def download_items(self, **kwargs) -> bytes:
"""Download items as bytes (deprecated - use get_items_as_bytes)."""
def get_items_as_bytes(self, **kwargs) -> bytes:
"""Get items as raw bytes.
Args:
format (str, optional): Export format
**kwargs: Additional export parameters
"""
def stream_items(self, **kwargs) -> Iterator[Response]:
"""Stream items as context manager.
Args:
format (str, optional): Stream format
**kwargs: Additional streaming parameters
"""
def push_items(self, items: list | dict) -> None:
"""Push items to dataset.
Args:
items: Items to push (single item or list of items)
"""
def get_statistics(self) -> dict | None:
"""Get dataset statistics including item count and size."""
def create_items_public_url(self, **kwargs) -> str:
"""Generate public URL for dataset items.
Args:
format (str, optional): Export format
**kwargs: Additional URL parameters
"""
class DatasetClientAsync:
    """Async version of DatasetClient with identical methods."""
class DatasetCollectionClient:
def list(self, **kwargs) -> ListPage[dict]:
"""List datasets.
Args:
unnamed (bool, optional): Include unnamed datasets
limit (int, optional): Maximum number of items
offset (int, optional): Offset for pagination
desc (bool, optional): Sort in descending order
"""
def get_or_create(self, *, name: str | None = None, schema: dict | None = None) -> dict:
"""Get or create dataset.
Args:
name: Dataset name
schema: Dataset schema definition
"""
class DatasetCollectionClientAsync:
    """Async version of DatasetCollectionClient with identical methods."""


# Key-value store management for arbitrary data storage with support for
# binary data and streaming.
class KeyValueStoreClient:
def get(self) -> dict | None:
"""Get key-value store information."""
def update(self, *, name: str | None = None, general_access: StorageGeneralAccess | None = None) -> dict:
"""Update store configuration.
Args:
name: Store name
general_access: Storage access level (from apify_shared.consts)
"""
def delete(self) -> None:
"""Delete store."""
def list_keys(self, **kwargs) -> dict:
"""List keys in the store.
Args:
limit (int, optional): Maximum keys to return
exclusive_start_key (str, optional): Key to start listing from
"""
def get_record(self, key: str) -> dict | None:
"""Get record by key.
Args:
key: Record key
"""
def record_exists(self, key: str) -> bool:
"""Check if record exists.
Args:
key: Record key
"""
def get_record_as_bytes(self, key: str) -> bytes | None:
"""Get record as raw bytes.
Args:
key: Record key
"""
def stream_record(self, key: str) -> Iterator[dict | None]:
"""Stream record as context manager.
Args:
key: Record key
"""
def set_record(self, key: str, value: Any, content_type: str | None = None) -> None:
"""Set record value.
Args:
key: Record key
value: Record value (dict, str, bytes, etc.)
content_type: MIME content type
"""
def delete_record(self, key: str) -> None:
"""Delete record.
Args:
key: Record key
"""
def create_keys_public_url(self, **kwargs) -> str:
"""Generate public URL for accessing keys."""
class KeyValueStoreClientAsync:
    """Async version of KeyValueStoreClient with identical methods."""
class KeyValueStoreCollectionClient:
def list(self, **kwargs) -> ListPage[dict]:
"""List key-value stores.
Args:
unnamed (bool, optional): Include unnamed stores
limit (int, optional): Maximum number of items
offset (int, optional): Offset for pagination
desc (bool, optional): Sort in descending order
"""
def get_or_create(self, *, name: str | None = None, schema: dict | None = None) -> dict:
"""Get or create key-value store.
Args:
name: Store name
schema: Store schema definition
"""
class KeyValueStoreCollectionClientAsync:
    """Async version of KeyValueStoreCollectionClient with identical methods."""


# --- Usage examples ---

from apify_client import ApifyClient
# Example: push, list, iterate, and export dataset items.
client = ApifyClient('your-api-token')

# Create or get dataset
dataset = client.datasets().get_or_create(name='web-scraping-results')
dataset_client = client.dataset(dataset['id'])

# Push data to dataset
data = [
    {'url': 'https://example.com', 'title': 'Example Page', 'price': 29.99},
    {'url': 'https://example.org', 'title': 'Another Page', 'price': 39.99},
]
dataset_client.push_items(data)

# List items with pagination
items = dataset_client.list_items(limit=100, offset=0, format='json')
print(f"Retrieved {items.count} items")

# Iterate over all items
for item in dataset_client.iterate_items():
    print(f"Title: {item['title']}, Price: {item['price']}")

# Export dataset as CSV
csv_data = dataset_client.get_items_as_bytes(format='csv')
with open('results.csv', 'wb') as f:
    f.write(csv_data)

# Get dataset statistics
stats = dataset_client.get_statistics()
print(f"Dataset contains {stats['itemCount']} items")

# Create or get key-value store
# Example: store and retrieve JSON config, binary data, and streamed records.
store = client.key_value_stores().get_or_create(name='app-config')
store_client = client.key_value_store(store['id'])

# Store configuration data
config = {
    'api_endpoint': 'https://api.example.com',
    'timeout': 30,
    'retry_count': 3,
}
store_client.set_record('config', config, content_type='application/json')

# Store binary data
with open('screenshot.png', 'rb') as f:
    image_data = f.read()
store_client.set_record('screenshot', image_data, content_type='image/png')

# Retrieve data
stored_config = store_client.get_record('config')
print(f"API endpoint: {stored_config['api_endpoint']}")

# Check if record exists
if store_client.record_exists('screenshot'):
    image_bytes = store_client.get_record_as_bytes('screenshot')
    print(f"Screenshot size: {len(image_bytes)} bytes")

# List all keys
keys = store_client.list_keys()
print(f"Store contains keys: {keys['keys']}")

# Stream large records
with store_client.stream_record('large-file') as stream:
    for chunk in stream:
        process_chunk(chunk)

# Process dataset items in batches
# Example: read a dataset in pages, transform each page, and push the
# results to a second dataset.
from datetime import datetime  # needed by process_batch below

dataset_client = client.dataset('dataset-id')

def process_batch(items):
    """Return copies of *items* stamped with a timestamp and converted price."""
    processed = []
    for item in items:
        processed.append({
            **item,
            'processed_at': datetime.now().isoformat(),
            'price_usd': item['price'] * 1.2,  # Convert currency
        })
    return processed

# Iterate with batch processing
batch_size = 1000
offset = 0
# Create the output dataset once, before the loop — calling get_or_create on
# every iteration would issue a redundant API request per batch.
processed_dataset = client.datasets().get_or_create(name='processed-results')
processed_client = client.dataset(processed_dataset['id'])
while True:
    batch = dataset_client.list_items(limit=batch_size, offset=offset)
    if not batch.items:
        break
    processed_items = process_batch(batch.items)
    # Store processed results
    processed_client.push_items(processed_items)
    offset += batch_size
    print(f"Processed {offset} items")

# Install with Tessl CLI:
#   npx tessl i tessl/pypi-apify-client