# Data Storage

Access to Apify's data storage systems including datasets for structured data and key-value stores for arbitrary data storage. These storage systems provide persistent, scalable data management for Actor runs and general use.

## Capabilities

### Dataset Operations

Dataset management for structured data storage with support for multiple formats and streaming access.

```python { .api }
class DatasetClient:
    def get(self) -> dict | None:
        """Get dataset information."""

    def update(self, *, name: str | None = None, general_access: StorageGeneralAccess | None = None) -> dict:
        """Update dataset configuration.

        Args:
            name: Dataset name
            general_access: Storage access level (from apify_shared.consts)
        """

    def delete(self) -> None:
        """Delete dataset."""

    def list_items(self, **kwargs) -> ListPage:
        """List dataset items with filtering and pagination.

        Args:
            offset (int, optional): Starting offset
            limit (int, optional): Maximum items to return
            desc (bool, optional): Sort in descending order
            fields (list[str], optional): Fields to include
            omit (list[str], optional): Fields to exclude
            format (str, optional): Response format ('json', 'csv', 'xlsx', etc.)
            clean (bool, optional): Clean items before return
            **kwargs: Additional filtering parameters
        """

    def iterate_items(self, **kwargs) -> Iterator[dict]:
        """Iterate over all dataset items.

        Args:
            offset (int, optional): Starting offset
            limit (int, optional): Maximum items to iterate
            **kwargs: Additional parameters passed to list_items
        """

    def download_items(self, **kwargs) -> bytes:
        """Download items as bytes (deprecated - use get_items_as_bytes)."""

    def get_items_as_bytes(self, **kwargs) -> bytes:
        """Get items as raw bytes.

        Args:
            format (str, optional): Export format
            **kwargs: Additional export parameters
        """

    def stream_items(self, **kwargs) -> Iterator[Response]:
        """Stream items as context manager.

        Args:
            format (str, optional): Stream format
            **kwargs: Additional streaming parameters
        """

    def push_items(self, items: list | dict) -> None:
        """Push items to dataset.

        Args:
            items: Items to push (single item or list of items)
        """

    def get_statistics(self) -> dict | None:
        """Get dataset statistics including item count and size."""

    def create_items_public_url(self, **kwargs) -> str:
        """Generate public URL for dataset items.

        Args:
            format (str, optional): Export format
            **kwargs: Additional URL parameters
        """

class DatasetClientAsync:
    """Async version of DatasetClient with identical methods."""

class DatasetCollectionClient:
    def list(self, **kwargs) -> ListPage[dict]:
        """List datasets.

        Args:
            unnamed (bool, optional): Include unnamed datasets
            limit (int, optional): Maximum number of items
            offset (int, optional): Offset for pagination
            desc (bool, optional): Sort in descending order
        """

    def get_or_create(self, *, name: str | None = None, schema: dict | None = None) -> dict:
        """Get or create dataset.

        Args:
            name: Dataset name
            schema: Dataset schema definition
        """

class DatasetCollectionClientAsync:
    """Async version of DatasetCollectionClient with identical methods."""
```

### Key-Value Store Operations

Key-value store management for arbitrary data storage with support for binary data and streaming.

```python { .api }
class KeyValueStoreClient:
    def get(self) -> dict | None:
        """Get key-value store information."""

    def update(self, *, name: str | None = None, general_access: StorageGeneralAccess | None = None) -> dict:
        """Update store configuration.

        Args:
            name: Store name
            general_access: Storage access level (from apify_shared.consts)
        """

    def delete(self) -> None:
        """Delete store."""

    def list_keys(self, **kwargs) -> dict:
        """List keys in the store.

        Args:
            limit (int, optional): Maximum keys to return
            exclusive_start_key (str, optional): Key to start listing from
        """

    def get_record(self, key: str) -> dict | None:
        """Get record by key.

        Args:
            key: Record key
        """

    def record_exists(self, key: str) -> bool:
        """Check if record exists.

        Args:
            key: Record key
        """

    def get_record_as_bytes(self, key: str) -> bytes | None:
        """Get record as raw bytes.

        Args:
            key: Record key
        """

    def stream_record(self, key: str) -> Iterator[dict | None]:
        """Stream record as context manager.

        Args:
            key: Record key
        """

    def set_record(self, key: str, value: Any, content_type: str | None = None) -> None:
        """Set record value.

        Args:
            key: Record key
            value: Record value (dict, str, bytes, etc.)
            content_type: MIME content type
        """

    def delete_record(self, key: str) -> None:
        """Delete record.

        Args:
            key: Record key
        """

    def create_keys_public_url(self, **kwargs) -> str:
        """Generate public URL for accessing keys."""

class KeyValueStoreClientAsync:
    """Async version of KeyValueStoreClient with identical methods."""

class KeyValueStoreCollectionClient:
    def list(self, **kwargs) -> ListPage[dict]:
        """List key-value stores.

        Args:
            unnamed (bool, optional): Include unnamed stores
            limit (int, optional): Maximum number of items
            offset (int, optional): Offset for pagination
            desc (bool, optional): Sort in descending order
        """

    def get_or_create(self, *, name: str | None = None, schema: dict | None = None) -> dict:
        """Get or create key-value store.

        Args:
            name: Store name
            schema: Store schema definition
        """

class KeyValueStoreCollectionClientAsync:
    """Async version of KeyValueStoreCollectionClient with identical methods."""
```

## Usage Examples

### Dataset Operations

```python
from apify_client import ApifyClient

client = ApifyClient('your-api-token')

# Create or get dataset
dataset = client.datasets().get_or_create(name='web-scraping-results')
dataset_client = client.dataset(dataset['id'])

# Push data to dataset
data = [
    {'url': 'https://example.com', 'title': 'Example Page', 'price': 29.99},
    {'url': 'https://example.org', 'title': 'Another Page', 'price': 39.99}
]
dataset_client.push_items(data)

# List items with pagination
items = dataset_client.list_items(limit=100, offset=0, format='json')
print(f"Retrieved {items.count} items")

# Iterate over all items
for item in dataset_client.iterate_items():
    print(f"Title: {item['title']}, Price: {item['price']}")

# Export dataset as CSV
csv_data = dataset_client.get_items_as_bytes(format='csv')
with open('results.csv', 'wb') as f:
    f.write(csv_data)

# Get dataset statistics
stats = dataset_client.get_statistics()
print(f"Dataset contains {stats['itemCount']} items")
```

### Key-Value Store Operations

```python
# Create or get key-value store
store = client.key_value_stores().get_or_create(name='app-config')
store_client = client.key_value_store(store['id'])

# Store configuration data
config = {
    'api_endpoint': 'https://api.example.com',
    'timeout': 30,
    'retry_count': 3
}
store_client.set_record('config', config, content_type='application/json')

# Store binary data
with open('screenshot.png', 'rb') as f:
    image_data = f.read()
store_client.set_record('screenshot', image_data, content_type='image/png')

# Retrieve data
stored_config = store_client.get_record('config')
print(f"API endpoint: {stored_config['api_endpoint']}")

# Check if record exists
if store_client.record_exists('screenshot'):
    image_bytes = store_client.get_record_as_bytes('screenshot')
    print(f"Screenshot size: {len(image_bytes)} bytes")

# List all keys
keys = store_client.list_keys()
print(f"Store contains keys: {keys['keys']}")

# Stream large records
with store_client.stream_record('large-file') as stream:
    for chunk in stream:
        process_chunk(chunk)
```

### Advanced Data Processing

```python
from datetime import datetime

# Process dataset items in batches
dataset_client = client.dataset('dataset-id')

def process_batch(items):
    # Process items in batch
    processed = []
    for item in items:
        processed.append({
            **item,
            'processed_at': datetime.now().isoformat(),
            'price_usd': item['price'] * 1.2  # Convert currency
        })
    return processed

# Iterate with batch processing
batch_size = 1000
offset = 0

while True:
    batch = dataset_client.list_items(limit=batch_size, offset=offset)
    if not batch.items:
        break

    processed_items = process_batch(batch.items)

    # Store processed results
    processed_dataset = client.datasets().get_or_create(name='processed-results')
    client.dataset(processed_dataset['id']).push_items(processed_items)

    offset += batch_size
    print(f"Processed {offset} items")
```