0
# Storage
1
2
Crawlee provides comprehensive storage solutions for persisting scraped data, managing request queues, and handling key-value storage. The storage system supports both cloud-based and local storage backends.
3
4
## Capabilities
5
6
### Dataset
7
8
Datasets store structured data in JSON format, ideal for storing scraped results and enabling easy export to various formats.
9
10
```typescript { .api }
11
/**
12
* Dataset for storing structured data (JSON objects)
13
*/
14
class Dataset {
15
/** Open an existing dataset or create a new one */
16
static open(idOrName?: string): Promise<Dataset>;
17
18
/** Get the default dataset instance */
19
static getDefaultDataset(): Promise<Dataset>;
20
21
/** Push data to the dataset */
22
pushData(data: Dictionary | Dictionary[]): Promise<void>;
23
24
/** Get data from the dataset */
25
getData(options?: DatasetDataOptions): Promise<DatasetData>;
26
27
/** Get dataset information */
28
getInfo(): Promise<DatasetInfo>;
29
30
/** Export dataset to various formats */
31
exportTo(options: DatasetExportOptions): Promise<void>;
32
33
/** Delete the dataset */
34
drop(): Promise<void>;
35
36
/** Convert dataset to a stream */
37
stream(options?: DatasetStreamOptions): NodeJS.ReadableStream;
38
39
/** The dataset ID */
40
readonly id: string;
41
42
/** The dataset name */
43
readonly name?: string;
44
}
45
```
46
47
### DatasetDataOptions
48
49
Options for retrieving data from datasets.
50
51
```typescript { .api }
52
interface DatasetDataOptions {
53
/** Number of items to retrieve */
54
limit?: number;
55
56
/** Number of items to skip */
57
offset?: number;
58
59
/** Whether to return data in clean JSON format */
60
clean?: boolean;
61
62
/** Fields to include in results */
63
fields?: string[];
64
65
/** Whether to return data in descending order */
66
desc?: boolean;
67
68
/** Whether to stream the results instead of returning them all at once */
69
streaming?: boolean;
70
}
71
```
72
73
### DatasetExportOptions
74
75
Options for exporting dataset data to different formats.
76
77
```typescript { .api }
78
interface DatasetExportOptions {
79
/** Format to export to */
80
format: 'json' | 'csv' | 'xlsx' | 'xml' | 'rss';
81
82
/** Fields to include in export */
83
fields?: string[];
84
85
/** Whether to exclude empty fields */
86
omitEmptyFields?: boolean;
87
88
/** Whether to exclude null values */
89
omitNullValues?: boolean;
90
91
/** Key to store the exported file under */
92
key: string;
93
94
/** Key-value store to save to */
95
keyValueStore?: KeyValueStore;
96
97
/** Maximum number of items to export */
98
limit?: number;
99
100
/** Number of items to skip */
101
offset?: number;
102
}
103
```
104
105
**Usage Examples:**
106
107
```typescript
108
import { Dataset, CheerioCrawler } from "crawlee";
109
110
const crawler = new CheerioCrawler({
111
requestHandler: async ({ $, request, pushData }) => {
112
// Extract product data
113
const products = [];
114
$('.product').each((_, element) => {
115
products.push({
116
name: $(element).find('.name').text().trim(),
117
price: $(element).find('.price').text().trim(),
118
url: new URL($(element).find('a').attr('href'), request.loadedUrl).href,
119
extractedAt: new Date().toISOString(),
120
});
121
});
122
123
// Save to default dataset
124
await pushData(products);
125
},
126
});
127
128
await crawler.run();
129
130
// Work with the dataset after crawling
131
const dataset = await Dataset.getDefaultDataset();
132
133
// Get all data
134
const { items } = await dataset.getData();
135
console.log(`Extracted ${items.length} products`);
136
137
// Export to CSV
138
await dataset.exportTo({
139
format: 'csv',
140
key: 'products.csv',
141
fields: ['name', 'price', 'url'],
142
});
143
144
// Get specific data with filtering
145
const recentItems = await dataset.getData({
146
limit: 100,
147
clean: true,
148
desc: true,
149
});
150
151
// Stream large datasets
152
const stream = dataset.stream({
153
limit: 10000,
154
});
155
156
stream.on('data', (item) => {
157
console.log('Processing item:', item.name);
158
});
159
```
160
161
### KeyValueStore
162
163
Key-value stores handle unstructured data, files, and configuration storage with support for various data formats.
164
165
```typescript { .api }
166
/**
167
* Key-value store for storing unstructured data and files
168
*/
169
class KeyValueStore {
170
/** Open an existing store or create a new one */
171
static open(idOrName?: string): Promise<KeyValueStore>;
172
173
/** Get the default key-value store */
174
static getDefaultKeyValueStore(): Promise<KeyValueStore>;
175
176
/** Set a value for a key */
177
setValue(key: string, value: any, options?: RecordOptions): Promise<void>;
178
179
/** Get a value by key */
180
getValue<T = any>(key: string): Promise<T | null>;
181
182
/** Get a public URL for a stored file */
183
getPublicUrl(key: string): string;
184
185
/** Delete a record */
186
delete(key: string): Promise<void>;
187
188
/** Get store information */
189
getInfo(): Promise<KeyValueStoreInfo>;
190
191
/** List all keys in the store */
192
listKeys(options?: ListKeysOptions): Promise<KeyValueStoreKeys>;
193
194
/** Delete the entire store */
195
drop(): Promise<void>;
196
197
/** The store ID */
198
readonly id: string;
199
200
/** The store name */
201
readonly name?: string;
202
}
203
```
204
205
### RecordOptions
206
207
Options for storing records in key-value stores.
208
209
```typescript { .api }
210
interface RecordOptions {
211
/** Content type of the stored data */
212
contentType?: string;
213
214
/** Whether to gzip the content */
215
gzip?: boolean;
216
217
/** Custom metadata */
218
metadata?: Dictionary;
219
}
220
```
221
222
**Usage Examples:**
223
224
```typescript
225
import { KeyValueStore, CheerioCrawler } from "crawlee";
226
227
const crawler = new CheerioCrawler({
228
requestHandler: async ({ request, $, setValue }) => {
229
// Save HTML content
230
await setValue(`html-${request.uniqueKey}`, $.html());
231
232
// Save screenshot (if using browser crawler)
233
await setValue(`screenshot-${request.uniqueKey}`, screenshotBuffer, {
234
contentType: 'image/png',
235
});
236
237
// Save structured data
238
await setValue(`metadata-${request.uniqueKey}`, {
239
title: $('title').text(),
240
description: $('meta[name="description"]').attr('content'),
241
extractedAt: new Date(),
242
});
243
244
// Save with custom options
245
await setValue('large-file', largeJsonData, {
246
gzip: true,
247
contentType: 'application/json',
248
});
249
},
250
});
251
252
// Work with key-value store after crawling
253
const store = await KeyValueStore.getDefaultKeyValueStore();
254
255
// Retrieve stored data
256
const htmlContent = await store.getValue('html-page-1');
257
const metadata = await store.getValue('metadata-page-1');
258
259
// List all keys
260
const { keys } = await store.listKeys({ prefix: 'screenshot-' });
261
console.log(`Found ${keys.length} screenshots`);
262
263
// Get public URLs for files
264
const screenshotUrl = store.getPublicUrl('screenshot-page-1');
265
console.log(`Screenshot available at: ${screenshotUrl}`);
266
267
// Clean up old data
268
// Clean up old data (listKeys returns key-info objects, so destructure the key name)
for (const { key } of keys) {
  if (key.includes('temp-')) {
    await store.delete(key);
  }
}
273
```
274
275
### RequestQueue
276
277
Request queues manage crawling requests in FIFO order with support for priorities, deduplication, and persistence.
278
279
```typescript { .api }
280
/**
281
* Request queue for managing crawling requests in FIFO order
282
*/
283
class RequestQueue {
284
/** Open an existing queue or create a new one */
285
static open(idOrName?: string): Promise<RequestQueue>;
286
287
/** Get the default request queue */
288
static getDefaultRequestQueue(): Promise<RequestQueue>;
289
290
/** Add a single request to the queue */
291
addRequest(request: RequestOptions | string, options?: RequestQueueOptions): Promise<QueueOperationInfo>;
292
293
/** Add multiple requests to the queue */
294
addRequests(requests: (RequestOptions | string)[], options?: RequestQueueOptions): Promise<BatchAddRequestsResult>;
295
296
/** Get the next request from the queue */
297
fetchNextRequest(): Promise<Request | null>;
298
299
/** Mark a request as handled */
300
markRequestHandled(request: Request): Promise<QueueOperationInfo>;
301
302
/** Return a request to the queue for retry */
303
reclaimRequest(request: Request, options?: ReclaimRequestOptions): Promise<QueueOperationInfo>;
304
305
/** Check if the queue is empty */
306
isEmpty(): Promise<boolean>;
307
308
/** Check if the queue is finished (no pending requests) */
309
isFinished(): Promise<boolean>;
310
311
/** Get queue information and statistics */
312
getInfo(): Promise<RequestQueueInfo>;
313
314
/** Delete the queue */
315
drop(): Promise<void>;
316
317
/** The queue ID */
318
readonly id: string;
319
320
/** The queue name */
321
readonly name?: string;
322
}
323
```
324
325
### RequestOptions
326
327
Options for creating requests to add to queues.
328
329
```typescript { .api }
330
interface RequestOptions<UserData = Dictionary> {
331
/** Request URL */
332
url: string;
333
334
/** Unique key for deduplication */
335
uniqueKey?: string;
336
337
/** HTTP method */
338
method?: HttpMethod;
339
340
/** Request payload for POST/PUT requests */
341
payload?: string;
342
343
/** HTTP headers */
344
headers?: Dictionary;
345
346
/** Custom user data */
347
userData?: UserData;
348
349
/** Request label for routing */
350
label?: string;
351
352
/** Whether to disable retries for this request if it fails */
353
noRetry?: boolean;
354
355
/** Priority (higher numbers = higher priority) */
356
priority?: number;
357
358
/** Whether to keep URL fragments */
359
keepUrlFragment?: boolean;
360
}
361
```
362
363
**Usage Examples:**
364
365
```typescript
366
import { RequestQueue, BasicCrawler } from "crawlee";
367
368
// Initialize queue before starting crawler
369
const requestQueue = await RequestQueue.open('my-crawl-queue');
370
371
// Add initial requests
372
await requestQueue.addRequests([
373
'https://example.com/page1',
374
'https://example.com/page2',
375
{
376
url: 'https://example.com/api/data',
377
method: 'POST',
378
payload: JSON.stringify({ query: 'products' }),
379
headers: { 'Content-Type': 'application/json' },
380
label: 'API',
381
userData: { type: 'api-call' },
382
},
383
]);
384
385
const crawler = new BasicCrawler({
386
requestQueue,
387
requestHandler: async ({ request, enqueueLinks }) => {
388
if (request.label === 'API') {
389
// Handle API requests differently
390
console.log(`Processing API request: ${request.url}`);
391
return;
392
}
393
394
// Add more requests dynamically
395
await enqueueLinks({
396
selector: 'a[href]',
397
transformRequestFunction: (req) => ({
398
...req,
399
priority: req.url.includes('/important/') ? 10 : 1,
400
}),
401
});
402
},
403
});
404
405
// Monitor queue status
406
const info = await requestQueue.getInfo();
407
console.log(`Queue has ${info.totalRequestCount} total requests`);
408
console.log(`${info.handledRequestCount} handled, ${info.pendingRequestCount} pending`);
409
410
await crawler.run();
411
```
412
413
### RequestList
414
415
Request lists provide finite, static collections of requests for bounded crawling scenarios.
416
417
```typescript { .api }
418
/**
419
* Static list of requests for finite crawling scenarios
420
*/
421
class RequestList {
422
/** Create a new request list from URLs or request objects */
423
static open(sources: (string | RequestOptions)[], options?: RequestListOptions): Promise<RequestList>;
424
425
/** Get the next request from the list */
426
fetchNextRequest(): Promise<Request | null>;
427
428
/** Mark a request as handled */
429
markRequestHandled(request: Request): Promise<void>;
430
431
/** Return a request to the list for retry */
432
reclaimRequest(request: Request): Promise<void>;
433
434
/** Check if all requests have been processed */
435
isFinished(): Promise<boolean>;
436
437
/** Check if the list is empty */
438
isEmpty(): Promise<boolean>;
439
440
/** Get the total number of requests */
441
length(): number;
442
443
/** Get the number of handled requests */
444
handledCount(): number;
445
446
/** Get list information and statistics */
447
getState(): RequestListState;
448
}
449
```
450
451
### RequestListOptions
452
453
Options for creating request lists.
454
455
```typescript { .api }
456
interface RequestListOptions {
457
/** Whether to keep duplicates */
458
keepDuplicateUrls?: boolean;
459
460
/** Options controlling how URLs are normalized when computing deduplication checksums */
461
checksumOptions?: {
462
forceUrlEncoding?: boolean;
463
includeFragment?: boolean;
464
};
465
466
/** Custom unique key function */
467
uniqueKey?: (requestOptions: RequestOptions) => string;
468
469
/** Persist state to key-value store */
470
persistStateKey?: string;
471
472
/** Key-value store for persistence */
473
persistStateKeyValueStore?: KeyValueStore;
474
}
475
```
476
477
**Usage Examples:**
478
479
```typescript
480
import { RequestList, CheerioCrawler, Dataset } from "crawlee";
481
482
// Create request list from mixed sources
483
const requestList = await RequestList.open([
484
'https://example.com/category/electronics',
485
'https://example.com/category/books',
486
{
487
url: 'https://example.com/category/clothing',
488
userData: { category: 'fashion' },
489
label: 'CATEGORY',
490
},
491
{
492
url: 'https://example.com/special-page',
493
priority: 10,
494
label: 'PRIORITY',
495
},
496
]);
497
498
const crawler = new CheerioCrawler({
499
requestList,
500
requestHandler: async ({ request, $ }) => {
501
console.log(`Processing ${request.label || 'page'}: ${request.url}`);
502
503
if (request.userData?.category) {
504
console.log(`Category: ${request.userData.category}`);
505
}
506
507
// Extract data specific to the page type
508
const data = {
509
url: request.loadedUrl,
510
title: $('title').text(),
511
timestamp: new Date(),
512
};
513
514
await Dataset.pushData(data);
515
},
516
517
// Don't add new requests - just process the static list
518
maxConcurrency: 5,
519
});
520
521
// Monitor progress
522
console.log(`Total requests: ${requestList.length()}`);
523
524
await crawler.run();
525
526
console.log(`Processed ${requestList.handledCount()} requests`);
527
console.log(`Finished: ${await requestList.isFinished()}`);
528
```
529
530
### Storage Management
531
532
Central management for all storage instances and their lifecycle.
533
534
```typescript { .api }
535
/**
536
* Manages storage instances and their lifecycle
537
*/
538
class StorageManager {
539
constructor(options?: StorageManagerOptions);
540
541
/** Get or create a dataset */
542
dataset(idOrName?: string): Promise<Dataset>;
543
544
/** Get or create a key-value store */
545
keyValueStore(idOrName?: string): Promise<KeyValueStore>;
546
547
/** Get or create a request queue */
548
requestQueue(idOrName?: string): Promise<RequestQueue>;
549
550
/** Close all storage instances */
551
closeAll(): Promise<void>;
552
553
/** List all storage instances by type */
554
listDatasets(): Promise<DatasetCollectionInfo>;
555
listKeyValueStores(): Promise<KeyValueStoreCollectionInfo>;
556
listRequestQueues(): Promise<RequestQueueCollectionInfo>;
557
}
558
559
interface StorageManagerOptions {
560
/** Storage client to use */
561
storageClient?: StorageClient;
562
563
/** Local data directory */
564
localDataDirectory?: string;
565
566
/** Whether to purge storage on startup */
567
purgeOnStart?: boolean;
568
}
569
```
570
571
### Storage Client Configuration
572
573
Low-level storage client for advanced storage operations.
574
575
```typescript { .api }
576
/**
577
* Low-level storage client for advanced operations
578
*/
579
interface StorageClient {
580
/** Dataset operations */
581
datasets(): DatasetClient;
582
583
/** Key-value store operations */
584
keyValueStores(): KeyValueStoreClient;
585
586
/** Request queue operations */
587
requestQueues(): RequestQueueClient;
588
589
/** Set storage options */
590
setOptions(options: StorageClientOptions): void;
591
}
592
593
interface StorageClientOptions {
594
/** Base URL for storage API */
595
baseUrl?: string;
596
597
/** Authentication token */
598
token?: string;
599
600
/** Default timeout for requests */
601
timeoutSecs?: number;
602
603
/** Maximum retries for failed requests */
604
maxRetries?: number;
605
}
606
```
607
608
### State Management
609
610
Advanced state management capabilities for persistent and recoverable crawler state.
611
612
```typescript { .api }
613
/**
614
* Class for managing persistent recoverable state
615
*/
616
class RecoverableState<TStateModel = Record<string, unknown>> {
617
constructor(options: RecoverableStateOptions<TStateModel>);
618
619
/** Initialize and load persisted state */
620
initialize(): Promise<TStateModel>;
621
622
/** Clean up resources and persist state */
623
teardown(): Promise<void>;
624
625
/** Get current state value */
626
get currentValue(): TStateModel;
627
628
/** Reset state to default values */
629
reset(): Promise<void>;
630
631
/** Manually persist current state */
632
persistState(eventData?: { isMigrating: boolean }): Promise<void>;
633
}
634
635
/**
636
* Simple state management with automatic persistence
637
*/
638
function useState<State extends Dictionary = Dictionary>(
639
name?: string,
640
defaultValue?: State,
641
options?: UseStateOptions
642
): Promise<AutoSavedValue<State>>;
643
644
/**
645
* Purge default storage directories
646
*/
647
function purgeDefaultStorages(options?: PurgeDefaultStorageOptions): Promise<void>;
648
function purgeDefaultStorages(config?: Configuration, client?: StorageClient): Promise<void>;
649
```
650
651
**Usage Examples:**
652
653
```typescript
654
import { RecoverableState, useState, Configuration, CheerioCrawler, purgeDefaultStorages } from "crawlee";
655
656
// Advanced recoverable state for complex crawler state
657
interface CrawlerState {
658
processedUrls: Set<string>;
659
categoryProgress: Record<string, number>;
660
lastCheckpoint: Date;
661
errorCounts: Record<string, number>;
662
}
663
664
const state = new RecoverableState<CrawlerState>({
665
defaultState: {
666
processedUrls: new Set<string>(),
667
categoryProgress: {},
668
lastCheckpoint: new Date(),
669
errorCounts: {},
670
},
671
persistStateKey: 'CRAWLER_STATE',
672
persistenceEnabled: true,
673
persistStateKvsName: 'crawler-checkpoints',
674
// Custom serialization for Set objects
675
serialize: (state) => JSON.stringify({
676
...state,
677
processedUrls: Array.from(state.processedUrls),
678
}),
679
deserialize: (json) => {
680
const parsed = JSON.parse(json);
681
return {
682
...parsed,
683
processedUrls: new Set(parsed.processedUrls),
684
lastCheckpoint: new Date(parsed.lastCheckpoint),
685
};
686
},
687
});
688
689
// Initialize state at crawler start
690
await state.initialize();
691
692
const crawler = new CheerioCrawler({
693
requestHandler: async ({ request, $, enqueueLinks }) => {
694
const currentState = state.currentValue;
695
696
// Skip if already processed
697
if (currentState.processedUrls.has(request.url)) {
698
return;
699
}
700
701
// Process page and update state
702
const category = extractCategory(request.url);
703
currentState.processedUrls.add(request.url);
704
currentState.categoryProgress[category] = (currentState.categoryProgress[category] || 0) + 1;
705
currentState.lastCheckpoint = new Date();
706
707
// Find and enqueue new links
708
await enqueueLinks({ selector: 'a[href]' });
709
710
console.log(`Processed ${currentState.processedUrls.size} URLs so far`);
711
},
712
713
failedRequestHandler: async ({ request }) => {
714
const currentState = state.currentValue;
715
const domain = new URL(request.url).hostname;
716
currentState.errorCounts[domain] = (currentState.errorCounts[domain] || 0) + 1;
717
},
718
});
719
720
// Clean up state on finish
721
crawler.teardown = async () => {
722
await state.teardown();
723
};
724
725
// Simple state management with useState
726
const simpleState = await useState('SIMPLE_CRAWLER_STATE', {
727
totalProcessed: 0,
728
startTime: new Date(),
729
categories: {} as Record<string, number>,
730
});
731
732
const simpleCrawler = new CheerioCrawler({
733
requestHandler: async ({ request }) => {
734
// useState automatically persists changes
735
simpleState.totalProcessed++;
736
737
const category = extractCategory(request.url);
738
simpleState.categories[category] = (simpleState.categories[category] || 0) + 1;
739
740
console.log(`Total processed: ${simpleState.totalProcessed}`);
741
},
742
});
743
744
// Clean up storage before starting (optional)
745
await purgeDefaultStorages({
746
onlyPurgeOnce: true,
747
});
748
```
749
750
### Storage Utilities
751
752
Additional utilities for storage management and cleanup.
753
754
```typescript { .api }
755
/**
756
* Get request ID from unique key for local storage
757
*/
758
function getRequestId(uniqueKey: string): string;
759
```
760
761
**Usage Examples:**
762
763
```typescript
764
import { getRequestId, RequestQueue } from "crawlee";
765
766
// Create consistent request IDs for caching
767
const url = "https://example.com/page";
768
const uniqueKey = `${url}-${Date.now()}`;
769
const requestId = getRequestId(uniqueKey);
770
771
console.log(`Request ID: ${requestId}`); // e.g., "a1b2c3d4e5f6g7h"
772
773
// Use in custom request queue implementations
774
class CustomRequestQueue extends RequestQueue {
775
private cache = new Map<string, any>();
776
777
async addRequest(request: RequestOptions) {
778
const id = getRequestId(request.uniqueKey || request.url);
779
780
// Check cache before adding
781
if (this.cache.has(id)) {
782
console.log('Request already cached');
783
return;
784
}
785
786
const result = await super.addRequest(request);
787
this.cache.set(id, result);
788
789
return result;
790
}
791
}
792
```
793
794
## Types
795
796
```typescript { .api }
797
interface DatasetInfo {
798
/** Dataset ID */
799
id: string;
800
801
/** Dataset name */
802
name?: string;
803
804
/** Creation time */
805
createdAt: Date;
806
807
/** Last modification time */
808
modifiedAt: Date;
809
810
/** Number of items in dataset */
811
itemCount: number;
812
813
/** Number of clean items in the dataset (hidden fields and empty records excluded) */
814
cleanItemCount: number;
815
}
816
817
interface DatasetData<T = Dictionary> {
818
/** Array of data items */
819
items: T[];
820
821
/** Total number of items available */
822
total: number;
823
824
/** Number of items in this response */
825
count: number;
826
827
/** Starting offset of items */
828
offset: number;
829
830
/** Maximum items per response */
831
limit: number;
832
}
833
834
interface KeyValueStoreInfo {
835
/** Store ID */
836
id: string;
837
838
/** Store name */
839
name?: string;
840
841
/** Creation time */
842
createdAt: Date;
843
844
/** Last modification time */
845
modifiedAt: Date;
846
}
847
848
interface KeyValueStoreKeys {
849
/** Array of key information */
850
keys: Array<{
851
key: string;
852
size: number;
853
contentType?: string;
854
modifiedAt: Date;
855
}>;
856
857
/** Total number of keys */
858
total: number;
859
860
/** Number of keys in this response */
861
count: number;
862
863
/** Starting offset */
864
offset: number;
865
866
/** Maximum keys per response */
867
limit: number;
868
869
/** Whether there are more keys */
870
isTruncated: boolean;
871
}
872
873
interface RequestQueueInfo {
874
/** Queue ID */
875
id: string;
876
877
/** Queue name */
878
name?: string;
879
880
/** Creation time */
881
createdAt: Date;
882
883
/** Last modification time */
884
modifiedAt: Date;
885
886
/** Total number of requests ever added */
887
totalRequestCount: number;
888
889
/** Number of handled requests */
890
handledRequestCount: number;
891
892
/** Number of pending requests */
893
pendingRequestCount: number;
894
}
895
896
interface QueueOperationInfo {
897
/** Request ID */
898
requestId: string;
899
900
/** Whether the request was already marked as handled */
901
wasAlreadyHandled: boolean;
902
903
/** Whether this was already present */
904
wasAlreadyPresent: boolean;
905
906
/** Unique key of the request */
907
uniqueKey: string;
908
}
909
910
interface BatchAddRequestsResult {
911
/** Requests that were added */
912
addedRequests: QueueOperationInfo[];
913
914
/** Requests that were already present */
915
existingRequests: QueueOperationInfo[];
916
917
/** Number of processed requests */
918
processedRequests: number;
919
920
/** Number of unprocessed requests */
921
unprocessedRequests: number;
922
}
923
924
interface RequestListState {
925
/** Total number of requests */
926
total: number;
927
928
/** Number of finished requests */
929
finished: number;
930
931
/** Number of pending requests */
932
pending: number;
933
934
/** Number of reclaimed requests */
935
reclaimed: number;
936
937
/** List of finished request IDs */
938
finishedRequestIds: string[];
939
940
/** List of reclaimed request IDs */
941
reclaimedRequestIds: string[];
942
}
943
944
interface ReclaimRequestOptions {
945
/** Whether to put the request in front of the queue */
946
forefront?: boolean;
947
}
948
949
interface ListKeysOptions {
950
/** Maximum number of keys to return */
951
limit?: number;
952
953
/** Prefix to filter keys */
954
prefix?: string;
955
956
/** Starting position for pagination */
957
offset?: number;
958
}
959
960
interface DatasetStreamOptions {
961
/** Number of items to stream */
962
limit?: number;
963
964
/** Starting offset */
965
offset?: number;
966
967
/** Whether to return clean JSON */
968
clean?: boolean;
969
}
970
971
type HttpMethod = 'GET' | 'POST' | 'PUT' | 'DELETE' | 'HEAD' | 'OPTIONS' | 'PATCH';
972
973
interface RecoverableStateOptions<TStateModel = Record<string, unknown>> {
974
/** Default state used if no persisted state is found */
975
defaultState: TStateModel;
976
977
/** The key under which the state is stored */
978
persistStateKey: string;
979
980
/** Flag to enable or disable state persistence */
981
persistenceEnabled?: boolean;
982
983
/** KeyValueStore name for persistence */
984
persistStateKvsName?: string;
985
986
/** KeyValueStore ID for persistence */
987
persistStateKvsId?: string;
988
989
/** Logger instance */
990
logger?: Log;
991
992
/** Configuration instance */
993
config?: Configuration;
994
995
/** Custom serialization function */
996
serialize?: (state: TStateModel) => string;
997
998
/** Custom deserialization function */
999
deserialize?: (serializedState: string) => TStateModel;
1000
}
1001
1002
interface UseStateOptions {
1003
/** Configuration instance */
1004
config?: Configuration;
1005
1006
/** KeyValueStore name for state storage */
1007
keyValueStoreName?: string | null;
1008
}
1009
1010
interface PurgeDefaultStorageOptions {
1011
/** If true, purge only once per execution */
1012
onlyPurgeOnce?: boolean;
1013
1014
/** Configuration instance */
1015
config?: Configuration;
1016
1017
/** Storage client instance */
1018
client?: StorageClient;
1019
}
1020
1021
type AutoSavedValue<T> = T & {
  /** Manually save the current state */
  save(): Promise<void>;

  /** Reset to initial value */
  reset(): Promise<void>;
};
1028
```