or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

browser-crawling.md configuration-proxies.md core-crawling.md http-crawling.md index.md session-management.md storage.md utilities.md

docs/storage.md

0

# Storage

1

2

Crawlee provides comprehensive storage solutions for persisting scraped data, managing request queues, and handling key-value storage. The storage system supports both cloud-based and local storage backends.

3

4

## Capabilities

5

6

### Dataset

7

8

Datasets store structured data in JSON format, ideal for storing scraped results and enabling easy export to various formats.

9

10

```typescript { .api }

11

/**

12

* Dataset for storing structured data (JSON objects)

13

*/

14

class Dataset {

15

/** Open an existing dataset or create a new one */

16

static open(idOrName?: string): Promise<Dataset>;

17

18

/** Get the default dataset instance */

19

static getDefaultDataset(): Promise<Dataset>;

20

21

/** Push data to the dataset */

22

pushData(data: Dictionary | Dictionary[]): Promise<void>;

23

24

/** Get data from the dataset */

25

getData(options?: DatasetDataOptions): Promise<DatasetData>;

26

27

/** Get dataset information */

28

getInfo(): Promise<DatasetInfo>;

29

30

/** Export dataset to various formats */

31

exportTo(options: DatasetExportOptions): Promise<void>;

32

33

/** Delete the dataset */

34

drop(): Promise<void>;

35

36

/** Convert dataset to a stream */

37

stream(options?: DatasetStreamOptions): NodeJS.ReadableStream;

38

39

/** The dataset ID */

40

readonly id: string;

41

42

/** The dataset name */

43

readonly name?: string;

44

}

45

```

46

47

### DatasetDataOptions

48

49

Options for retrieving data from datasets.

50

51

```typescript { .api }

52

interface DatasetDataOptions {

53

/** Number of items to retrieve */

54

limit?: number;

55

56

/** Number of items to skip */

57

offset?: number;

58

59

/** Whether to return data in clean JSON format */

60

clean?: boolean;

61

62

/** Fields to include in results */

63

fields?: string[];

64

65

/** Whether to return data in descending order */

66

desc?: boolean;

67

68

/** JSON streaming options */

69

streaming?: boolean;

70

}

71

```

72

73

### DatasetExportOptions

74

75

Options for exporting dataset data to different formats.

76

77

```typescript { .api }

78

interface DatasetExportOptions {

79

/** Format to export to */

80

format: 'json' | 'csv' | 'xlsx' | 'xml' | 'rss';

81

82

/** Fields to include in export */

83

fields?: string[];

84

85

/** Whether to exclude empty fields */

86

omitEmptyFields?: boolean;

87

88

/** Whether to exclude null values */

89

omitNullValues?: boolean;

90

91

/** Key to store the exported file under */

92

key: string;

93

94

/** Key-value store to save to */

95

keyValueStore?: KeyValueStore;

96

97

/** Maximum number of items to export */

98

limit?: number;

99

100

/** Number of items to skip */

101

offset?: number;

102

}

103

```

104

105

**Usage Examples:**

106

107

```typescript

108

import { Dataset, CheerioCrawler } from "crawlee";

109

110

const crawler = new CheerioCrawler({

111

requestHandler: async ({ $, request, pushData }) => {

112

// Extract product data

113

const products = [];

114

$('.product').each((_, element) => {

115

products.push({

116

name: $(element).find('.name').text().trim(),

117

price: $(element).find('.price').text().trim(),

118

url: new URL($(element).find('a').attr('href'), request.loadedUrl).href,

119

extractedAt: new Date().toISOString(),

120

});

121

});

122

123

// Save to default dataset

124

await pushData(products);

125

},

126

});

127

128

await crawler.run();

129

130

// Work with the dataset after crawling

131

const dataset = await Dataset.getDefaultDataset();

132

133

// Get all data

134

const { items } = await dataset.getData();

135

console.log(`Extracted ${items.length} products`);

136

137

// Export to CSV

138

await dataset.exportTo({

139

format: 'csv',

140

key: 'products.csv',

141

fields: ['name', 'price', 'url'],

142

});

143

144

// Get specific data with filtering

145

const recentItems = await dataset.getData({

146

limit: 100,

147

clean: true,

148

desc: true,

149

});

150

151

// Stream large datasets

152

const stream = dataset.stream({

153

limit: 10000,

154

});

155

156

stream.on('data', (item) => {

157

console.log('Processing item:', item.name);

158

});

159

```

160

161

### KeyValueStore

162

163

Key-value stores handle unstructured data, files, and configuration storage with support for various data formats.

164

165

```typescript { .api }

166

/**

167

* Key-value store for storing unstructured data and files

168

*/

169

class KeyValueStore {

170

/** Open an existing store or create a new one */

171

static open(idOrName?: string): Promise<KeyValueStore>;

172

173

/** Get the default key-value store */

174

static getDefaultKeyValueStore(): Promise<KeyValueStore>;

175

176

/** Set a value for a key */

177

setValue(key: string, value: any, options?: RecordOptions): Promise<void>;

178

179

/** Get a value by key */

180

getValue<T = any>(key: string): Promise<T | null>;

181

182

/** Get a public URL for a stored file */

183

getPublicUrl(key: string): string;

184

185

/** Delete a record */

186

delete(key: string): Promise<void>;

187

188

/** Get store information */

189

getInfo(): Promise<KeyValueStoreInfo>;

190

191

/** List all keys in the store */

192

listKeys(options?: ListKeysOptions): Promise<KeyValueStoreKeys>;

193

194

/** Delete the entire store */

195

drop(): Promise<void>;

196

197

/** The store ID */

198

readonly id: string;

199

200

/** The store name */

201

readonly name?: string;

202

}

203

```

204

205

### RecordOptions

206

207

Options for storing records in key-value stores.

208

209

```typescript { .api }

210

interface RecordOptions {

211

/** Content type of the stored data */

212

contentType?: string;

213

214

/** Whether to gzip the content */

215

gzip?: boolean;

216

217

/** Custom metadata */

218

metadata?: Dictionary;

219

}

220

```

221

222

**Usage Examples:**

223

224

```typescript

225

import { KeyValueStore, CheerioCrawler } from "crawlee";

226

227

const crawler = new CheerioCrawler({

228

requestHandler: async ({ request, $, setValue }) => {

229

// Save HTML content

230

await setValue(`html-${request.uniqueKey}`, $.html());

231

232

// Save screenshot (if using browser crawler)

233

await setValue(`screenshot-${request.uniqueKey}`, screenshotBuffer, {

234

contentType: 'image/png',

235

});

236

237

// Save structured data

238

await setValue(`metadata-${request.uniqueKey}`, {

239

title: $('title').text(),

240

description: $('meta[name="description"]').attr('content'),

241

extractedAt: new Date(),

242

});

243

244

// Save with custom options

245

await setValue('large-file', largeJsonData, {

246

gzip: true,

247

contentType: 'application/json',

248

});

249

},

250

});

251

252

// Work with key-value store after crawling

253

const store = await KeyValueStore.getDefaultKeyValueStore();

254

255

// Retrieve stored data

256

const htmlContent = await store.getValue('html-page-1');

257

const metadata = await store.getValue('metadata-page-1');

258

259

// List keys that start with the 'screenshot-' prefix

260

const { keys } = await store.listKeys({ prefix: 'screenshot-' });

261

console.log(`Found ${keys.length} screenshots`);

262

263

// Get public URLs for files

264

const screenshotUrl = store.getPublicUrl('screenshot-page-1');

265

console.log(`Screenshot available at: ${screenshotUrl}`);

266

267

// Clean up old data

268

for (const key of keys) {

269

if (key.includes('temp-')) {

270

await store.delete(key);

271

}

272

}

273

```

274

275

### RequestQueue

276

277

Request queues manage crawling requests in FIFO order with support for priorities, deduplication, and persistence.

278

279

```typescript { .api }

280

/**

281

* Request queue for managing crawling requests in FIFO order

282

*/

283

class RequestQueue {

284

/** Open an existing queue or create a new one */

285

static open(idOrName?: string): Promise<RequestQueue>;

286

287

/** Get the default request queue */

288

static getDefaultRequestQueue(): Promise<RequestQueue>;

289

290

/** Add a single request to the queue */

291

addRequest(request: RequestOptions | string, options?: RequestQueueOptions): Promise<QueueOperationInfo>;

292

293

/** Add multiple requests to the queue */

294

addRequests(requests: (RequestOptions | string)[], options?: RequestQueueOptions): Promise<BatchAddRequestsResult>;

295

296

/** Get the next request from the queue */

297

fetchNextRequest(): Promise<Request | null>;

298

299

/** Mark a request as handled */

300

markRequestHandled(request: Request): Promise<QueueOperationInfo>;

301

302

/** Return a request to the queue for retry */

303

reclaimRequest(request: Request, options?: ReclaimRequestOptions): Promise<QueueOperationInfo>;

304

305

/** Check if the queue is empty */

306

isEmpty(): Promise<boolean>;

307

308

/** Check if the queue is finished (no pending requests) */

309

isFinished(): Promise<boolean>;

310

311

/** Get queue information and statistics */

312

getInfo(): Promise<RequestQueueInfo>;

313

314

/** Delete the queue */

315

drop(): Promise<void>;

316

317

/** The queue ID */

318

readonly id: string;

319

320

/** The queue name */

321

readonly name?: string;

322

}

323

```

324

325

### RequestOptions

326

327

Options for creating requests to add to queues.

328

329

```typescript { .api }

330

interface RequestOptions<UserData = Dictionary> {

331

/** Request URL */

332

url: string;

333

334

/** Unique key for deduplication */

335

uniqueKey?: string;

336

337

/** HTTP method */

338

method?: HttpMethod;

339

340

/** Request payload for POST/PUT requests */

341

payload?: string;

342

343

/** HTTP headers */

344

headers?: Dictionary;

345

346

/** Custom user data */

347

userData?: UserData;

348

349

/** Request label for routing */

350

label?: string;

351

352

/** Whether to disable automatic retries if this request fails */

353

noRetry?: boolean;

354

355

/** Priority (higher numbers = higher priority) */

356

priority?: number;

357

358

/** Whether to keep URL fragments */

359

keepUrlFragment?: boolean;

360

}

361

```

362

363

**Usage Examples:**

364

365

```typescript

366

import { RequestQueue, BasicCrawler } from "crawlee";

367

368

// Initialize queue before starting crawler

369

const requestQueue = await RequestQueue.open('my-crawl-queue');

370

371

// Add initial requests

372

await requestQueue.addRequests([

373

'https://example.com/page1',

374

'https://example.com/page2',

375

{

376

url: 'https://example.com/api/data',

377

method: 'POST',

378

payload: JSON.stringify({ query: 'products' }),

379

headers: { 'Content-Type': 'application/json' },

380

label: 'API',

381

userData: { type: 'api-call' },

382

},

383

]);

384

385

const crawler = new BasicCrawler({

386

requestQueue,

387

requestHandler: async ({ request, enqueueLinks }) => {

388

if (request.label === 'API') {

389

// Handle API requests differently

390

console.log(`Processing API request: ${request.url}`);

391

return;

392

}

393

394

// Add more requests dynamically

395

await enqueueLinks({

396

selector: 'a[href]',

397

transformRequestFunction: (req) => ({

398

...req,

399

priority: req.url.includes('/important/') ? 10 : 1,

400

}),

401

});

402

},

403

});

404

405

// Monitor queue status

406

const info = await requestQueue.getInfo();

407

console.log(`Queue has ${info.totalRequestCount} total requests`);

408

console.log(`${info.handledRequestCount} handled, ${info.pendingRequestCount} pending`);

409

410

await crawler.run();

411

```

412

413

### RequestList

414

415

Request lists provide finite, static collections of requests for bounded crawling scenarios.

416

417

```typescript { .api }

418

/**

419

* Static list of requests for finite crawling scenarios

420

*/

421

class RequestList {

422

/** Create a new request list from URLs or request objects */

423

static open(sources: (string | RequestOptions)[], options?: RequestListOptions): Promise<RequestList>;

424

425

/** Get the next request from the list */

426

fetchNextRequest(): Promise<Request | null>;

427

428

/** Mark a request as handled */

429

markRequestHandled(request: Request): Promise<void>;

430

431

/** Return a request to the list for retry */

432

reclaimRequest(request: Request): Promise<void>;

433

434

/** Check if all requests have been processed */

435

isFinished(): Promise<boolean>;

436

437

/** Check if the list is empty */

438

isEmpty(): Promise<boolean>;

439

440

/** Get the total number of requests */

441

length(): number;

442

443

/** Get the number of handled requests */

444

handledCount(): number;

445

446

/** Get list information and statistics */

447

getState(): RequestListState;

448

}

449

```

450

451

### RequestListOptions

452

453

Options for creating request lists.

454

455

```typescript { .api }

456

interface RequestListOptions {

457

/** Whether to keep duplicates */

458

keepDuplicateUrls?: boolean;

459

460

/** Whether to check duplicates by URL only */

461

checksumOptions?: {

462

forceUrlEncoding?: boolean;

463

includeFragment?: boolean;

464

};

465

466

/** Custom unique key function */

467

uniqueKey?: (requestOptions: RequestOptions) => string;

468

469

/** Persist state to key-value store */

470

persistStateKey?: string;

471

472

/** Key-value store for persistence */

473

persistStateKeyValueStore?: KeyValueStore;

474

}

475

```

476

477

**Usage Examples:**

478

479

```typescript

480

import { RequestList, CheerioCrawler } from "crawlee";

481

482

// Create request list from mixed sources

483

const requestList = await RequestList.open([

484

'https://example.com/category/electronics',

485

'https://example.com/category/books',

486

{

487

url: 'https://example.com/category/clothing',

488

userData: { category: 'fashion' },

489

label: 'CATEGORY',

490

},

491

{

492

url: 'https://example.com/special-page',

493

priority: 10,

494

label: 'PRIORITY',

495

},

496

]);

497

498

const crawler = new CheerioCrawler({

499

requestList,

500

requestHandler: async ({ request, $ }) => {

501

console.log(`Processing ${request.label || 'page'}: ${request.url}`);

502

503

if (request.userData?.category) {

504

console.log(`Category: ${request.userData.category}`);

505

}

506

507

// Extract data specific to the page type

508

const data = {

509

url: request.loadedUrl,

510

title: $('title').text(),

511

timestamp: new Date(),

512

};

513

514

await Dataset.pushData(data);

515

},

516

517

// Don't add new requests - just process the static list

518

maxConcurrency: 5,

519

});

520

521

// Monitor progress

522

console.log(`Total requests: ${requestList.length()}`);

523

524

await crawler.run();

525

526

console.log(`Processed ${requestList.handledCount()} requests`);

527

console.log(`Finished: ${await requestList.isFinished()}`);

528

```

529

530

### Storage Management

531

532

Central management for all storage instances and their lifecycle.

533

534

```typescript { .api }

535

/**

536

* Manages storage instances and their lifecycle

537

*/

538

class StorageManager {

539

constructor(options?: StorageManagerOptions);

540

541

/** Get or create a dataset */

542

dataset(idOrName?: string): Promise<Dataset>;

543

544

/** Get or create a key-value store */

545

keyValueStore(idOrName?: string): Promise<KeyValueStore>;

546

547

/** Get or create a request queue */

548

requestQueue(idOrName?: string): Promise<RequestQueue>;

549

550

/** Close all storage instances */

551

closeAll(): Promise<void>;

552

553

/** List all storage instances by type */

554

listDatasets(): Promise<DatasetCollectionInfo>;

555

listKeyValueStores(): Promise<KeyValueStoreCollectionInfo>;

556

listRequestQueues(): Promise<RequestQueueCollectionInfo>;

557

}

558

559

interface StorageManagerOptions {

560

/** Storage client to use */

561

storageClient?: StorageClient;

562

563

/** Local data directory */

564

localDataDirectory?: string;

565

566

/** Whether to purge storage on startup */

567

purgeOnStart?: boolean;

568

}

569

```

570

571

### Storage Client Configuration

572

573

Low-level storage client for advanced storage operations.

574

575

```typescript { .api }

576

/**

577

* Low-level storage client for advanced operations

578

*/

579

interface StorageClient {

580

/** Dataset operations */

581

datasets(): DatasetClient;

582

583

/** Key-value store operations */

584

keyValueStores(): KeyValueStoreClient;

585

586

/** Request queue operations */

587

requestQueues(): RequestQueueClient;

588

589

/** Set storage options */

590

setOptions(options: StorageClientOptions): void;

591

}

592

593

interface StorageClientOptions {

594

/** Base URL for storage API */

595

baseUrl?: string;

596

597

/** Authentication token */

598

token?: string;

599

600

/** Default timeout for requests */

601

timeoutSecs?: number;

602

603

/** Maximum retries for failed requests */

604

maxRetries?: number;

605

}

606

```

607

608

### State Management

609

610

Advanced state management capabilities for persistent and recoverable crawler state.

611

612

```typescript { .api }

613

/**

614

* Class for managing persistent recoverable state

615

*/

616

class RecoverableState<TStateModel = Record<string, unknown>> {

617

constructor(options: RecoverableStateOptions<TStateModel>);

618

619

/** Initialize and load persisted state */

620

initialize(): Promise<TStateModel>;

621

622

/** Clean up resources and persist state */

623

teardown(): Promise<void>;

624

625

/** Get current state value */

626

get currentValue(): TStateModel;

627

628

/** Reset state to default values */

629

reset(): Promise<void>;

630

631

/** Manually persist current state */

632

persistState(eventData?: { isMigrating: boolean }): Promise<void>;

633

}

634

635

/**

636

* Simple state management with automatic persistence

637

*/

638

function useState<State extends Dictionary = Dictionary>(

639

name?: string,

640

defaultValue?: State,

641

options?: UseStateOptions

642

): Promise<AutoSavedValue<State>>;

643

644

/**

645

* Purge default storage directories

646

*/

647

function purgeDefaultStorages(options?: PurgeDefaultStorageOptions): Promise<void>;

648

function purgeDefaultStorages(config?: Configuration, client?: StorageClient): Promise<void>;

649

```

650

651

**Usage Examples:**

652

653

```typescript

654

import { RecoverableState, useState, Configuration } from "crawlee";

655

656

// Advanced recoverable state for complex crawler state

657

interface CrawlerState {

658

processedUrls: Set<string>;

659

categoryProgress: Record<string, number>;

660

lastCheckpoint: Date;

661

errorCounts: Record<string, number>;

662

}

663

664

const state = new RecoverableState<CrawlerState>({

665

defaultState: {

666

processedUrls: new Set<string>(),

667

categoryProgress: {},

668

lastCheckpoint: new Date(),

669

errorCounts: {},

670

},

671

persistStateKey: 'CRAWLER_STATE',

672

persistenceEnabled: true,

673

persistStateKvsName: 'crawler-checkpoints',

674

// Custom serialization for Set objects

675

serialize: (state) => JSON.stringify({

676

...state,

677

processedUrls: Array.from(state.processedUrls),

678

}),

679

deserialize: (json) => {

680

const parsed = JSON.parse(json);

681

return {

682

...parsed,

683

processedUrls: new Set(parsed.processedUrls),

684

lastCheckpoint: new Date(parsed.lastCheckpoint),

685

};

686

},

687

});

688

689

// Initialize state at crawler start

690

await state.initialize();

691

692

const crawler = new CheerioCrawler({

693

requestHandler: async ({ request, $, enqueueLinks }) => {

694

const currentState = state.currentValue;

695

696

// Skip if already processed

697

if (currentState.processedUrls.has(request.url)) {

698

return;

699

}

700

701

// Process page and update state

702

const category = extractCategory(request.url);

703

currentState.processedUrls.add(request.url);

704

currentState.categoryProgress[category] = (currentState.categoryProgress[category] || 0) + 1;

705

currentState.lastCheckpoint = new Date();

706

707

// Find and enqueue new links

708

await enqueueLinks({ selector: 'a[href]' });

709

710

console.log(`Processed ${currentState.processedUrls.size} URLs so far`);

711

},

712

713

failedRequestHandler: async ({ request }) => {

714

const currentState = state.currentValue;

715

const domain = new URL(request.url).hostname;

716

currentState.errorCounts[domain] = (currentState.errorCounts[domain] || 0) + 1;

717

},

718

});

719

720

// Clean up state on finish

721

crawler.teardown = async () => {

722

await state.teardown();

723

};

724

725

// Simple state management with useState

726

const simpleState = await useState('SIMPLE_CRAWLER_STATE', {

727

totalProcessed: 0,

728

startTime: new Date(),

729

categories: {} as Record<string, number>,

730

});

731

732

const simpleCrawler = new CheerioCrawler({

733

requestHandler: async ({ request }) => {

734

// useState automatically persists changes

735

simpleState.totalProcessed++;

736

737

const category = extractCategory(request.url);

738

simpleState.categories[category] = (simpleState.categories[category] || 0) + 1;

739

740

console.log(`Total processed: ${simpleState.totalProcessed}`);

741

},

742

});

743

744

// Clean up storage before starting (optional)

745

await purgeDefaultStorages({

746

onlyPurgeOnce: true,

747

});

748

```

749

750

### Storage Utilities

751

752

Additional utilities for storage management and cleanup.

753

754

```typescript { .api }

755

/**

756

* Get request ID from unique key for local storage

757

*/

758

function getRequestId(uniqueKey: string): string;

759

```

760

761

**Usage Examples:**

762

763

```typescript

764

import { getRequestId, RequestQueue } from "crawlee";

765

766

// Create consistent request IDs for caching

767

const url = "https://example.com/page";

768

const uniqueKey = `${url}-${Date.now()}`;

769

const requestId = getRequestId(uniqueKey);

770

771

console.log(`Request ID: ${requestId}`); // e.g., "a1b2c3d4e5f6g7h"

772

773

// Use in custom request queue implementations

774

class CustomRequestQueue extends RequestQueue {

775

private cache = new Map<string, any>();

776

777

async addRequest(request: RequestOptions) {

778

const id = getRequestId(request.uniqueKey || request.url);

779

780

// Check cache before adding

781

if (this.cache.has(id)) {

782

console.log('Request already cached');

783

return;

784

}

785

786

const result = await super.addRequest(request);

787

this.cache.set(id, result);

788

789

return result;

790

}

791

}

792

```

793

794

## Types

795

796

```typescript { .api }

797

interface DatasetInfo {

798

/** Dataset ID */

799

id: string;

800

801

/** Dataset name */

802

name?: string;

803

804

/** Creation time */

805

createdAt: Date;

806

807

/** Last modification time */

808

modifiedAt: Date;

809

810

/** Number of items in dataset */

811

itemCount: number;

812

813

/** Number of clean items in dataset */

814

cleanItemCount: number;

815

}

816

817

interface DatasetData<T = Dictionary> {

818

/** Array of data items */

819

items: T[];

820

821

/** Total number of items available */

822

total: number;

823

824

/** Number of items in this response */

825

count: number;

826

827

/** Starting offset of items */

828

offset: number;

829

830

/** Maximum items per response */

831

limit: number;

832

}

833

834

interface KeyValueStoreInfo {

835

/** Store ID */

836

id: string;

837

838

/** Store name */

839

name?: string;

840

841

/** Creation time */

842

createdAt: Date;

843

844

/** Last modification time */

845

modifiedAt: Date;

846

}

847

848

interface KeyValueStoreKeys {

849

/** Array of key information */

850

keys: Array<{

851

key: string;

852

size: number;

853

contentType?: string;

854

modifiedAt: Date;

855

}>;

856

857

/** Total number of keys */

858

total: number;

859

860

/** Number of keys in this response */

861

count: number;

862

863

/** Starting offset */

864

offset: number;

865

866

/** Maximum keys per response */

867

limit: number;

868

869

/** Whether there are more keys */

870

isTruncated: boolean;

871

}

872

873

interface RequestQueueInfo {

874

/** Queue ID */

875

id: string;

876

877

/** Queue name */

878

name?: string;

879

880

/** Creation time */

881

createdAt: Date;

882

883

/** Last modification time */

884

modifiedAt: Date;

885

886

/** Total number of requests ever added */

887

totalRequestCount: number;

888

889

/** Number of handled requests */

890

handledRequestCount: number;

891

892

/** Number of pending requests */

893

pendingRequestCount: number;

894

}

895

896

interface QueueOperationInfo {

897

/** Request ID */

898

requestId: string;

899

900

/** Whether the request was already handled */

901

wasAlreadyHandled: boolean;

902

903

/** Whether this was already present */

904

wasAlreadyPresent: boolean;

905

906

/** Unique key of the request */

907

uniqueKey: string;

908

}

909

910

interface BatchAddRequestsResult {

911

/** Requests that were added */

912

addedRequests: QueueOperationInfo[];

913

914

/** Requests that were already present */

915

existingRequests: QueueOperationInfo[];

916

917

/** Number of processed requests */

918

processedRequests: number;

919

920

/** Number of unprocessed requests */

921

unprocessedRequests: number;

922

}

923

924

interface RequestListState {

925

/** Total number of requests */

926

total: number;

927

928

/** Number of finished requests */

929

finished: number;

930

931

/** Number of pending requests */

932

pending: number;

933

934

/** Number of reclaimed requests */

935

reclaimed: number;

936

937

/** List of finished request IDs */

938

finishedRequestIds: string[];

939

940

/** List of reclaimed request IDs */

941

reclaimedRequestIds: string[];

942

}

943

944

interface ReclaimRequestOptions {

945

/** Whether to put the request in front of the queue */

946

forefront?: boolean;

947

}

948

949

interface ListKeysOptions {

950

/** Maximum number of keys to return */

951

limit?: number;

952

953

/** Prefix to filter keys */

954

prefix?: string;

955

956

/** Starting position for pagination */

957

offset?: number;

958

}

959

960

interface DatasetStreamOptions {

961

/** Number of items to stream */

962

limit?: number;

963

964

/** Starting offset */

965

offset?: number;

966

967

/** Whether to return clean JSON */

968

clean?: boolean;

969

}

970

971

type HttpMethod = 'GET' | 'POST' | 'PUT' | 'DELETE' | 'HEAD' | 'OPTIONS' | 'PATCH';

972

973

interface RecoverableStateOptions<TStateModel = Record<string, unknown>> {

974

/** Default state used if no persisted state is found */

975

defaultState: TStateModel;

976

977

/** The key under which the state is stored */

978

persistStateKey: string;

979

980

/** Flag to enable or disable state persistence */

981

persistenceEnabled?: boolean;

982

983

/** KeyValueStore name for persistence */

984

persistStateKvsName?: string;

985

986

/** KeyValueStore ID for persistence */

987

persistStateKvsId?: string;

988

989

/** Logger instance */

990

logger?: Log;

991

992

/** Configuration instance */

993

config?: Configuration;

994

995

/** Custom serialization function */

996

serialize?: (state: TStateModel) => string;

997

998

/** Custom deserialization function */

999

deserialize?: (serializedState: string) => TStateModel;

1000

}

1001

1002

interface UseStateOptions {

1003

/** Configuration instance */

1004

config?: Configuration;

1005

1006

/** KeyValueStore name for state storage */

1007

keyValueStoreName?: string | null;

1008

}

1009

1010

interface PurgeDefaultStorageOptions {

1011

/** If true, purge only once per execution */

1012

onlyPurgeOnce?: boolean;

1013

1014

/** Configuration instance */

1015

config?: Configuration;

1016

1017

/** Storage client instance */

1018

client?: StorageClient;

1019

}

1020

1021

interface AutoSavedValue<T> extends T {

1022

/** Manually save the current state */

1023

save(): Promise<void>;

1024

1025

/** Reset to initial value */

1026

reset(): Promise<void>;

1027

}

1028

```