or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

account-management.md, cli-interface.md, configuration-auth.md, file-management.md, index.md, item-operations.md, metadata-operations.md, search-operations.md, session-management.md, task-management.md

docs/item-operations.md

0

# Item Operations

1

2

Item operations provide comprehensive access to Archive.org items, including retrieval, download, upload, and management of items and their files.

3

4

## Capabilities

5

6

### Item Retrieval

7

8

Get Item objects to access metadata, files, and perform operations on Archive.org items.

9

10

```python { .api }

11

def get_item(identifier, config=None, config_file=None, archive_session=None, debug=False, http_adapter_kwargs=None, request_kwargs=None):

12

"""

13

Get an Item object by Archive.org identifier.

14

15

Args:

16

identifier (str): The globally unique Archive.org item identifier

17

config (dict, optional): Configuration dictionary for session creation

18

config_file (str, optional): Path to configuration file

19

archive_session (ArchiveSession, optional): Existing session object to use

20

debug (bool): Enable debug logging

21

http_adapter_kwargs (dict, optional): HTTP adapter keyword arguments

22

request_kwargs (dict, optional): Additional request arguments

23

24

Returns:

25

Item: Item object for the specified identifier (or Collection if item is a collection)

26

27

Raises:

28

ItemLocateError: If item cannot be located or is dark

29

"""

30

31

class Item:

32

"""

33

Represents an Archive.org item with metadata, files, and operations.

34

"""

35

36

def __init__(self, archive_session, identifier, item_metadata=None):

37

"""

38

Initialize Item object.

39

40

Args:

41

archive_session (ArchiveSession): Session object

42

identifier (str): Item identifier

43

item_metadata (dict, optional): Pre-fetched metadata

44

"""

45

```

46

47

### Item Properties

48

49

Access item metadata, files, and status information.

50

51

```python { .api }

52

class Item:

53

@property

54

def identifier(self):

55

"""str: Item identifier."""

56

57

@property

58

def metadata(self):

59

"""dict: Complete item metadata."""

60

61

@property

62

def files(self):

63

"""list: List of file metadata dictionaries."""

64

65

@property

66

def exists(self):

67

"""bool: Whether the item exists and is accessible."""

68

69

@property

70

def session(self):

71

"""ArchiveSession: Session object used by this item."""

72

73

@property

74

def urls(self):

75

"""URLs: Object providing access to various item URLs."""

76

77

@property

78

def collection(self):

79

"""list: Collections this item belongs to."""

80

81

@property

82

def wikilink(self):

83

"""str: MediaWiki-formatted link (if item has title)."""

84

85

# Archive.org specific properties

86

@property

87

def created(self):

88

"""int: Unix timestamp of item creation."""

89

90

@property

91

def d1(self):

92

"""str: Primary server."""

93

94

@property

95

def d2(self):

96

"""str: Secondary server."""

97

98

@property

99

def dir(self):

100

"""str: Item directory path."""

101

102

@property

103

def files_count(self):

104

"""int: Number of files in item."""

105

106

@property

107

def item_size(self):

108

"""int: Total size of all files in bytes."""

109

110

@property

111

def reviews(self):

112

"""list: Item reviews."""

113

114

@property

115

def server(self):

116

"""str: Item server."""

117

118

@property

119

def uniq(self):

120

"""int: Unique item number."""

121

122

@property

123

def updated(self):

124

"""int: Unix timestamp of last update."""

125

126

@property

127

def tasks(self):

128

"""int: Number of tasks associated with item."""

129

130

@property

131

def is_dark(self):

132

"""bool: Whether item is dark (restricted access)."""

133

```

134

135

### Item Management

136

137

Refresh item data and check identifier availability.

138

139

```python { .api }

140

class Item:

141

def refresh(self, item_metadata=None, **kwargs):

142

"""

143

Refresh item metadata from Archive.org.

144

145

Args:

146

item_metadata (dict, optional): Use specific metadata instead of fetching

147

**kwargs: Additional arguments passed to get_metadata

148

"""

149

150

def identifier_available(self):

151

"""

152

Check if the item identifier is available for use.

153

154

Returns:

155

bool: True if identifier is available, False if taken

156

"""

157

```

158

159

### File Access

160

161

Access individual files and collections of files within the item.

162

163

```python { .api }

164

class Item:

165

def get_file(self, file_name):

166

"""

167

Get a File object for a specific file in the item.

168

169

Args:

170

file_name (str): Name of the file

171

172

Returns:

173

File: File object, or None if file doesn't exist

174

"""

175

176

def get_files(self, files=None, formats=None, glob_pattern=None, exclude_pattern=None, on_the_fly=False):

177

"""

178

Get File objects with optional filtering.

179

180

Args:

181

files (list, optional): Specific file names to retrieve

182

formats (list, optional): File formats to include (e.g., ['pdf', 'epub'])

183

glob_pattern (str, optional): Glob pattern for file selection

184

exclude_pattern (str, optional): Glob pattern for exclusion

185

on_the_fly (bool): Include on-the-fly derived files

186

187

Yields:

188

File: File objects matching the criteria

189

"""

190

```

191

192

### Upload Operations

193

194

Upload files to items, creating new items or updating existing ones.

195

196

```python { .api }

197

def upload(identifier, files, metadata=None, headers=None, access_key=None, secret_key=None, queue_derive=None, verbose=False, verify=False, checksum=False, delete=False, retries=None, retries_sleep=None, debug=False, validate_identifier=False, request_kwargs=None, **get_item_kwargs):

198

"""

199

Upload files to an Archive.org item (creates item if it doesn't exist).

200

201

Args:

202

identifier (str): Item identifier to upload to

203

files (list): Files to upload - can be:

204

- File paths (str)

205

- File-like objects

206

- Tuples of (remote_name, local_path_or_file_object)

207

- Dictionaries with 'name' and file content

208

metadata (dict, optional): Item metadata to set/update

209

headers (dict, optional): HTTP headers for upload requests

210

access_key (str, optional): IA-S3 access key (overrides config)

211

secret_key (str, optional): IA-S3 secret key (overrides config)

212

queue_derive (bool, optional): Queue derive task after upload

213

verbose (bool): Enable verbose output

214

verify (bool): Verify checksums after upload

215

checksum (bool): Skip uploading files whose MD5 checksum matches a file already in the item

216

delete (bool): Delete local files after successful upload

217

retries (int, optional): Number of retry attempts

218

retries_sleep (int, optional): Seconds to sleep between retries

219

debug (bool): Enable debug logging

220

validate_identifier (bool): Validate identifier format

221

request_kwargs (dict, optional): Additional request arguments

222

**get_item_kwargs: Additional arguments for get_item

223

224

Returns:

225

list: List of Request/Response objects from upload operations

226

227

Raises:

228

ValueError: If identifier is invalid

229

AuthenticationError: If authentication fails

230

"""

231

232

class Item:

233

def upload(self, files, metadata=None, headers=None, access_key=None, secret_key=None, queue_derive=None, verbose=False, verify=False, checksum=False, delete=False, retries=None, retries_sleep=None, debug=False, request_kwargs=None):

234

"""

235

Upload files to this item using the same parameters as the upload function.

236

237

Returns:

238

list: List of Request/Response objects from upload operations

239

"""

240

```

241

242

### Download Operations

243

244

Download files from items with extensive filtering and configuration options.

245

246

```python { .api }

247

def download(identifier, files=None, formats=None, glob_pattern=None, dry_run=False, verbose=False, ignore_existing=False, checksum=False, checksum_archive=False, destdir=None, no_directory=False, retries=None, item_index=None, ignore_errors=False, on_the_fly=False, return_responses=False, no_change_timestamp=False, timeout=None, **get_item_kwargs):

248

"""

249

Download files from an Archive.org item with extensive filtering options.

250

251

Args:

252

identifier (str): Item identifier to download from

253

files (list, optional): Specific files to download

254

formats (list, optional): File formats to download (e.g., ['pdf', 'txt'])

255

glob_pattern (str, optional): Glob pattern for file selection

256

dry_run (bool): Show what would be downloaded without downloading

257

verbose (bool): Enable verbose output

258

ignore_existing (bool): Skip files that already exist locally

259

checksum (bool): Skip downloading files that already exist locally with a matching checksum

260

checksum_archive (bool): Skip files already recorded as downloaded in the local _checksum_archive.txt file

261

destdir (str, optional): Destination directory (default: current directory)

262

no_directory (bool): Don't create item directory, save files directly to destdir

263

retries (int, optional): Number of retry attempts per file

264

item_index (int, optional): Index of this item, used for displaying progress in bulk downloads

265

ignore_errors (bool): Continue downloading other files if some fail

266

on_the_fly (bool): Include on-the-fly derived files

267

return_responses (bool): Return response objects instead of downloading

268

no_change_timestamp (bool): Don't update file timestamps to match archive

269

timeout (int, optional): Request timeout in seconds

270

**get_item_kwargs: Additional arguments for get_item

271

272

Returns:

273

list: List of Request/Response objects from download operations

274

275

Raises:

276

ItemLocateError: If item cannot be located

277

"""

278

279

class Item:

280

def download(self, files=None, formats=None, glob_pattern=None, dry_run=False, verbose=False, ignore_existing=False, checksum=False, checksum_archive=False, destdir=None, no_directory=False, retries=None, item_index=None, ignore_errors=False, on_the_fly=False, return_responses=False, no_change_timestamp=False, timeout=None):

281

"""

282

Download files from this item using the same parameters as the download function.

283

284

Returns:

285

list: List of Request/Response objects from download operations

286

"""

287

```

288

289

### Metadata Operations

290

291

Modify item metadata with various update strategies.

292

293

```python { .api }

294

class Item:

295

def modify_metadata(self, metadata, target=None, append=False, append_list=False, priority=0, access_key=None, secret_key=None, debug=False, request_kwargs=None):

296

"""

297

Modify metadata of this item.

298

299

Args:

300

metadata (dict): Metadata changes to apply

301

target (str, optional): Target specific metadata section

302

append (bool): Append values to existing metadata fields

303

append_list (bool): Append to metadata list fields

304

priority (int): Task priority for metadata update

305

access_key (str, optional): IA-S3 access key

306

secret_key (str, optional): IA-S3 secret key

307

debug (bool): Enable debug logging

308

request_kwargs (dict, optional): Additional request arguments

309

310

Returns:

311

Request or Response: Metadata modification result

312

313

Raises:

314

AuthenticationError: If authentication fails

315

"""

316

```

317

318

### Task Operations

319

320

Submit various Archive.org tasks for item processing.

321

322

```python { .api }

323

class Item:

324

def derive(self, priority=0, remove_derived=None, reduced_priority=False, data=None, headers=None, request_kwargs=None):

325

"""

326

Submit derive task to generate derived files.

327

328

Args:

329

priority (int): Task priority (-5 to 10)

330

remove_derived (list, optional): Derived formats to remove

331

reduced_priority (bool): Use reduced priority queue

332

data (dict, optional): Additional task data

333

headers (dict, optional): Additional HTTP headers

334

request_kwargs (dict, optional): Additional request arguments

335

336

Returns:

337

Response: Task submission response

338

"""

339

340

def fixer(self, ops=None, priority=None, reduced_priority=False, data=None, headers=None, request_kwargs=None):

341

"""

342

Submit fixer task to fix item issues.

343

344

Args:

345

ops (list, optional): Fixer operations to perform

346

priority (int, optional): Task priority

347

reduced_priority (bool): Use reduced priority queue

348

data (dict, optional): Additional task data

349

headers (dict, optional): Additional HTTP headers

350

request_kwargs (dict, optional): Additional request arguments

351

352

Returns:

353

Response: Task submission response

354

"""

355

356

def dark(self, comment, priority=None, data=None, reduced_priority=False, request_kwargs=None):

357

"""

358

Dark the item (restrict access).

359

360

Args:

361

comment (str): Reason for darking the item

362

priority (int, optional): Task priority

363

data (dict, optional): Additional task data

364

reduced_priority (bool): Use reduced priority queue

365

request_kwargs (dict, optional): Additional request arguments

366

367

Returns:

368

Response: Task submission response

369

"""

370

371

def undark(self, comment, priority=None, reduced_priority=False, data=None, request_kwargs=None):

372

"""

373

Undark the item (restore access).

374

375

Args:

376

comment (str): Reason for undarking the item

377

priority (int, optional): Task priority

378

reduced_priority (bool): Use reduced priority queue

379

data (dict, optional): Additional task data

380

request_kwargs (dict, optional): Additional request arguments

381

382

Returns:

383

Response: Task submission response

384

"""

385

```

386

387

### Review and Task Management

388

389

Manage item reviews and monitor task status.

390

391

```python { .api }

392

class Item:

393

def get_review(self):

394

"""

395

Get review information for this item.

396

397

Returns:

398

Response: Review data response

399

"""

400

401

def get_task_summary(self, params=None, request_kwargs=None):

402

"""

403

Get task count summary for this item.

404

405

Args:

406

params (dict, optional): Additional query parameters

407

request_kwargs (dict, optional): Additional request arguments

408

409

Returns:

410

dict: Task counts by status

411

"""

412

413

def no_tasks_pending(self, params=None, request_kwargs=None):

414

"""

415

Check if item has no pending tasks.

416

417

Args:

418

params (dict, optional): Additional query parameters

419

request_kwargs (dict, optional): Additional request arguments

420

421

Returns:

422

bool: True if no tasks are pending

423

"""

424

425

def get_all_item_tasks(self, params=None, request_kwargs=None):

426

"""

427

Get all tasks (completed and pending) for this item.

428

429

Args:

430

params (dict, optional): Additional query parameters

431

request_kwargs (dict, optional): Additional request arguments

432

433

Returns:

434

list: List of CatalogTask objects

435

"""

436

437

def get_history(self, params=None, request_kwargs=None):

438

"""

439

Get completed tasks for this item.

440

441

Args:

442

params (dict, optional): Additional query parameters

443

request_kwargs (dict, optional): Additional request arguments

444

445

Returns:

446

list: List of completed CatalogTask objects

447

"""

448

449

def get_catalog(self, params=None, request_kwargs=None):

450

"""

451

Get pending tasks for this item.

452

453

Args:

454

params (dict, optional): Additional query parameters

455

request_kwargs (dict, optional): Additional request arguments

456

457

Returns:

458

list: List of pending CatalogTask objects

459

"""

460

```

461

462

### URL Access

463

464

Get various URLs associated with the item.

465

466

```python { .api }

467

class URLs:

468

"""Object providing access to various item URLs."""

469

470

@property

471

def details(self):

472

"""str: Item details page URL."""

473

474

@property

475

def metadata(self):

476

"""str: Item metadata API URL."""

477

478

@property

479

def download(self):

480

"""str: Item download directory URL."""

481

482

@property

483

def history(self):

484

"""str: Item history page URL."""

485

486

@property

487

def edit(self):

488

"""str: Item edit page URL."""

489

490

@property

491

def editxml(self):

492

"""str: Item XML edit page URL."""

493

494

@property

495

def manage(self):

496

"""str: Item management page URL."""

497

```

498

499

### Review Management

500

501

Add, manage, and moderate reviews for items.

502

503

```python { .api }

504

def review(self, title: str, body: str, stars=None):

505

"""

506

Add a review to the item.

507

508

Args:

509

title (str): Review title

510

body (str): Review content/body

511

stars (int, optional): Star rating for the review (1-5)

512

513

Returns:

514

requests.Response: Response object from review submission

515

516

Example:

517

>>> item = internetarchive.get_item('my-item')

518

>>> item.review('Great content!', 'This item has excellent resources.', stars=5)

519

"""

520

521

def index_review(self, username=None, screenname=None, itemname=None):

522

"""

523

Set a review to be indexed (make it visible in search results).

524

525

Args:

526

username (str, optional): Username of the reviewer

527

screenname (str, optional): Screen name of the reviewer

528

itemname (str, optional): Archive.org account itemname of the reviewer

529

530

Returns:

531

requests.Response: Response object from the indexing operation

532

533

Note:

534

Requires appropriate privileges for review moderation.

535

"""

536

537

def noindex_review(self, username=None, screenname=None, itemname=None):

538

"""

539

Set a review to not be indexed (hide it from search results).

540

541

Args:

542

username (str, optional): Username of the reviewer

543

screenname (str, optional): Screen name of the reviewer

544

itemname (str, optional): Archive.org account itemname of the reviewer

545

546

Returns:

547

requests.Response: Response object from the operation

548

549

Note:

550

Requires appropriate privileges for review moderation.

551

"""

552

553

def delete_review(self, username=None, screenname=None, itemname=None):

554

"""

555

Delete a review from the item.

556

557

Args:

558

username (str, optional): Username of the reviewer

559

screenname (str, optional): Screen name of the reviewer

560

itemname (str, optional): Archive.org account itemname of the reviewer

561

562

Returns:

563

requests.Response: Response object from the deletion operation

564

565

Note:

566

Requires appropriate privileges for review management.

567

"""

568

```

569

570

### Flag Management

571

572

Add and manage administrative flags for items.

573

574

```python { .api }

575

def add_flag(self, category: str, user=None):

576

"""

577

Add a flag to the item for administrative purposes.

578

579

Args:

580

category (str): Flag category (e.g., 'copyright', 'spam', 'inappropriate')

581

user (str, optional): User adding the flag (defaults to current user)

582

583

Returns:

584

requests.Response: Response object from the flag addition

585

586

Example:

587

>>> item = internetarchive.get_item('problematic-item')

588

>>> item.add_flag('copyright', user='moderator')

589

"""

590

591

def delete_flag(self, category: str, user=None):

592

"""

593

Remove a flag from the item.

594

595

Args:

596

category (str): Flag category to remove

597

user (str, optional): User removing the flag (defaults to current user)

598

599

Returns:

600

requests.Response: Response object from the flag removal

601

"""

602

603

def get_flags(self):

604

"""

605

Retrieve all flags associated with the item.

606

607

Returns:

608

requests.Response: Response object containing flag data

609

610

Example:

611

>>> item = internetarchive.get_item('my-item')

612

>>> flags_response = item.get_flags()

613

>>> flags_data = flags_response.json()

614

"""

615

```

616

617

### Single File Upload

618

619

Upload individual files with extensive configuration options.

620

621

```python { .api }

622

def upload_file(self, body, key=None, metadata=None, file_metadata=None, headers=None, access_key=None, secret_key=None, queue_derive=False, verbose=False, verify=False, checksum=False, delete=False, retries=None, retries_sleep=None, debug=False, validate_identifier=False, request_kwargs=None):

623

"""

624

Upload a single file to the item with fine-grained control.

625

626

Args:

627

body (str or file-like): File path or file-like object to upload

628

key (str, optional): Remote filename (defaults to local filename)

629

metadata (dict, optional): Item metadata to set during upload

630

file_metadata (dict, optional): File-level metadata

631

headers (dict, optional): Additional HTTP headers

632

access_key (str, optional): IA-S3 access key

633

secret_key (str, optional): IA-S3 secret key

634

queue_derive (bool): Whether to queue derive task after upload

635

verbose (bool): Enable verbose output

636

verify (bool): Verify checksums after upload

637

checksum (bool): Skip the upload if a file with a matching MD5 checksum already exists in the item

638

delete (bool): Delete local file after upload success

639

retries (int, optional): Number of retry attempts

640

retries_sleep (int, optional): Sleep time between retries in seconds

641

debug (bool): Enable debug mode

642

validate_identifier (bool): Validate identifier format

643

request_kwargs (dict, optional): Additional request arguments

644

645

Returns:

646

requests.Request or requests.Response: Request object (if debug=True) or Response object

647

648

Example:

649

>>> item = internetarchive.get_item('my-item')

650

>>> response = item.upload_file(

651

... 'document.pdf',

652

... key='renamed-document.pdf',

653

... file_metadata={'title': 'Important Document'},

654

... verify=True,

655

... checksum=True

656

... )

657

"""

658

```

659

660

### Collection Management

661

662

Manage item membership in simplelists and collections.

663

664

```python { .api }

665

def remove_from_simplelist(self, parent: str, list: str):

666

"""

667

Remove the item from a simplelist collection.

668

669

Args:

670

parent (str): Parent collection identifier

671

list (str): List name to remove item from

672

673

Returns:

674

requests.Response: Response object from the removal operation

675

676

Example:

677

>>> item = internetarchive.get_item('my-item')

678

>>> item.remove_from_simplelist('my-collection', 'featured-items')

679

"""

680

```

681

682

### Collection Operations

683

684

Extended functionality for collection items.

685

686

```python { .api }

687

class Collection:

688

"""

689

Represents an Archive.org collection (extends Item).

690

"""

691

692

@property

693

def searches(self):

694

"""dict: Dictionary of Search objects for collection contents."""

695

```

696

697

## Usage Examples

698

699

### Basic Item Operations

700

701

```python

702

import internetarchive

703

704

# Get an item

705

item = internetarchive.get_item('govlawgacode20071')

706

707

# Check if item exists

708

if item.exists:

709

print(f"Item title: {item.metadata.get('title')}")

710

print(f"Item has {item.files_count} files")

711

print(f"Total size: {item.item_size} bytes")

712

713

# Get specific file

714

pdf_file = item.get_file('govlawgacode20071.pdf')

715

if pdf_file:

716

print(f"PDF file size: {pdf_file.size}")

717

```

718

719

### Upload Example

720

721

```python

722

import internetarchive

723

724

# Upload files with metadata

725

response = internetarchive.upload(

726

'my-new-item',

727

files=['document.pdf', 'image.jpg'],

728

metadata={

729

'title': 'My Document Collection',

730

'creator': 'Your Name',

731

'description': 'A collection of important documents',

732

'collection': 'opensource'

733

}

734

)

735

736

print(f"Upload completed: {len(response)} files uploaded")

737

```

738

739

### Download with Filtering

740

741

```python

742

import internetarchive

743

744

# Download only PDF files

745

internetarchive.download(

746

'example-item',

747

formats=['pdf'],

748

destdir='./downloads',

749

verbose=True,

750

checksum=True

751

)

752

753

# Download files matching pattern

754

internetarchive.download(

755

'example-item',

756

glob_pattern='*.txt',

757

ignore_existing=True

758

)

759

```