or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

account-management.md cli-interface.md configuration-auth.md file-management.md index.md item-operations.md metadata-operations.md search-operations.md session-management.md task-management.md

docs/search-operations.md

0

# Search Operations

1

2

Search operations provide powerful querying capabilities for discovering items in the Internet Archive using various search criteria, field selection, sorting, and full-text search options.

3

4

## Capabilities

5

6

### Basic Search

7

8

Search for items using Archive.org's search syntax; results are returned as an iterable Search object.

9

10

```python { .api }

11

def search_items(query, fields=None, sorts=None, params=None, full_text_search=False, dsl_fts=False, archive_session=None, config=None, config_file=None, http_adapter_kwargs=None, request_kwargs=None, max_retries=None):

12

"""

13

Search for items on Archive.org with advanced filtering options.

14

15

Args:

16

query (str): Search query using Archive.org syntax:

17

- Basic: 'collection:nasa'

18

- Field search: 'creator:"Neil Armstrong"'

19

- Boolean: 'collection:nasa AND mediatype:movies'

20

- Date ranges: 'date:[1969-01-01 TO 1969-12-31]'

21

- Wildcards: 'title:apollo*'

22

fields (list, optional): Metadata fields to return in results:

23

- Common: ['identifier', 'title', 'creator', 'date', 'description']

24

- All available fields returned if None

25

sorts (list, optional): Sort criteria:

26

- ['downloads desc'] - Most downloaded first

27

- ['date desc'] - Newest first

28

- ['titleSorter asc'] - Alphabetical by title

29

- ['reviewdate desc', 'identifier asc'] - Multiple sorts

30

params (dict, optional): Additional URL parameters:

31

- 'rows': int, results per page (default: 25, max: 10000)

32

- 'page': int, page number (1-based)

33

- 'cursor': str, cursor for pagination

34

- 'save': bool, save search for future use

35

full_text_search (bool): Enable full-text search across item content

36

dsl_fts (bool): Enable DSL-based full-text search for advanced queries

37

archive_session (ArchiveSession, optional): Existing session to use

38

config (dict, optional): Configuration for new session

39

config_file (str, optional): Config file for new session

40

http_adapter_kwargs (dict, optional): HTTP adapter arguments

41

request_kwargs (dict, optional): Additional request arguments

42

max_retries (int, optional): Maximum retry attempts for failed requests

43

44

Returns:

45

Search: Search object for iterating over results

46

47

Raises:

48

ValueError: If query is invalid

49

requests.RequestException: If search request fails

50

"""

51

52

class Search:

53

"""

54

Represents a search query and provides access to results.

55

"""

56

57

def __init__(self, archive_session, query, fields=None, sorts=None, params=None, full_text_search=None, dsl_fts=None, request_kwargs=None, max_retries=None):

58

"""

59

Initialize Search object.

60

61

Args:

62

archive_session (ArchiveSession): Session object

63

query (str): Search query string

64

fields (list, optional): Fields to return

65

sorts (list, optional): Sort criteria

66

params (dict, optional): URL parameters

67

full_text_search (bool, optional): Enable full-text search

68

dsl_fts (bool, optional): Enable DSL full-text search

69

request_kwargs (dict, optional): Request arguments

70

max_retries (int, optional): Maximum retries

71

"""

72

```

73

74

### Search Properties

75

76

Access search configuration and result information.

77

78

```python { .api }

79

class Search:

80

@property

81

def session(self):

82

"""ArchiveSession: Session object used for this search."""

83

84

@property

85

def query(self):

86

"""str: Search query string."""

87

88

@property

89

def fields(self):

90

"""list: Metadata fields being returned."""

91

92

@property

93

def sorts(self):

94

"""list: Sort criteria applied to results."""

95

96

@property

97

def params(self):

98

"""dict: URL parameters for the search."""

99

100

@property

101

def fts(self):

102

"""bool: Whether full-text search is enabled."""

103

104

@property

105

def dsl_fts(self):

106

"""bool: Whether DSL full-text search is enabled."""

107

108

@property

109

def num_found(self):

110

"""int: Total number of results found (not just returned)."""

111

```

112

113

### Result Iteration

114

115

Iterate over search results in different formats.

116

117

```python { .api }

118

class Search:

119

def __iter__(self):

120

"""

121

Iterate over search results as dictionaries.

122

123

Yields:

124

dict: Result dictionaries with requested fields

125

"""

126

127

def iter_as_results(self):

128

"""

129

Explicitly iterate over search results as dictionaries.

130

131

Yields:

132

dict: Result dictionaries with metadata fields

133

"""

134

135

def iter_as_items(self):

136

"""

137

Iterate over search results as Item objects.

138

139

Yields:

140

Item: Item objects for each search result

141

142

Note:

143

Creates Item objects which may trigger additional API calls

144

for metadata. Use iter_as_results() for better performance

145

when you only need the search result fields.

146

"""

147

```

148

149

## Search Query Syntax

150

151

### Basic Query Examples

152

153

```python

154

import internetarchive

155

156

# Search by collection

157

search = internetarchive.search_items('collection:nasa')

158

159

# Search by media type

160

search = internetarchive.search_items('mediatype:movies')

161

162

# Search by creator

163

search = internetarchive.search_items('creator:"Internet Archive"')

164

165

# Search by title with wildcards

166

search = internetarchive.search_items('title:apollo*')

167

```

168

169

### Advanced Query Examples

170

171

```python

172

import internetarchive

173

174

# Boolean queries

175

search = internetarchive.search_items(

176

'collection:nasa AND mediatype:movies AND date:[1969-01-01 TO 1969-12-31]'

177

)

178

179

# Multiple collections

180

search = internetarchive.search_items('collection:(nasa OR loc)')

181

182

# Exclude results

183

search = internetarchive.search_items('collection:nasa NOT mediatype:data')

184

185

# Full-text search

186

search = internetarchive.search_items(

187

'moon landing',

188

full_text_search=True

189

)

190

```

191

192

### Field Selection and Sorting

193

194

```python

195

import internetarchive

196

197

# Select specific fields

198

search = internetarchive.search_items(

199

'collection:nasa',

200

fields=['identifier', 'title', 'creator', 'date', 'downloads']

201

)

202

203

# Sort by popularity

204

search = internetarchive.search_items(

205

'collection:movies',

206

sorts=['downloads desc', 'reviewdate desc']

207

)

208

209

# Sort alphabetically

210

search = internetarchive.search_items(

211

'collection:books',

212

sorts=['titleSorter asc']

213

)

214

```

215

216

### Pagination and Performance

217

218

```python

219

import internetarchive

220

221

# Large result sets

222

search = internetarchive.search_items(

223

'collection:opensource',

224

params={'rows': 1000} # Get up to 1000 results per page

225

)

226

227

# Specific page

228

search = internetarchive.search_items(

229

'collection:nasa',

230

params={'page': 5, 'rows': 50}

231

)

232

233

# Using cursor for efficient pagination

234

search = internetarchive.search_items(

235

'collection:books',

236

params={'cursor': 'next_cursor_value'}

237

)

238

```

239

240

## Usage Examples

241

242

### Basic Search and Iteration

243

244

```python

245

import internetarchive

246

247

# Search for NASA collection items

248

search = internetarchive.search_items('collection:nasa')

249

250

print(f"Found {search.num_found} total results")

251

252

# Iterate over first page of results

253

for result in search:

254

print(f"ID: {result['identifier']}")

255

if 'title' in result:

256

print(f"Title: {result['title']}")

257

print(f"Downloads: {result.get('downloads', 'N/A')}")

258

print("---")

259

```

260

261

### Working with Item Objects

262

263

```python

264

import internetarchive

265

266

# Search and get Item objects

267

search = internetarchive.search_items(

268

'collection:nasa AND mediatype:movies',

269

fields=['identifier', 'title', 'creator']

270

)

271

272

# Convert results to Item objects for full functionality

273

for item in search.iter_as_items():

274

print(f"Processing item: {item.identifier}")

275

276

# Access full metadata (triggers API call)

277

print(f"Full title: {item.metadata.get('title')}")

278

print(f"File count: {item.files_count}")

279

280

# Download first PDF file if available

281

for file in item.get_files(formats=['pdf']):

282

file.download()

283

break

284

```

285

286

### Advanced Search with Session

287

288

```python

289

import internetarchive

290

291

# Create session for multiple searches

292

session = internetarchive.get_session()

293

294

# Search with session for better performance

295

search1 = session.search_items(

296

'collection:movies AND year:2020',

297

fields=['identifier', 'title', 'year'],

298

sorts=['downloads desc']

299

)

300

301

search2 = session.search_items(

302

'creator:"Internet Archive" AND mediatype:texts',

303

fields=['identifier', 'title', 'creator', 'date']

304

)

305

306

# Process multiple searches

307

for search in [search1, search2]:

308

print(f"Query: {search.query}")

309

print(f"Results: {search.num_found}")

310

311

# Get top 10 results

312

count = 0

313

for result in search:

314

print(f" {result['identifier']}: {result.get('title', 'No title')}")

315

count += 1

316

if count >= 10:

317

break

318

print()

319

```

320

321

### Full-Text Search

322

323

```python

324

import internetarchive

325

326

# Search within document content

327

search = internetarchive.search_items(

328

'artificial intelligence machine learning',

329

full_text_search=True,

330

fields=['identifier', 'title', 'description']

331

)

332

333

print(f"Full-text search found {search.num_found} documents")

334

335

for result in search:

336

print(f"Document: {result['identifier']}")

337

print(f"Title: {result.get('title', 'No title')}")

338

if 'description' in result:

339

print(f"Description: {result['description'][:200]}...")

340

print("---")

341

```

342

343

### Specialized Collection Searches

344

345

```python

346

import internetarchive

347

348

# Search specific collections with targeted fields

349

collections_queries = {

350

'software': {

351

'query': 'collection:softwarelibrary',

352

'fields': ['identifier', 'title', 'creator', 'emulator']

353

},

354

'books': {

355

'query': 'collection:books AND language:eng',

356

'fields': ['identifier', 'title', 'creator', 'publisher', 'date']

357

},

358

'audio': {

359

'query': 'collection:etree AND year:2023',

360

'fields': ['identifier', 'title', 'creator', 'date', 'venue']

361

}

362

}

363

364

for collection_name, config in collections_queries.items():

365

search = internetarchive.search_items(

366

config['query'],

367

fields=config['fields'],

368

sorts=['downloads desc']

369

)

370

371

print(f"{collection_name.upper()} Collection ({search.num_found} items):")

372

373

count = 0

374

for result in search:

375

print(f" {result['identifier']}: {result.get('title', 'No title')}")

376

count += 1

377

if count >= 5: # Show top 5

378

break

379

print()

380

```