or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

account-management.md cli-interface.md configuration-auth.md file-management.md index.md item-operations.md metadata-operations.md search-operations.md session-management.md task-management.md

docs/session-management.md

0

# Session Management

1

2

Session management in the Internet Archive Python library provides persistent configuration, authentication, and HTTP adapter customization for efficient operations across multiple API calls.

3

4

## Capabilities

5

6

### Creating Sessions

7

8

Create new ArchiveSession objects with custom configuration, authentication, and HTTP settings.

9

10

```python { .api }

11

def get_session(config=None, config_file=None, debug=False, http_adapter_kwargs=None):

12

"""

13

Return a new ArchiveSession object for persistent configuration across tasks.

14

15

Args:

16

config (dict, optional): Configuration dictionary with keys:

17

- 's3': dict with 'access' and 'secret' keys for IA-S3 authentication

18

- 'general': dict with 'secure', 'host' for connection settings

19

- 'cookies': dict with Archive.org cookies for authentication

20

config_file (str, optional): Path to configuration file

21

debug (bool): Enable debug logging for all session operations

22

http_adapter_kwargs (dict, optional): Keyword arguments for HTTPAdapter:

23

- 'max_retries': int or urllib3.Retry object

24

- 'pool_connections': int, number of connection pools to cache

25

- 'pool_maxsize': int, maximum connections in pool

26

- 'socket_options': list of socket options

27

28

Returns:

29

ArchiveSession: Session object for API interactions

30

"""

31

32

class ArchiveSession:

33

"""

34

Main session class inheriting from requests.Session with Archive.org-specific functionality.

35

"""

36

37

def __init__(self, config=None, config_file="", debug=False, http_adapter_kwargs=None):

38

"""

39

Initialize ArchiveSession with configuration and HTTP settings.

40

41

Args:

42

config (dict, optional): Configuration dictionary

43

config_file (str): Path to configuration file

44

debug (bool): Enable debug logging

45

http_adapter_kwargs (dict, optional): HTTP adapter arguments

46

"""

47

```

48

49

### Session Properties

50

51

Access session configuration, authentication details, and connection settings.

52

53

```python { .api }

54

class ArchiveSession:

55

@property

56

def config(self):

57

"""dict: Complete configuration dictionary."""

58

59

@property

60

def secure(self):

61

"""bool: Whether to use HTTPS (default: True)."""

62

63

@property

64

def host(self):

65

"""str: Archive.org host (default: 'archive.org')."""

66

67

@property

68

def user_email(self):

69

"""str: Email of logged-in user (if authenticated)."""

70

71

@property

72

def access_key(self):

73

"""str: IA-S3 access key (if configured)."""

74

75

@property

76

def secret_key(self):

77

"""str: IA-S3 secret key (if configured)."""

78

79

@property

80

def headers(self):

81

"""dict: Default HTTP headers for requests."""

82

83

@property

84

def protocol(self):

85

"""str: URL scheme including the trailing colon ('https:' or 'http:'), chosen from `secure`."""

86

```

87

88

### Item and Metadata Operations

89

90

Retrieve items and metadata through the session object.

91

92

```python { .api }

93

class ArchiveSession:

94

def get_item(self, identifier, item_metadata=None, request_kwargs=None):

95

"""

96

Get an Item or Collection object.

97

98

Args:

99

identifier (str): Archive.org item identifier

100

item_metadata (dict, optional): Pre-fetched item metadata

101

request_kwargs (dict, optional): Additional request arguments

102

103

Returns:

104

Item or Collection: Item object (Collection if item is a collection)

105

"""

106

107

def get_metadata(self, identifier, request_kwargs=None):

108

"""

109

Get item metadata from Archive.org API.

110

111

Args:

112

identifier (str): Archive.org item identifier

113

request_kwargs (dict, optional): Additional request arguments

114

115

Returns:

116

dict: Item metadata dictionary

117

"""

118

```

119

120

### Search Operations

121

122

Perform searches through the session with advanced options.

123

124

```python { .api }

125

class ArchiveSession:

126

def search_items(self, query, fields=None, sorts=None, params=None, full_text_search=False, dsl_fts=False, request_kwargs=None, max_retries=None):

127

"""

128

Search for items with advanced filtering and configuration.

129

130

Args:

131

query (str): Search query using Archive.org syntax

132

fields (list, optional): Metadata fields to return

133

sorts (list, optional): Sort criteria (e.g., ['downloads desc'])

134

params (dict, optional): Additional URL parameters

135

full_text_search (bool): Enable full-text search across item content

136

dsl_fts (bool): Enable DSL-based full-text search

137

request_kwargs (dict, optional): Additional request arguments

138

max_retries (int, optional): Maximum retry attempts

139

140

Returns:

141

Search: Search object for iterating over results

142

"""

143

```

144

145

### Task Management

146

147

Submit and manage Archive.org catalog tasks through the session.

148

149

```python { .api }

150

class ArchiveSession:

151

def submit_task(self, identifier, cmd, comment="", priority=0, data=None, headers=None, reduced_priority=False, request_kwargs=None):

152

"""

153

Submit a task to Archive.org catalog system.

154

155

Args:

156

identifier (str): Item identifier for the task

157

cmd (str): Task command (e.g., 'derive.php', 'fixer.php')

158

comment (str): Task comment

159

priority (int): Task priority (-5 to 10, higher is more priority)

160

data (dict, optional): Additional task data

161

headers (dict, optional): Additional HTTP headers

162

reduced_priority (bool): Use reduced priority queue

163

request_kwargs (dict, optional): Additional request arguments

164

165

Returns:

166

Response: HTTP response from task submission

167

"""

168

169

def get_tasks(self, identifier="", params=None, request_kwargs=None):

170

"""

171

Get tasks from Archive.org catalog.

172

173

Args:

174

identifier (str, optional): Filter by item identifier

175

params (dict, optional): Additional query parameters:

176

- 'catalog': bool, include queued/running tasks

177

- 'history': bool, include completed tasks

178

- 'summary': bool, return task count summary

179

request_kwargs (dict, optional): Additional request arguments

180

181

Returns:

182

set: Set of CatalogTask objects

183

"""

184

185

def get_my_catalog(self, params=None, request_kwargs=None):

186

"""

187

Get current user's queued and running tasks.

188

189

Args:

190

params (dict, optional): Additional query parameters

191

request_kwargs (dict, optional): Additional request arguments

192

193

Returns:

194

set: Set of CatalogTask objects for current user

195

"""

196

197

def get_task_log(self, task_id, request_kwargs=None):

198

"""

199

Get log output for a specific task.

200

201

Args:

202

task_id (int): Task ID

203

request_kwargs (dict, optional): Additional request arguments

204

205

Returns:

206

str: Task log content

207

"""

208

209

def iter_history(self, identifier=None, params=None, request_kwargs=None):

210

"""

211

Iterate over completed tasks.

212

213

Args:

214

identifier (str, optional): Filter by item identifier

215

params (dict, optional): Additional query parameters

216

request_kwargs (dict, optional): Additional request arguments

217

218

Yields:

219

CatalogTask: Completed task objects

220

"""

221

222

def iter_catalog(self, identifier=None, params=None, request_kwargs=None):

223

"""

224

Iterate over queued and running tasks.

225

226

Args:

227

identifier (str, optional): Filter by item identifier

228

params (dict, optional): Additional query parameters

229

request_kwargs (dict, optional): Additional request arguments

230

231

Yields:

232

CatalogTask: Queued/running task objects

233

"""

234

235

def get_tasks_summary(self, identifier="", params=None, request_kwargs=None):

236

"""

237

Get task count summary by status.

238

239

Args:

240

identifier (str, optional): Filter by item identifier

241

params (dict, optional): Additional query parameters

242

request_kwargs (dict, optional): Additional request arguments

243

244

Returns:

245

dict: Task counts by status (queued, running, finished, etc.)

246

"""

247

```

248

249

### User Operations

250

251

Get information about the authenticated user.

252

253

```python { .api }

254

class ArchiveSession:

255

def whoami(self):

256

"""

257

Get the email address of the logged-in user.

258

259

Returns:

260

str: User email address, or empty string if not authenticated

261

"""

262

```

263

264

### HTTP Configuration

265

266

Configure HTTP adapters and logging for the session.

267

268

```python { .api }

269

class ArchiveSession:

270

def mount_http_adapter(self, protocol=None, max_retries=None, status_forcelist=None, host=None):

271

"""

272

Mount HTTP adapter with custom retry and error handling.

273

274

Args:

275

protocol (str, optional): Protocol to mount for ('http', 'https')

276

max_retries (int or Retry, optional): Retry configuration

277

status_forcelist (list, optional): HTTP status codes to retry

278

host (str, optional): Specific host to mount adapter for

279

"""

280

281

def set_file_logger(self, log_level, path, logger_name="internetarchive"):

282

"""

283

Configure file logging for the session.

284

285

Args:

286

log_level (int or str): Logging level (DEBUG, INFO, WARNING, ERROR)

287

path (str): Path to log file

288

logger_name (str): Logger name (default: 'internetarchive')

289

"""

290

```

291

292

## Usage Examples

293

294

### Basic Session Creation

295

296

```python

297

import internetarchive

298

299

# Create session with default configuration

300

session = internetarchive.get_session()

301

302

# Create session with custom configuration

303

config = {

304

's3': {

305

'access': 'your-access-key',

306

'secret': 'your-secret-key'

307

},

308

'general': {

309

'secure': True,

310

'host': 'archive.org'

311

}

312

}

313

session = internetarchive.get_session(config=config)

314

```

315

316

### Session with HTTP Configuration

317

318

```python

319

import internetarchive

from urllib3.util.retry import Retry

320

321

# Configure HTTP adapter with custom retry logic

322

http_adapter_kwargs = {

323

'max_retries': Retry(

324

total=5,

325

backoff_factor=1,

326

status_forcelist=[500, 502, 503, 504]

327

),

328

'pool_connections': 10,

329

'pool_maxsize': 20

330

}

331

332

session = internetarchive.get_session(

333

debug=True,

334

http_adapter_kwargs=http_adapter_kwargs

335

)

336

```

337

338

### Using Session for Multiple Operations

339

340

```python

341

import internetarchive

342

343

# Create session once

344

session = internetarchive.get_session()

345

346

# Use session for multiple operations

347

item = session.get_item('example-item')

348

search = session.search_items('collection:opensource')

349

tasks = session.get_tasks('example-item')

350

351

# Check authentication status

352

if session.user_email:

353

print(f"Authenticated as: {session.user_email}")

354

else:

355

print("Not authenticated")

356

```