or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

browser-automation.md cli-tools.md configuration.md core-types.md crawlers.md error-handling.md events.md fingerprinting.md http-clients.md index.md request-management.md sessions.md statistics.md storage.md

docs/sessions.md

0

# Sessions

1

2

Session and cookie management with rotation capabilities for maintaining state across requests and avoiding detection. Sessions provide persistent state management, cookie handling, and user agent rotation for more human-like crawling behavior.

3

4

## Capabilities

5

6

### Session

7

8

Individual session object managing cookies, user agent, and request state for a single logical browsing session.

9

10

```python { .api }

11

class Session:
    """Single logical browsing session: cookies, user agent, and request state."""

    def __init__(
        self,
        session_pool: SessionPool,
        *,
        id: str | None = None,
        max_age: timedelta = timedelta(hours=1),
        max_usage_count: int = 50,
        max_error_score: float = 3.0
    ): ...

    @property
    def id(self) -> str:
        """Unique session identifier."""

    @property
    def cookies(self) -> SessionCookies:
        """Cookie jar for this session."""

    @property
    def user_agent(self) -> str:
        """User agent string for this session."""

    @property
    def usage_count(self) -> int:
        """Number of requests made with this session."""

    @property
    def error_score(self) -> float:
        """Accumulated error score (higher = more problematic)."""

    @property
    def is_blocked(self) -> bool:
        """True if session appears to be blocked."""

    @property
    def is_expired(self) -> bool:
        """True if session has exceeded age or usage limits."""

    def mark_blocked(self) -> None:
        """Mark session as blocked/detected."""

    def retire(self) -> None:
        """Remove session from pool and mark as retired."""

    # `Any` is typing.Any; the lowercase builtin `any` is a function, not a type.
    def get_state(self) -> dict[str, Any]:
        """Get session state for persistence."""

    def set_state(self, state: dict[str, Any]) -> None:
        """Restore session state from persistence."""

61

```

62

63

### Session Pool

64

65

Pool managing multiple sessions with automatic rotation, creation, and cleanup of sessions to maintain anonymity.

66

67

```python { .api }

68

class SessionPool:
    """Pool that manages multiple sessions, handling their rotation, creation, and cleanup."""

    def __init__(
        self,
        *,
        max_pool_size: int = 1000,
        create_session_function: Callable[[], Session] | None = None,
        persist_state_key: str | None = None,
        persist_state_key_value_store_id: str | None = None
    ): ...

    async def get_session(self, session_id: str | None = None) -> Session:
        """
        Get session from pool, creating new one if needed.

        Args:
            session_id: Specific session ID to retrieve

        Returns:
            Session object
        """

    async def retire_session(self, session: Session) -> None:
        """Remove session from pool."""

    def get_session_count(self) -> int:
        """Get number of sessions in pool."""

    # `Any` is typing.Any; the lowercase builtin `any` is a function, not a type.
    def get_state(self) -> dict[str, Any]:
        """Get pool state for persistence."""

    async def persist_state(self) -> None:
        """Save pool state to storage."""

    async def initialize(self) -> None:
        """Initialize pool and restore state if configured."""

    async def teardown(self) -> None:
        """Clean up pool resources."""

    @property
    def max_pool_size(self) -> int: ...

109

```

110

111

### Session Cookies

112

113

Cookie management within sessions supporting standard HTTP cookie operations with domain and path handling.

114

115

```python { .api }

116

class SessionCookies:
    """Cookie container for a session with lookup by name, domain, path, and URL."""

    def __init__(self): ...

    def add_cookie(
        self,
        cookie: CookieParam,
        *,
        url: str | None = None
    ) -> None:
        """
        Add cookie to session.

        Args:
            cookie: Cookie data
            url: URL context for cookie domain/path
        """

    def get_cookie(
        self,
        name: str,
        domain: str | None = None,
        path: str | None = None
    ) -> Cookie | None:
        """
        Get cookie by name and optional domain/path.

        Args:
            name: Cookie name
            domain: Cookie domain
            path: Cookie path

        Returns:
            Cookie object or None if not found
        """

    def delete_cookie(
        self,
        name: str,
        domain: str | None = None,
        path: str | None = None
    ) -> None:
        """Delete cookie by name."""

    def clear(self) -> None:
        """Remove all cookies."""

    def get_cookies_for_url(self, url: str) -> list[Cookie]:
        """Get all cookies applicable to given URL."""

    # `Any` is typing.Any; the lowercase builtin `any` is a function, not a type.
    def to_dict(self) -> dict[str, Any]:
        """Serialize cookies to dictionary."""

    def from_dict(self, data: dict[str, Any]) -> None:
        """Restore cookies from dictionary."""

    def __len__(self) -> int: ...

    def __iter__(self) -> Iterator[Cookie]: ...

174

```

175

176

### Cookie Types

177

178

Type definitions for cookie parameters and cookie objects.

179

180

```python { .api }

181

# A cookie may be supplied either as a plain dict of cookie fields or as a Cookie object.
CookieParam = Union[dict[str, str | int | float | bool | None], Cookie]

185

```

186

187

```python { .api }

188

class Cookie:
    """Cookie object exposing its attributes as read-only properties."""

    def __init__(
        self,
        name: str,
        value: str,
        *,
        domain: str | None = None,
        path: str = "/",
        expires: datetime | None = None,
        max_age: int | None = None,
        secure: bool = False,
        http_only: bool = False,
        same_site: Literal["Strict", "Lax", "None"] | None = None
    ): ...

    # --- attribute accessors -------------------------------------------------

    @property
    def name(self) -> str: ...

    @property
    def value(self) -> str: ...

    @property
    def domain(self) -> str | None: ...

    @property
    def path(self) -> str: ...

    @property
    def expires(self) -> datetime | None: ...

    @property
    def secure(self) -> bool: ...

    @property
    def http_only(self) -> bool: ...

    # --- checks --------------------------------------------------------------

    def is_expired(self) -> bool:
        """Check if cookie has expired."""

    def matches_url(self, url: str) -> bool:
        """Check if cookie should be sent with given URL."""

230

231

## Usage Examples

232

233

### Basic Session Usage

234

235

```python

236

import asyncio

237

from crawlee.sessions import SessionPool, Session

238

239

async def main():
    """Get a session from a pool, inspect it, add a cookie, and optionally retire it."""
    # Create session pool
    pool = SessionPool(max_pool_size=100)
    await pool.initialize()

    try:
        # Get session from pool
        session = await pool.get_session()

        print(f"Session ID: {session.id}")
        print(f"User Agent: {session.user_agent}")
        print(f"Usage count: {session.usage_count}")

        # Add cookies to session
        session.cookies.add_cookie({
            'name': 'sessionid',
            'value': 'abc123',
            'domain': 'example.com'
        })

        # Count the cookies that apply to example.com
        print(f"Cookies for example.com: {len(session.cookies.get_cookies_for_url('https://example.com'))}")

        # Mark session as blocked if detected
        if should_retire_session():
            session.mark_blocked()
            await pool.retire_session(session)
    finally:
        # Tear the pool down even if the work above raised
        await pool.teardown()

267

268

def should_retire_session() -> bool:
    """Placeholder hook for block detection; this stub always answers False."""
    # Replace the constant below with your own logic to detect a blocked session
    return False

271

272

asyncio.run(main())

273

```

274

275

### Session with HTTP Crawler

276

277

```python

278

import asyncio

279

from crawlee.crawlers import HttpCrawler, HttpCrawlingContext

280

from crawlee.sessions import SessionPool

281

282

async def main():
    """Crawl two example pages with an HttpCrawler that uses a configured session pool."""
    # Configure session pool
    sessions = SessionPool(
        max_pool_size=10,
        persist_state_key='my-crawler-sessions',
    )

    # Create crawler with session pool
    crawler = HttpCrawler(session_pool=sessions, use_session_pool=True)

    @crawler.router.default_handler
    async def handler(context: HttpCrawlingContext):
        session = context.session

        context.log.info(f"Using session: {session.id}")
        context.log.info(f"Session usage: {session.usage_count}")

        # Add authentication cookie if needed
        auth_cookie = session.cookies.get_cookie('auth_token')
        if not auth_cookie:
            session.cookies.add_cookie({
                'name': 'auth_token',
                'value': 'your_auth_token_here',
                'domain': 'example.com',
            })

        # Extract data and store it
        await context.push_data({
            'url': context.request.url,
            'session_id': session.id,
            'status': context.response.status_code,
        })

        # Mark session as blocked if we get blocked
        if context.response.status_code == 403:
            context.log.warning(f"Session {session.id} may be blocked")
            session.mark_blocked()

    start_urls = ['https://example.com/page1', 'https://example.com/page2']
    await crawler.run(start_urls)

325

326

asyncio.run(main())

327

```

328

329

### Custom Session Creation

330

331

```python

332

import asyncio

333

from crawlee.sessions import SessionPool, Session

334

335

def create_custom_session() -> Session:
    """Custom session factory with specific configuration."""
    # This example's imports do not include timedelta, so import it here
    from datetime import timedelta

    session = Session(
        session_pool=None,  # Will be set by pool
        max_age=timedelta(minutes=30),
        max_usage_count=25,
        max_error_score=2.0
    )

    # Add custom cookies or configuration
    session.cookies.add_cookie({
        'name': 'preferences',
        'value': 'theme=dark;lang=en',
        'domain': '.example.com'
    })

    return session

352

353

async def main():
    """Build a pool that uses the custom session factory and check the factory's cookie."""
    pool = SessionPool(
        max_pool_size=50,
        create_session_function=create_custom_session
    )

    await pool.initialize()

    try:
        # Get custom-configured session
        session = await pool.get_session()

        # Verify custom cookie was added
        prefs_cookie = session.cookies.get_cookie('preferences', domain='.example.com')
        print(f"Custom cookie: {prefs_cookie.value if prefs_cookie else 'Not found'}")
    finally:
        # Tear the pool down even if the work above raised
        await pool.teardown()

369

370

asyncio.run(main())

371

```

372

373

### Session State Persistence

374

375

```python

376

import asyncio

377

from crawlee.sessions import SessionPool

378

379

async def main():
    """Use a session pool configured to persist its state between runs."""
    # Create pool with state persistence
    pool = SessionPool(
        max_pool_size=100,
        persist_state_key='crawler-sessions',
        persist_state_key_value_store_id='session-store'
    )

    # Initialize will restore previous session state
    await pool.initialize()

    try:
        # Use sessions for crawling...
        session1 = await pool.get_session()
        session2 = await pool.get_session()

        print(f"Pool has {pool.get_session_count()} sessions")

        # Manually persist state
        await pool.persist_state()
    finally:
        # Teardown will also persist state; run it even if the work above raised
        await pool.teardown()

    print("Session state saved for next run")

403

404

asyncio.run(main())

405

```

406

407

### Cookie Management

408

409

```python

410

import asyncio

411

from crawlee.sessions import SessionPool

412

from datetime import datetime, timedelta

413

414

async def main():
    """Add, query, and delete cookies on a session taken from a pool."""
    pool = SessionPool()
    await pool.initialize()

    try:
        session = await pool.get_session()

        # Add various types of cookies
        session.cookies.add_cookie({
            'name': 'session_id',
            'value': 'abc123',
            'domain': 'example.com',
            'path': '/',
            'expires': datetime.now() + timedelta(hours=1),
            'secure': True,
            'http_only': True
        })

        session.cookies.add_cookie({
            'name': 'preferences',
            'value': 'theme=dark',
            'domain': '.example.com',
            'path': '/settings'
        })

        # Get cookies for specific URL
        url = 'https://example.com/settings/profile'
        cookies = session.cookies.get_cookies_for_url(url)

        print(f"Cookies for {url}:")
        for cookie in cookies:
            print(f" {cookie.name}={cookie.value}")

        # Remove specific cookie
        session.cookies.delete_cookie('preferences', domain='.example.com')

        # Check remaining cookies
        print(f"Remaining cookies: {len(session.cookies)}")
    finally:
        # Tear the pool down even if the work above raised
        await pool.teardown()

453

454

asyncio.run(main())

455

```