or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

browser-automation.md, cli-tools.md, configuration.md, core-types.md, crawlers.md, error-handling.md, events.md, fingerprinting.md, http-clients.md, index.md, request-management.md, sessions.md, statistics.md, storage.md

docs/fingerprinting.md

0

# Fingerprinting

1

2

Browser fingerprint generation and header randomization for enhanced stealth capabilities and bot protection bypass. Fingerprinting capabilities help make HTTP requests and browser sessions appear more human-like to avoid detection.

3

4

## Capabilities

5

6

### Fingerprint Generator

7

8

Base class for generating browser fingerprints with realistic device characteristics.

9

10

```python { .api }

11

from typing import Any


class FingerprintGenerator:
    """Base class for generating browser fingerprints.

    This is an API stub: the methods carry documentation only.
    """

    async def generate_fingerprint(self, **options: Any) -> dict[str, Any]:
        """
        Generate browser fingerprint with realistic characteristics.

        Args:
            **options: Fingerprint generation options

        Returns:
            Dictionary containing fingerprint data
        """

    def get_headers(self, fingerprint: dict[str, Any]) -> HttpHeaders:
        """
        Generate HTTP headers from fingerprint.

        Args:
            fingerprint: Generated fingerprint data

        Returns:
            HttpHeaders object with realistic headers
        """

33

```

34

35

### Default Fingerprint Generator

36

37

Default implementation using browserforge for generating realistic browser fingerprints.

38

39

```python { .api }

40

class DefaultFingerprintGenerator(FingerprintGenerator):
    """Default fingerprint generator (API stub).

    Every constructor option is keyword-only and defaults to ``None``.
    """

    def __init__(
        self,
        *,
        browser_name: str | None = None,
        browser_version: str | None = None,
        device_category: str | None = None,
        operating_system: str | None = None,
        locale: str | None = None,
    ) -> None: ...

    # NOTE(review): the return annotation differs from the base class, which
    # declares a dictionary return value; confirm which one callers should rely on.
    async def generate_fingerprint(
        self,
        **options,
    ) -> BrowserFingerprintData:
        """Generate realistic browser fingerprint."""

    @property
    def browser_name(self) -> str | None: ...

    @property
    def device_category(self) -> str | None: ...

62

```

63

64

### Header Generator

65

66

Specialized generator for creating realistic HTTP headers with proper ordering and values.

67

68

```python { .api }

69

from typing import Any


class HeaderGenerator:
    """Generator of realistic HTTP headers (API stub).

    Every constructor option is keyword-only and defaults to ``None``.
    """

    def __init__(
        self,
        *,
        browser_name: str | None = None,
        browser_version: str | None = None,
        operating_system: str | None = None,
        device: str | None = None,
        locale: str | None = None,
    ) -> None: ...

    def get_headers(
        self,
        *,
        url: str | None = None,
        method: HttpMethod = "GET",
        # Each keyword carries a single header value (the usage example passes
        # plain strings such as ``referer=...``), so the values are typed as
        # strings rather than as options objects.
        **options: str | None,
    ) -> HttpHeaders:
        """
        Generate realistic HTTP headers.

        Args:
            url: Target URL for headers
            method: HTTP method
            **options: Additional options, given as individual keywords
                (for example ``referer`` or ``accept``)

        Returns:
            HttpHeaders with realistic browser headers
        """

    def get_fingerprint_headers(
        self,
        fingerprint: dict[str, Any],
    ) -> HttpHeaders:
        """Generate headers from existing fingerprint data."""

104

```

105

106

### Configuration Types

107

108

Configuration classes for customizing fingerprint and header generation.

109

110

```python { .api }

111

class HeaderGeneratorOptions:

112

def __init__(

113

self,

114

*,

115

accept: str | None = None,

116

accept_encoding: str | None = None,

117

accept_language: str | None = None,

118

cache_control: str | None = None,

119

referer: str | None = None,

120

sec_fetch_dest: str | None = None,

121

sec_fetch_mode: str | None = None,

122

sec_fetch_site: str | None = None,

123

sec_fetch_user: str | None = None,

124

upgrade_insecure_requests: str | None = None,

125

user_agent: str | None = None

126

): ...

127

128

@property

129

def accept(self) -> str | None: ...

130

131

@property

132

def user_agent(self) -> str | None: ...

133

```

134

135

```python { .api }

136

class ScreenOptions:

137

def __init__(

138

self,

139

*,

140

width: int | None = None,

141

height: int | None = None,

142

pixel_ratio: float | None = None

143

): ...

144

145

@property

146

def width(self) -> int | None:

147

"""Screen width in pixels."""

148

149

@property

150

def height(self) -> int | None:

151

"""Screen height in pixels."""

152

153

@property

154

def pixel_ratio(self) -> float | None:

155

"""Device pixel ratio."""

156

```

157

158

### Fingerprint Data Types

159

160

Data structures containing generated fingerprint information.

161

162

```python { .api }

163

class BrowserFingerprintData:
    """Generated fingerprint record; fields are declared as annotations only."""

    user_agent: str
    viewport: ViewportSize
    screen: ScreenSize
    headers: dict[str, str]
    webgl_vendor: str | None
    webgl_renderer: str | None
    languages: list[str]
    timezone: str
    platform: str
    cookie_enabled: bool
    do_not_track: bool | None
    plugins: list[PluginData]

176

```

177

178

```python { .api }

179

class ViewportSize:
    """Viewport dimensions; fields are declared as annotations only."""

    width: int
    height: int

182

```

183

184

```python { .api }

185

class ScreenSize:
    """Screen dimensions and depths; fields are declared as annotations only."""

    width: int
    height: int
    available_width: int
    available_height: int
    color_depth: int
    pixel_depth: int

192

```

193

194

```python { .api }

195

class PluginData:
    """Browser plugin description; fields are declared as annotations only."""

    name: str
    filename: str
    description: str

199

```

200

201

## Usage Examples

202

203

### Basic Fingerprint Generation

204

205

```python

206

import asyncio

207

from crawlee.fingerprint_suite import DefaultFingerprintGenerator

208

209

async def main():
    """Generate one fingerprint, print its main fields, then print the derived headers."""
    # Create fingerprint generator
    generator = DefaultFingerprintGenerator(
        browser_name='chrome',
        device_category='desktop',
        operating_system='windows',
    )

    # Generate fingerprint
    fingerprint = await generator.generate_fingerprint()

    print(f"User Agent: {fingerprint.user_agent}")
    print(f"Viewport: {fingerprint.viewport.width}x{fingerprint.viewport.height}")
    print(f"Screen: {fingerprint.screen.width}x{fingerprint.screen.height}")
    print(f"Platform: {fingerprint.platform}")
    print(f"Languages: {fingerprint.languages}")
    print(f"Timezone: {fingerprint.timezone}")

    # Generate headers from fingerprint
    headers = generator.get_headers(fingerprint)
    print(f"Generated headers: {headers.to_dict()}")

230

231

asyncio.run(main())

232

```

233

234

### Header Generation

235

236

```python

237

from crawlee.fingerprint_suite import HeaderGenerator, HeaderGeneratorOptions

238

239

# Create header generator
generator = HeaderGenerator(
    browser_name='chrome',
    operating_system='macos',
    locale='en-US',
)

# Generate headers for specific URL
headers = generator.get_headers(
    url='https://example.com/api/data',
    method='GET',
    referer='https://example.com',
    accept='application/json',
)

print("Generated headers:")
for key, value in headers.items():
    print(f" {key}: {value}")

# Custom header options
option_values = {
    'accept': 'text/html,application/xhtml+xml',
    'accept_language': 'en-US,en;q=0.9',
    'cache_control': 'max-age=0',
    'sec_fetch_dest': 'document',
    'sec_fetch_mode': 'navigate',
}
options = HeaderGeneratorOptions(**option_values)

# Expand the explicit keyword mapping rather than `options.__dict__`: an
# object's instance dict is an implementation detail and need not mirror the
# constructor keywords (for example with slotted classes, or with private
# backing attributes behind properties).
headers = generator.get_headers(
    url='https://example.com',
    method='GET',
    **option_values,
)

272

```

273

274

### Using with HTTP Crawler

275

276

```python

277

import asyncio

278

from crawlee.crawlers import HttpCrawler, HttpCrawlingContext

279

from crawlee.fingerprint_suite import DefaultFingerprintGenerator

280

281

async def main():
    """Crawl two URLs and record which generated fingerprint each handler run used."""
    # Create fingerprint generator
    fingerprint_generator = DefaultFingerprintGenerator(
        browser_name='chrome',
        device_category='mobile',
        operating_system='android',
    )

    crawler = HttpCrawler()

    @crawler.router.default_handler
    async def handler(context: HttpCrawlingContext):
        # Generate new fingerprint for each request
        fingerprint = await fingerprint_generator.generate_fingerprint()

        # Derive the headers that match this fingerprint. They are only
        # reported below: this handler already holds the response
        # (it reads context.response), and nothing here attaches the
        # headers to a request.
        headers = fingerprint_generator.get_headers(fingerprint)

        # Log fingerprint info
        context.log.info(f"Using fingerprint: {fingerprint.user_agent}")
        context.log.info(f"Screen: {fingerprint.screen.width}x{fingerprint.screen.height}")
        context.log.debug(f"Fingerprint headers: {headers.to_dict()}")

        # Process response
        data = {
            'url': context.request.url,
            'user_agent': fingerprint.user_agent,
            'screen_size': f"{fingerprint.screen.width}x{fingerprint.screen.height}",
            'status': context.response.status_code,
        }

        await context.push_data(data)

    await crawler.run(['https://httpbin.org/user-agent', 'https://httpbin.org/headers'])

314

315

asyncio.run(main())

316

```

317

318

### Session-Specific Fingerprints

319

320

```python

321

import asyncio

322

from crawlee.crawlers import HttpCrawler, HttpCrawlingContext

323

from crawlee.fingerprint_suite import DefaultFingerprintGenerator

324

from crawlee.sessions import SessionPool

325

326

async def main():
    """Crawl with a session pool, keeping one fingerprint per session id."""
    # Create fingerprint generator
    generator = DefaultFingerprintGenerator()

    # Create session pool
    session_pool = SessionPool(max_pool_size=5)

    crawler = HttpCrawler(
        session_pool=session_pool,
        use_session_pool=True,
    )

    # Store fingerprints per session
    session_fingerprints = {}

    @crawler.router.default_handler
    async def handler(context: HttpCrawlingContext):
        session_id = context.session.id

        # Generate fingerprint once per session
        fingerprint = session_fingerprints.get(session_id)
        if fingerprint is None:
            candidate = await generator.generate_fingerprint()
            # Another handler for the same session may have stored a
            # fingerprint while this one was suspended in the await above;
            # setdefault keeps whichever was stored first, so a session never
            # ends up with two different fingerprints.
            fingerprint = session_fingerprints.setdefault(session_id, candidate)
            if fingerprint is candidate:
                context.log.info(f"Generated new fingerprint for session {session_id}")

        # Use consistent fingerprint for this session. The headers are only
        # reported here; nothing in this handler attaches them to a request.
        headers = generator.get_headers(fingerprint)
        context.log.debug(f"Session {session_id} headers: {headers.to_dict()}")

        data = {
            'url': context.request.url,
            'session_id': session_id,
            'user_agent': fingerprint.user_agent,
            'consistent_fingerprint': True,
        }

        await context.push_data(data)

    # Multiple requests will reuse fingerprints per session
    urls = ['https://httpbin.org/headers'] * 10
    await crawler.run(urls)

368

369

asyncio.run(main())

370

```

371

372

### Custom Fingerprint Generator

373

374

```python

375

import asyncio
import random
from typing import Any

from crawlee.fingerprint_suite import FingerprintGenerator, HeaderGenerator

378

379

class CustomFingerprintGenerator(FingerprintGenerator):
    """Custom fingerprint generator with specific characteristics.

    Fingerprints are plain dictionaries assembled from fixed pools of user
    agents, screen resolutions and timezones.
    """

    def __init__(self):
        self.header_generator = HeaderGenerator()
        self.user_agents = [
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        ]

    async def generate_fingerprint(self, **options) -> dict[str, Any]:
        """Generate custom fingerprint with specific characteristics.

        Args:
            **options: Accepted for interface compatibility; not used here.

        Returns:
            Dictionary with the keys ``user_agent``, ``viewport``, ``screen``,
            ``languages``, ``timezone``, ``platform``, ``cookie_enabled`` and
            ``do_not_track``.
        """
        # Select random user agent
        user_agent = random.choice(self.user_agents)

        # Define screen resolutions
        screen_resolutions = [
            (1920, 1080),
            (1366, 768),
            (1440, 900),
            (1600, 900),
        ]
        screen_width, screen_height = random.choice(screen_resolutions)

        # Generate viewport (slightly smaller than screen)
        viewport_width = screen_width - random.randint(0, 100)
        viewport_height = screen_height - random.randint(100, 200)

        return {
            'user_agent': user_agent,
            'viewport': {
                'width': viewport_width,
                'height': viewport_height,
            },
            'screen': {
                'width': screen_width,
                'height': screen_height,
                'color_depth': 24,
                'pixel_depth': 24,
            },
            'languages': ['en-US', 'en'],
            'timezone': random.choice(['America/New_York', 'Europe/London', 'America/Los_Angeles']),
            'platform': self._get_platform_from_ua(user_agent),
            'cookie_enabled': True,
            'do_not_track': random.choice([None, False]),
        }

    def get_headers(self, fingerprint: dict[str, Any]) -> dict[str, str]:
        """Generate headers from fingerprint.

        Args:
            fingerprint: Dictionary providing ``user_agent`` and ``languages``;
                ``do_not_track`` is optional.

        Returns:
            Header name to value mapping. ``DNT`` is included only when the
            fingerprint's ``do_not_track`` is not ``None``.
        """
        headers = {
            'User-Agent': fingerprint['user_agent'],
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language': self._format_accept_language(fingerprint['languages']),
            'Accept-Encoding': 'gzip, deflate, br',
        }

        # None means the preference is unset, so no DNT header is sent at all;
        # only an explicit True/False produces '1'/'0'.
        do_not_track = fingerprint.get('do_not_track')
        if do_not_track is not None:
            headers['DNT'] = '1' if do_not_track else '0'

        # Added after DNT so the overall header order is unchanged.
        headers.update({
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
            'Sec-Fetch-Dest': 'document',
            'Sec-Fetch-Mode': 'navigate',
            'Sec-Fetch-Site': 'none',
            'Cache-Control': 'max-age=0',
        })
        return headers

    @staticmethod
    def _format_accept_language(languages: list[str]) -> str:
        """Join language tags into an Accept-Language value.

        The first tag carries no weight; later tags get q=0.9, 0.8, ... with a
        floor of 0.1. An empty list yields an empty string.
        """
        parts = []
        for position, language in enumerate(languages):
            if position == 0:
                parts.append(language)
            else:
                # Integer tenths avoid float formatting artifacts.
                tenths = max(10 - position, 1)
                parts.append(f"{language};q=0.{tenths}")
        return ','.join(parts)

    def _get_platform_from_ua(self, user_agent: str) -> str:
        """Extract platform from user agent."""
        # Checked in order; the first marker found in the user agent wins.
        platform_by_marker = (
            ('Windows', 'Win32'),
            ('Macintosh', 'MacIntel'),
            ('Linux', 'Linux x86_64'),
        )
        for marker, platform in platform_by_marker:
            if marker in user_agent:
                return platform
        return 'Unknown'

458

459

async def main():
    """Generate three fingerprints with the custom generator and print each one."""
    # Use custom fingerprint generator
    generator = CustomFingerprintGenerator()

    # Generate multiple fingerprints
    for i in range(3):
        fingerprint = await generator.generate_fingerprint()
        headers = generator.get_headers(fingerprint)
        screen = fingerprint['screen']
        viewport = fingerprint['viewport']

        print(f"\nFingerprint {i+1}:")
        print(f" User-Agent: {fingerprint['user_agent']}")
        print(f" Screen: {screen['width']}x{screen['height']}")
        print(f" Viewport: {viewport['width']}x{viewport['height']}")
        print(f" Platform: {fingerprint['platform']}")
        print(f" Timezone: {fingerprint['timezone']}")
        print(f" Accept-Language: {headers.get('Accept-Language', 'N/A')}")

475

476

asyncio.run(main())

477

```

478

479

### Integration with Playwright

480

481

```python

482

import asyncio

483

from crawlee.crawlers import PlaywrightCrawler, PlaywrightCrawlingContext

484

from crawlee.fingerprint_suite import DefaultFingerprintGenerator

485

486

async def main():
    """Crawl one URL with Playwright, applying a generated fingerprint to the page."""
    # Function-scope imports keep this example self-contained.
    import json
    from datetime import datetime
    from zoneinfo import ZoneInfo

    generator = DefaultFingerprintGenerator()

    crawler = PlaywrightCrawler()

    @crawler.router.default_handler
    async def handler(context: PlaywrightCrawlingContext):
        page = context.page

        # Generate fingerprint
        fingerprint = await generator.generate_fingerprint()

        # Apply fingerprint to browser page
        await page.set_viewport_size({
            'width': fingerprint.viewport.width,
            'height': fingerprint.viewport.height,
        })

        screen = fingerprint.screen

        # JSON literals are valid JavaScript literals, so json.dumps quotes and
        # escapes these values instead of relying on Python's repr().
        languages_js = json.dumps(fingerprint.languages)
        platform_js = json.dumps(fingerprint.platform)
        cookie_enabled_js = json.dumps(fingerprint.cookie_enabled)

        # Take the offset from the fingerprint's own timezone so it agrees with
        # the rest of the fingerprint. getTimezoneOffset() reports minutes
        # *behind* UTC, hence the sign flip.
        # ZoneInfo needs IANA timezone data (system tz database or the
        # `tzdata` package) and raises if the zone name is not found.
        utc_offset = datetime.now(ZoneInfo(fingerprint.timezone)).utcoffset()
        timezone_offset_minutes = -round(utc_offset.total_seconds() / 60)

        # Override JavaScript properties to match fingerprint
        init_script = f"""
            // Override screen properties
            Object.defineProperty(screen, 'width', {{ get: () => {screen.width} }});
            Object.defineProperty(screen, 'height', {{ get: () => {screen.height} }});
            Object.defineProperty(screen, 'availWidth', {{ get: () => {screen.available_width} }});
            Object.defineProperty(screen, 'availHeight', {{ get: () => {screen.available_height} }});
            Object.defineProperty(screen, 'colorDepth', {{ get: () => {screen.color_depth} }});
            Object.defineProperty(screen, 'pixelDepth', {{ get: () => {screen.pixel_depth} }});

            // Override navigator properties
            Object.defineProperty(navigator, 'languages', {{ get: () => {languages_js} }});
            Object.defineProperty(navigator, 'platform', {{ get: () => {platform_js} }});
            Object.defineProperty(navigator, 'cookieEnabled', {{ get: () => {cookie_enabled_js} }});

            // Override timezone
            Date.prototype.getTimezoneOffset = function() {{
                return {timezone_offset_minutes};
            }};
        """
        await page.add_init_script(init_script)

        # Navigate with fingerprint applied
        await page.goto(context.request.url)

        # Extract data
        data = {
            'url': context.request.url,
            'title': await page.title(),
            'fingerprint_applied': True,
            'viewport': f"{fingerprint.viewport.width}x{fingerprint.viewport.height}",
            'user_agent': fingerprint.user_agent,
        }

        await context.push_data(data)

    await crawler.run(['https://httpbin.org/headers'])

539

540

asyncio.run(main())

541

```