# Proxy Management

Intelligent proxy rotation with multiple strategies, automatic failure detection, temporary banning of failed proxies, and success rate tracking for optimal performance. CloudScraper's proxy management helps distribute requests and avoid IP-based blocking.

## Capabilities

### ProxyManager Class

Core class that handles proxy rotation, failure tracking, and intelligent selection based on performance metrics.

```python { .api }
class ProxyManager:
    def __init__(self, proxies, proxy_rotation_strategy: str = 'sequential', ban_time: int = 300):
        """
        Initialize proxy manager with rotation strategy.

        Parameters:
        - proxies: list|dict, proxy URLs or dict mapping schemes to proxies
        - proxy_rotation_strategy: str, rotation strategy ('sequential', 'random', 'smart')
        - ban_time: int, time in seconds to ban failed proxies
        """

    def get_proxy(self) -> dict:
        """
        Get next proxy based on configured strategy.

        Returns:
        dict: Proxy configuration for requests (e.g., {'http': 'proxy_url', 'https': 'proxy_url'})

        Raises:
        - Exception: If no proxies are available
        """

    def report_success(self, proxy: dict):
        """
        Report successful proxy usage for smart rotation.

        Parameters:
        - proxy: dict, proxy configuration that succeeded
        """

    def report_failure(self, proxy: dict):
        """
        Report failed proxy usage for temporary banning.

        Parameters:
        - proxy: dict, proxy configuration that failed
        """

    def add_proxy(self, proxy: str):
        """
        Add a new proxy to the rotation pool.

        Parameters:
        - proxy: str, proxy URL to add to the pool
        """

    def remove_proxy(self, proxy: str):
        """
        Remove a proxy from the rotation pool.

        Parameters:
        - proxy: str, proxy URL to remove from the pool
        """

    def get_stats(self) -> dict:
        """
        Get proxy usage statistics and performance metrics.

        Returns:
        dict: Statistics including success rates, failure counts, and ban status
        """

    def _format_proxy(self, proxy: str) -> dict:
        """
        Format proxy URL as a requests-compatible proxy dict.

        Parameters:
        - proxy: str, proxy URL

        Returns:
        dict: Formatted proxy configuration for requests
        """
```
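
For example, `get_stats()` can be used to inspect how the pool is performing after a run. A short sketch using the `scraper.proxy_manager` handle shown under Manual Proxy Management below:

```python
import cloudscraper

scraper = cloudscraper.create_scraper(
    rotating_proxies=[
        'http://proxy1.example.com:8080',
        'http://proxy2.example.com:8080'
    ],
    proxy_options={'rotation_strategy': 'smart'}
)

for _ in range(5):
    try:
        scraper.get('https://httpbin.org/ip', timeout=10)
    except Exception:
        pass  # Failures are still tracked by the manager

# Success rates, failure counts, and ban status per proxy
print(scraper.proxy_manager.get_stats())
```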

### Basic Proxy Configuration

Simple proxy setup with a list of proxy URLs:

```python
import cloudscraper

# Single proxy
scraper = cloudscraper.create_scraper(
    proxies={'http': 'http://proxy.example.com:8080', 'https': 'http://proxy.example.com:8080'}
)

# Proxy rotation with a list of URLs
proxy_list = [
    'http://user:pass@proxy1.example.com:8080',
    'http://user:pass@proxy2.example.com:8080',
    'http://user:pass@proxy3.example.com:8080'
]

scraper = cloudscraper.create_scraper(
    rotating_proxies=proxy_list
)

# Default sequential rotation
response1 = scraper.get('https://httpbin.org/ip')  # Uses proxy1
response2 = scraper.get('https://httpbin.org/ip')  # Uses proxy2
response3 = scraper.get('https://httpbin.org/ip')  # Uses proxy3
response4 = scraper.get('https://httpbin.org/ip')  # Uses proxy1 again
```

### Advanced Proxy Configuration

Comprehensive proxy setup with rotation strategies and failure handling:

```python
# Advanced proxy configuration
scraper = cloudscraper.create_scraper(
    rotating_proxies=[
        'http://user:pass@proxy1.example.com:8080',
        'http://user:pass@proxy2.example.com:8080',
        'http://user:pass@proxy3.example.com:8080',
        'http://user:pass@proxy4.example.com:8080'
    ],
    proxy_options={
        'rotation_strategy': 'smart',  # Intelligent rotation based on success rate
        'ban_time': 600                # Ban failed proxies for 10 minutes
    }
)

# Conservative proxy settings for sensitive sites
scraper = cloudscraper.create_scraper(
    rotating_proxies=proxy_list,
    proxy_options={
        'rotation_strategy': 'random',  # Random selection
        'ban_time': 1800                # Ban failed proxies for 30 minutes
    }
)
```

## Rotation Strategies

### Sequential Rotation

Rotate through proxies in order, returning to the first after reaching the end:

```python
# Sequential rotation (default)
scraper = cloudscraper.create_scraper(
    rotating_proxies=[
        'http://proxy1.example.com:8080',
        'http://proxy2.example.com:8080',
        'http://proxy3.example.com:8080'
    ],
    proxy_options={'rotation_strategy': 'sequential'}
)

# Predictable order: proxy1 -> proxy2 -> proxy3 -> proxy1 -> ...
for i in range(6):
    response = scraper.get('https://httpbin.org/ip')
    print(f"Request {i+1}: {response.json()['origin']}")
```

### Random Rotation

Randomly select from available proxies for each request:

```python
# Random rotation
scraper = cloudscraper.create_scraper(
    rotating_proxies=[
        'http://proxy1.example.com:8080',
        'http://proxy2.example.com:8080',
        'http://proxy3.example.com:8080'
    ],
    proxy_options={'rotation_strategy': 'random'}
)

# Unpredictable order - good for avoiding patterns
for i in range(6):
    response = scraper.get('https://httpbin.org/ip')
    print(f"Request {i+1}: {response.json()['origin']}")
```

### Smart Rotation

Intelligent rotation based on proxy success rates and performance metrics:

```python
# Smart rotation (recommended)
scraper = cloudscraper.create_scraper(
    rotating_proxies=[
        'http://proxy1.example.com:8080',  # Fast, reliable proxy
        'http://proxy2.example.com:8080',  # Slower proxy
        'http://proxy3.example.com:8080',  # Sometimes fails
    ],
    proxy_options={
        'rotation_strategy': 'smart',  # Prefer better-performing proxies
        'ban_time': 300                # 5-minute ban for failures
    }
)

# Smart rotation learns which proxies work better
for i in range(10):
    try:
        response = scraper.get('https://httpbin.org/ip', timeout=10)
        print(f"Request {i+1}: Success with {response.json()['origin']}")
    except Exception as e:
        print(f"Request {i+1}: Failed - {e}")
```
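
For intuition, here is a minimal, illustrative sketch of success-rate-weighted selection. It is not CloudScraper's internal code, just one way "prefer better-performing proxies" can be modeled:

```python
import random

def pick_smart(stats):
    """Pick a proxy with probability proportional to its smoothed success rate.

    `stats` maps proxy URL -> {'success': int, 'failure': int}.
    Illustrative only; CloudScraper's actual scoring may differ.
    """
    proxies = list(stats)
    weights = [
        (stats[p]['success'] + 1) / (stats[p]['success'] + stats[p]['failure'] + 2)
        for p in proxies
    ]  # Laplace smoothing so untested proxies still get some traffic
    return random.choices(proxies, weights=weights, k=1)[0]

observed = {
    'http://proxy1.example.com:8080': {'success': 9, 'failure': 1},
    'http://proxy2.example.com:8080': {'success': 5, 'failure': 5},
    'http://proxy3.example.com:8080': {'success': 1, 'failure': 9},
}
print(pick_smart(observed))  # proxy1 is chosen most often, proxy3 least
```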

## Proxy Types and Formats

### HTTP/HTTPS Proxies

Standard HTTP and HTTPS proxy configurations:

```python
# HTTP proxies
http_proxies = [
    'http://proxy.example.com:8080',
    'http://user:password@proxy.example.com:8080'
]

# HTTPS proxies
https_proxies = [
    'https://proxy.example.com:8080',
    'https://user:password@proxy.example.com:8080'
]

# Both HTTP and HTTPS
mixed_proxies = [
    'http://proxy1.example.com:8080',
    'https://proxy2.example.com:8080'
]

scraper = cloudscraper.create_scraper(rotating_proxies=mixed_proxies)
```

### SOCKS Proxies

SOCKS4 and SOCKS5 proxy support:

```python
# SOCKS proxies
socks_proxies = [
    'socks4://proxy.example.com:1080',
    'socks5://user:pass@proxy.example.com:1080',
    'socks5://proxy.example.com:1080'
]

scraper = cloudscraper.create_scraper(rotating_proxies=socks_proxies)

# Note: Requires PySocks, installed via requests[socks]
# pip install requests[socks]
```

### Proxy Dictionary Format

Alternative proxy specification using dictionary format:

```python
# Dictionary format for complex proxy setups
proxy_configs = [
    {
        'http': 'http://user:pass@proxy1.example.com:8080',
        'https': 'https://user:pass@proxy1.example.com:8080'
    },
    {
        'http': 'socks5://proxy2.example.com:1080',
        'https': 'socks5://proxy2.example.com:1080'
    }
]

# Note: Pass as the rotating_proxies parameter
scraper = cloudscraper.create_scraper(rotating_proxies=proxy_configs)
```
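
Plain string URLs are normalized into this dictionary form internally (see `_format_proxy` in the API above). A rough sketch of what such a conversion can look like, not the library's exact code:

```python
def format_proxy(proxy: str) -> dict:
    """Map one proxy URL onto both schemes, as requests expects.

    Illustrative stand-in for a _format_proxy-style helper;
    CloudScraper's actual implementation may differ.
    """
    return {'http': proxy, 'https': proxy}

print(format_proxy('http://user:pass@proxy1.example.com:8080'))
# {'http': 'http://user:pass@proxy1.example.com:8080',
#  'https': 'http://user:pass@proxy1.example.com:8080'}
```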

## Proxy Failure Handling

### Automatic Ban Management

Failed proxies are temporarily banned to avoid repeated failures:

```python
# Configure ban behavior
scraper = cloudscraper.create_scraper(
    rotating_proxies=[
        'http://reliable-proxy.example.com:8080',
        'http://unreliable-proxy.example.com:8080',
        'http://slow-proxy.example.com:8080'
    ],
    proxy_options={
        'rotation_strategy': 'smart',
        'ban_time': 900  # Ban failed proxies for 15 minutes
    },
    debug=True  # See proxy selection and banning
)

# Automatic handling of proxy failures
for i in range(10):
    try:
        response = scraper.get('https://httpbin.org/delay/2', timeout=5)
        print(f"Success: {response.json()['origin']}")
    except Exception as e:
        print(f"Failed: {e}")
        # Failed proxy is automatically banned
```
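
Under the hood, temporary banning amounts to stamping a failure time and skipping that proxy until `ban_time` elapses. A minimal illustration of the bookkeeping (assumed, not CloudScraper's actual implementation):

```python
import time

class BanList:
    """Minimal ban bookkeeping; illustrative sketch only."""

    def __init__(self, ban_time=300):
        self.ban_time = ban_time
        self._banned_at = {}  # proxy URL -> failure timestamp

    def ban(self, proxy):
        self._banned_at[proxy] = time.time()

    def is_banned(self, proxy):
        banned_at = self._banned_at.get(proxy)
        if banned_at is None:
            return False
        if time.time() - banned_at >= self.ban_time:
            del self._banned_at[proxy]  # Ban expired; proxy rejoins the pool
            return False
        return True

bans = BanList(ban_time=900)
bans.ban('http://unreliable-proxy.example.com:8080')
print(bans.is_banned('http://unreliable-proxy.example.com:8080'))  # True until 900s elapse
```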

### Manual Proxy Management

Direct interaction with the proxy manager:

```python
# Access the proxy manager directly
scraper = cloudscraper.create_scraper(
    rotating_proxies=['http://proxy1.com:8080', 'http://proxy2.com:8080']
)

# Get the current proxy
current_proxy = scraper.proxy_manager.get_proxy()
print(f"Current proxy: {current_proxy}")

# Report success/failure manually
try:
    response = scraper.get('https://httpbin.org/ip')
    scraper.proxy_manager.report_success(current_proxy)
except Exception:
    scraper.proxy_manager.report_failure(current_proxy)
```

### Proxy Health Monitoring

Monitor proxy performance and health:

```python
import time

def monitor_proxy_health(scraper, test_url='https://httpbin.org/ip', rounds=10):
    """Monitor proxy performance over multiple requests."""
    proxy_stats = {}

    for i in range(rounds):
        current_proxy = scraper.proxy_manager.get_proxy()
        proxy_id = str(current_proxy)

        if proxy_id not in proxy_stats:
            proxy_stats[proxy_id] = {'success': 0, 'failure': 0, 'total_time': 0}

        start_time = time.time()
        try:
            response = scraper.get(test_url, timeout=10)
            end_time = time.time()

            if response.status_code == 200:
                proxy_stats[proxy_id]['success'] += 1
                proxy_stats[proxy_id]['total_time'] += (end_time - start_time)
                print(f"✅ {proxy_id}: Success in {end_time - start_time:.2f}s")
            else:
                proxy_stats[proxy_id]['failure'] += 1
                print(f"❌ {proxy_id}: HTTP {response.status_code}")

        except Exception as e:
            proxy_stats[proxy_id]['failure'] += 1
            print(f"❌ {proxy_id}: {e}")

    # Calculate statistics
    for proxy_id, stats in proxy_stats.items():
        total = stats['success'] + stats['failure']
        success_rate = (stats['success'] / total) * 100 if total > 0 else 0
        avg_time = stats['total_time'] / stats['success'] if stats['success'] > 0 else 0

        print(f"\n{proxy_id}:")
        print(f"  Success rate: {success_rate:.1f}%")
        print(f"  Average response time: {avg_time:.2f}s")

    return proxy_stats

# Monitor proxy health
scraper = cloudscraper.create_scraper(
    rotating_proxies=[
        'http://proxy1.example.com:8080',
        'http://proxy2.example.com:8080'
    ],
    proxy_options={'rotation_strategy': 'smart'}
)

stats = monitor_proxy_health(scraper)
```

## Proxy Integration with Other Features

### Proxies with CAPTCHA Solving

Configure how proxies interact with CAPTCHA solving services:

```python
# Forward proxy information to the CAPTCHA service (recommended)
scraper = cloudscraper.create_scraper(
    rotating_proxies=[
        'http://user:pass@proxy1.example.com:8080',
        'http://user:pass@proxy2.example.com:8080'
    ],
    captcha={
        'provider': '2captcha',
        'api_key': 'your_api_key',
        'no_proxy': False  # Send proxy info to 2captcha for accuracy
    }
)

# Don't forward the proxy to the CAPTCHA service (faster but less accurate)
scraper = cloudscraper.create_scraper(
    rotating_proxies=proxy_list,
    captcha={
        'provider': '2captcha',
        'api_key': 'your_api_key',
        'no_proxy': True  # Solve CAPTCHA without proxy
    }
)
```

### Proxies with Stealth Mode

Combine proxy rotation with stealth techniques:

```python
# Proxies + stealth mode for maximum anonymity
scraper = cloudscraper.create_scraper(
    rotating_proxies=[
        'http://proxy1.example.com:8080',
        'http://proxy2.example.com:8080',
        'http://proxy3.example.com:8080'
    ],
    proxy_options={
        'rotation_strategy': 'random',  # Random proxy selection
        'ban_time': 600
    },
    enable_stealth=True,
    stealth_options={
        'min_delay': 2.0,
        'max_delay': 6.0,
        'human_like_delays': True,
        'randomize_headers': True
    }
)

# Each request uses a different proxy + stealth techniques
for i in range(5):
    response = scraper.get('https://httpbin.org/headers')
    headers = response.json()['headers']
    print(f"Request {i+1}:")
    print(f"  Origin: {response.json().get('origin', 'N/A')}")
    print(f"  User-Agent: {headers.get('User-Agent', '')[:50]}...")
```

### Geographic Proxy Distribution

Use proxies from different geographic locations:

```python
# Geographic proxy distribution
geo_proxies = [
    'http://user:pass@us-proxy1.example.com:8080',    # US East
    'http://user:pass@us-proxy2.example.com:8080',    # US West
    'http://user:pass@eu-proxy1.example.com:8080',    # Europe
    'http://user:pass@asia-proxy1.example.com:8080',  # Asia
]

scraper = cloudscraper.create_scraper(
    rotating_proxies=geo_proxies,
    proxy_options={
        'rotation_strategy': 'random',  # Random geographic distribution
        'ban_time': 300
    }
)

# Test geographic distribution
for i in range(8):
    response = scraper.get('https://httpbin.org/ip')
    print(f"Request {i+1}: {response.json()['origin']}")
```

## Proxy Authentication

### Basic Authentication

HTTP Basic authentication for proxy access:

```python
# Username/password in the URL
authenticated_proxies = [
    'http://username:password@proxy1.example.com:8080',
    'http://user2:pass2@proxy2.example.com:8080'
]

scraper = cloudscraper.create_scraper(rotating_proxies=authenticated_proxies)

# URL encoding for special characters in credentials
import urllib.parse

username = 'user@domain.com'
password = 'p@ssw0rd!'
encoded_user = urllib.parse.quote(username)
encoded_pass = urllib.parse.quote(password)

proxy_url = f'http://{encoded_user}:{encoded_pass}@proxy.example.com:8080'
scraper = cloudscraper.create_scraper(rotating_proxies=[proxy_url])
```

### Advanced Proxy Authentication

Custom authentication methods for enterprise proxies:

```python
# Custom authentication (if supported by the proxy)
import requests
from requests.auth import HTTPProxyAuth

# Manual proxy configuration with custom auth
session = requests.Session()
session.proxies = {'http': 'http://proxy.example.com:8080'}
session.auth = HTTPProxyAuth('username', 'password')

scraper = cloudscraper.create_scraper(sess=session)

# Or with a custom Proxy-Authorization header
scraper = cloudscraper.create_scraper()
scraper.headers.update({
    'Proxy-Authorization': 'Basic base64encodedcreds'
})
scraper.proxies = {'http': 'http://proxy.example.com:8080'}
```

## Troubleshooting Proxies

### Common Proxy Issues

Handle common proxy-related problems:

```python
import requests

# Comprehensive proxy error handling
def robust_proxy_request(scraper, url, max_retries=3):
    """Make a request with proxy error handling."""
    for attempt in range(max_retries):
        try:
            response = scraper.get(url, timeout=30)
            if response.status_code == 200:
                return response
            else:
                print(f"HTTP {response.status_code} on attempt {attempt + 1}")

        except requests.exceptions.ProxyError as e:
            print(f"Proxy error on attempt {attempt + 1}: {e}")
            # The proxy manager will automatically try the next proxy

        except requests.exceptions.ConnectTimeout as e:
            print(f"Connection timeout on attempt {attempt + 1}: {e}")

        except requests.exceptions.ReadTimeout as e:
            print(f"Read timeout on attempt {attempt + 1}: {e}")

        except Exception as e:
            print(f"Unexpected error on attempt {attempt + 1}: {e}")

    raise Exception(f"Failed after {max_retries} attempts")

# Usage with robust error handling
scraper = cloudscraper.create_scraper(
    rotating_proxies=proxy_list,
    proxy_options={'rotation_strategy': 'smart', 'ban_time': 300}
)

try:
    response = robust_proxy_request(scraper, 'https://httpbin.org/ip')
    print(f"Success: {response.json()['origin']}")
except Exception as e:
    print(f"All proxy attempts failed: {e}")
```

### Proxy Testing and Validation

Test proxy functionality before use:

```python
def test_proxy_list(proxy_list, test_url='https://httpbin.org/ip'):
    """Test a list of proxies for functionality."""
    working_proxies = []
    failed_proxies = []

    for proxy_url in proxy_list:
        try:
            test_scraper = cloudscraper.create_scraper(
                proxies={'http': proxy_url, 'https': proxy_url}
            )

            response = test_scraper.get(test_url, timeout=10)
            if response.status_code == 200:
                origin_ip = response.json().get('origin', 'Unknown')
                working_proxies.append((proxy_url, origin_ip))
                print(f"✅ {proxy_url} -> {origin_ip}")
            else:
                failed_proxies.append((proxy_url, f"HTTP {response.status_code}"))
                print(f"❌ {proxy_url} -> HTTP {response.status_code}")

        except Exception as e:
            failed_proxies.append((proxy_url, str(e)))
            print(f"❌ {proxy_url} -> {e}")

    return working_proxies, failed_proxies

# Test the proxy list before using it
proxy_list = [
    'http://proxy1.example.com:8080',
    'http://proxy2.example.com:8080',
    'http://broken-proxy.example.com:8080'
]

working, failed = test_proxy_list(proxy_list)
print(f"\nWorking proxies: {len(working)}")
print(f"Failed proxies: {len(failed)}")

# Use only the working proxies
if working:
    working_proxy_urls = [proxy[0] for proxy in working]
    scraper = cloudscraper.create_scraper(rotating_proxies=working_proxy_urls)
```

### Proxy Performance Optimization

Optimize proxy settings for different use cases:

```python
# High-throughput scraping
fast_scraper = cloudscraper.create_scraper(
    rotating_proxies=proxy_list,
    proxy_options={
        'rotation_strategy': 'random',  # Distribute load
        'ban_time': 60                  # Quick recovery from bans
    },
    enable_stealth=True,
    stealth_options={
        'min_delay': 0.1,  # Minimal delays
        'max_delay': 0.5
    }
)

# Cautious scraping for sensitive sites
careful_scraper = cloudscraper.create_scraper(
    rotating_proxies=proxy_list,
    proxy_options={
        'rotation_strategy': 'smart',  # Use best-performing proxies
        'ban_time': 1800               # Long ban time for failures
    },
    enable_stealth=True,
    stealth_options={
        'min_delay': 5.0,  # Conservative delays
        'max_delay': 15.0
    }
)
```