# Proxy Management

Intelligent proxy rotation with multiple strategies, automatic failure detection, temporary banning of failed proxies, and success-rate tracking. CloudScraper's proxy management helps distribute requests and avoid IP-based blocking.

## Capabilities

### ProxyManager Class

Core class that handles proxy rotation, failure tracking, and intelligent selection based on performance metrics.
```python { .api }
class ProxyManager:
    def __init__(self, proxies, proxy_rotation_strategy: str = 'sequential', ban_time: int = 300):
        """
        Initialize proxy manager with rotation strategy.

        Parameters:
        - proxies: list|dict, proxy URLs or dict mapping schemes to proxies
        - proxy_rotation_strategy: str, rotation strategy ('sequential', 'random', 'smart')
        - ban_time: int, time in seconds to ban failed proxies
        """

    def get_proxy(self) -> dict:
        """
        Get next proxy based on configured strategy.

        Returns:
        dict: Proxy configuration for requests (e.g., {'http': 'proxy_url', 'https': 'proxy_url'})

        Raises:
        - Exception: If no proxies are available
        """

    def report_success(self, proxy: dict):
        """
        Report successful proxy usage for smart rotation.

        Parameters:
        - proxy: dict, proxy configuration that succeeded
        """

    def report_failure(self, proxy: dict):
        """
        Report failed proxy usage for temporary banning.

        Parameters:
        - proxy: dict, proxy configuration that failed
        """

    def add_proxy(self, proxy: str):
        """
        Add a new proxy to the rotation pool.

        Parameters:
        - proxy: str, proxy URL to add to the pool
        """

    def remove_proxy(self, proxy: str):
        """
        Remove a proxy from the rotation pool.

        Parameters:
        - proxy: str, proxy URL to remove from the pool
        """

    def get_stats(self) -> dict:
        """
        Get proxy usage statistics and performance metrics.

        Returns:
        dict: Statistics including success rates, failure counts, and ban status
        """

    def _format_proxy(self, proxy: str) -> dict:
        """
        Format proxy URL as a requests-compatible proxy dict.

        Parameters:
        - proxy: str, proxy URL

        Returns:
        dict: Formatted proxy configuration for requests
        """
```
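For most uses, `create_scraper(rotating_proxies=...)` constructs and drives the manager for you, but the class can also be used directly. A minimal sketch of that workflow (the import path is an assumption; check where `ProxyManager` lives in your installed version):

```python
# Illustrative sketch only -- the import path below is an assumption.
from cloudscraper.proxy_manager import ProxyManager  # hypothetical module path

manager = ProxyManager(
    proxies=['http://proxy1.example.com:8080', 'http://proxy2.example.com:8080'],
    proxy_rotation_strategy='smart',
    ban_time=300,
)

proxy = manager.get_proxy()          # e.g. {'http': '...', 'https': '...'}
try:
    # ... perform a request through `proxy` here ...
    manager.report_success(proxy)    # feeds the smart-rotation metrics
except Exception:
    manager.report_failure(proxy)    # temporarily bans the failing proxy

print(manager.get_stats())           # success rates, failure counts, ban status
```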
### Basic Proxy Configuration

Simple proxy setup with a single proxy or a list of proxy URLs:
```python
import cloudscraper

# Single proxy
scraper = cloudscraper.create_scraper(
    proxies={'http': 'http://proxy.example.com:8080', 'https': 'http://proxy.example.com:8080'}
)

# Proxy rotation with a list of URLs
proxy_list = [
    'http://user:pass@proxy1.example.com:8080',
    'http://user:pass@proxy2.example.com:8080',
    'http://user:pass@proxy3.example.com:8080'
]

scraper = cloudscraper.create_scraper(
    rotating_proxies=proxy_list
)

# Default sequential rotation
response1 = scraper.get('https://httpbin.org/ip')  # Uses proxy1
response2 = scraper.get('https://httpbin.org/ip')  # Uses proxy2
response3 = scraper.get('https://httpbin.org/ip')  # Uses proxy3
response4 = scraper.get('https://httpbin.org/ip')  # Uses proxy1 again
```
### Advanced Proxy Configuration

Comprehensive proxy setup with rotation strategies and failure handling:
```python
# Advanced proxy configuration
scraper = cloudscraper.create_scraper(
    rotating_proxies=[
        'http://user:pass@proxy1.example.com:8080',
        'http://user:pass@proxy2.example.com:8080',
        'http://user:pass@proxy3.example.com:8080',
        'http://user:pass@proxy4.example.com:8080'
    ],
    proxy_options={
        'rotation_strategy': 'smart',  # Intelligent rotation based on success rate
        'ban_time': 600                # Ban failed proxies for 10 minutes
    }
)

# Conservative proxy settings for sensitive sites
scraper = cloudscraper.create_scraper(
    rotating_proxies=proxy_list,
    proxy_options={
        'rotation_strategy': 'random',  # Random selection
        'ban_time': 1800                # Ban failed proxies for 30 minutes
    }
)
```
## Rotation Strategies

### Sequential Rotation

Rotate through proxies in order, returning to the first after reaching the end:
```python
# Sequential rotation (default)
scraper = cloudscraper.create_scraper(
    rotating_proxies=[
        'http://proxy1.example.com:8080',
        'http://proxy2.example.com:8080',
        'http://proxy3.example.com:8080'
    ],
    proxy_options={'rotation_strategy': 'sequential'}
)

# Predictable order: proxy1 -> proxy2 -> proxy3 -> proxy1 -> ...
for i in range(6):
    response = scraper.get('https://httpbin.org/ip')
    print(f"Request {i+1}: {response.json()['origin']}")
```
### Random Rotation

Randomly select from available proxies for each request:
```python
# Random rotation
scraper = cloudscraper.create_scraper(
    rotating_proxies=[
        'http://proxy1.example.com:8080',
        'http://proxy2.example.com:8080',
        'http://proxy3.example.com:8080'
    ],
    proxy_options={'rotation_strategy': 'random'}
)

# Unpredictable order - good for avoiding patterns
for i in range(6):
    response = scraper.get('https://httpbin.org/ip')
    print(f"Request {i+1}: {response.json()['origin']}")
```
### Smart Rotation

Intelligent rotation based on proxy success rates and performance metrics:
```python
# Smart rotation (recommended)
scraper = cloudscraper.create_scraper(
    rotating_proxies=[
        'http://proxy1.example.com:8080',  # Fast, reliable proxy
        'http://proxy2.example.com:8080',  # Slower proxy
        'http://proxy3.example.com:8080',  # Sometimes fails
    ],
    proxy_options={
        'rotation_strategy': 'smart',  # Prefer better-performing proxies
        'ban_time': 300                # 5-minute ban for failures
    }
)

# Smart rotation learns which proxies work better
for i in range(10):
    try:
        response = scraper.get('https://httpbin.org/ip', timeout=10)
        print(f"Request {i+1}: Success with {response.json()['origin']}")
    except Exception as e:
        print(f"Request {i+1}: Failed - {e}")
```
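Conceptually, smart selection can be pictured as a weighted random choice over unbanned proxies, where each proxy's weight comes from its reported success rate. A rough sketch of that idea (illustrative only, not CloudScraper's actual algorithm):

```python
import random

def pick_smart(stats: dict) -> str:
    """Pick a proxy URL weighted by observed success rate (illustrative sketch).

    `stats` maps proxy URL -> {'success': int, 'failure': int, 'banned': bool}.
    """
    candidates = {p: s for p, s in stats.items() if not s['banned']}
    if not candidates:
        raise Exception('No proxies available')
    # Laplace smoothing so fresh proxies with no history still get a chance
    weights = [
        (s['success'] + 1) / (s['success'] + s['failure'] + 2)
        for s in candidates.values()
    ]
    return random.choices(list(candidates), weights=weights, k=1)[0]

stats = {
    'http://proxy1.example.com:8080': {'success': 40, 'failure': 2, 'banned': False},
    'http://proxy2.example.com:8080': {'success': 15, 'failure': 10, 'banned': False},
    'http://proxy3.example.com:8080': {'success': 3, 'failure': 9, 'banned': True},
}
print(pick_smart(stats))  # proxy1 is selected most often
```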
## Proxy Types and Formats

### HTTP/HTTPS Proxies

Standard HTTP and HTTPS proxy configurations:
```python
# HTTP proxies
http_proxies = [
    'http://proxy.example.com:8080',
    'http://user:password@proxy.example.com:8080'
]

# HTTPS proxies
https_proxies = [
    'https://proxy.example.com:8080',
    'https://user:password@proxy.example.com:8080'
]

# Both HTTP and HTTPS
mixed_proxies = [
    'http://proxy1.example.com:8080',
    'https://proxy2.example.com:8080'
]

scraper = cloudscraper.create_scraper(rotating_proxies=mixed_proxies)
```
### SOCKS Proxies

SOCKS4 and SOCKS5 proxy support:
```python
# SOCKS proxies
socks_proxies = [
    'socks4://proxy.example.com:1080',
    'socks5://user:pass@proxy.example.com:1080',
    'socks5://proxy.example.com:1080'
]

scraper = cloudscraper.create_scraper(rotating_proxies=socks_proxies)

# Note: Requires PySocks, installed via requests' socks extra:
# pip install "requests[socks]"
```
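Before configuring SOCKS proxies, it can be worth verifying that PySocks is actually installed; a small sanity check along these lines fails fast with a helpful message:

```python
# Fail fast if SOCKS support is missing (PySocks provides the `socks` module).
try:
    import socks  # noqa: F401 -- installed by pip install "requests[socks]"
except ImportError:
    raise SystemExit('SOCKS proxy support missing: pip install "requests[socks]"')
```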
### Proxy Dictionary Format

Alternative proxy specification using dictionary format:
```python
# Dictionary format for complex proxy setups
proxy_configs = [
    {
        'http': 'http://user:pass@proxy1.example.com:8080',
        'https': 'https://user:pass@proxy1.example.com:8080'
    },
    {
        'http': 'socks5://proxy2.example.com:1080',
        'https': 'socks5://proxy2.example.com:1080'
    }
]

# Note: Pass as the rotating_proxies parameter
scraper = cloudscraper.create_scraper(rotating_proxies=proxy_configs)
```
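Internally, string proxies have to be normalized into this dictionary shape before requests can use them, which is what `_format_proxy` is for. A plausible sketch of that conversion, assuming the same URL is reused for both schemes (the real implementation may differ):

```python
def format_proxy(proxy) -> dict:
    """Normalize a proxy URL into a requests-style proxy dict (illustrative).

    Mirrors what ProxyManager._format_proxy plausibly does; this is a
    sketch, not the library's actual code.
    """
    if isinstance(proxy, dict):
        return proxy  # already in requests format
    return {'http': proxy, 'https': proxy}

print(format_proxy('socks5://proxy2.example.com:1080'))
# {'http': 'socks5://proxy2.example.com:1080', 'https': 'socks5://proxy2.example.com:1080'}
```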
## Proxy Failure Handling

### Automatic Ban Management

Failed proxies are temporarily banned to avoid repeated failures:
```python
# Configure ban behavior
scraper = cloudscraper.create_scraper(
    rotating_proxies=[
        'http://reliable-proxy.example.com:8080',
        'http://unreliable-proxy.example.com:8080',
        'http://slow-proxy.example.com:8080'
    ],
    proxy_options={
        'rotation_strategy': 'smart',
        'ban_time': 900  # Ban failed proxies for 15 minutes
    },
    debug=True  # See proxy selection and banning
)

# Automatic handling of proxy failures
for i in range(10):
    try:
        response = scraper.get('https://httpbin.org/delay/2', timeout=5)
        print(f"Success: {response.json()['origin']}")
    except Exception as e:
        print(f"Failed: {e}")
        # The failed proxy is automatically banned
```
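Temporary banning can be pictured as a timestamp recorded per failed proxy: a proxy is skipped until `ban_time` seconds have elapsed since its last failure. A rough sketch of that bookkeeping (not the library's actual code):

```python
import time

class BanList:
    """Track temporary proxy bans (illustrative sketch)."""

    def __init__(self, ban_time: int = 300):
        self.ban_time = ban_time
        self.banned_at = {}  # proxy URL -> time of last failure

    def ban(self, proxy: str):
        self.banned_at[proxy] = time.time()

    def is_banned(self, proxy: str) -> bool:
        banned = self.banned_at.get(proxy)
        if banned is None:
            return False
        if time.time() - banned >= self.ban_time:
            del self.banned_at[proxy]  # ban expired, proxy is usable again
            return False
        return True

bans = BanList(ban_time=900)
bans.ban('http://unreliable-proxy.example.com:8080')
print(bans.is_banned('http://unreliable-proxy.example.com:8080'))  # True for 15 minutes
```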
### Manual Proxy Management

Direct interaction with the proxy manager:
```python
import cloudscraper

# Access the proxy manager directly
scraper = cloudscraper.create_scraper(
    rotating_proxies=['http://proxy1.com:8080', 'http://proxy2.com:8080']
)

# Get the current proxy
current_proxy = scraper.proxy_manager.get_proxy()
print(f"Current proxy: {current_proxy}")

# Report success/failure manually
try:
    response = scraper.get('https://httpbin.org/ip')
    scraper.proxy_manager.report_success(current_proxy)
except Exception:
    scraper.proxy_manager.report_failure(current_proxy)
```
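The same interface supports maintaining the pool at runtime: `get_stats()` exposes per-proxy performance, while `add_proxy()` and `remove_proxy()` adjust the pool. A sketch of pruning chronically failing proxies, assuming `get_stats()` maps each proxy URL to success/failure counts (the exact shape of the stats dict is an assumption):

```python
def prune_bad_proxies(scraper, min_success_rate=0.5, min_attempts=10):
    """Drop proxies whose observed success rate is too low (illustrative).

    Assumes get_stats() returns {proxy_url: {'success': int, 'failure': int}};
    the real return shape may differ.
    """
    stats = scraper.proxy_manager.get_stats()
    for proxy_url, s in list(stats.items()):
        attempts = s['success'] + s['failure']
        if attempts >= min_attempts and s['success'] / attempts < min_success_rate:
            scraper.proxy_manager.remove_proxy(proxy_url)
            print(f"Removed underperforming proxy: {proxy_url}")

# Top the pool back up with a fresh proxy
scraper.proxy_manager.add_proxy('http://fresh-proxy.example.com:8080')
```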
### Proxy Health Monitoring

Monitor proxy performance and health:
```python
import time

import cloudscraper

def monitor_proxy_health(scraper, test_url='https://httpbin.org/ip', rounds=10):
    """Monitor proxy performance over multiple requests."""
    proxy_stats = {}

    for i in range(rounds):
        current_proxy = scraper.proxy_manager.get_proxy()
        proxy_id = str(current_proxy)

        if proxy_id not in proxy_stats:
            proxy_stats[proxy_id] = {'success': 0, 'failure': 0, 'total_time': 0}

        start_time = time.time()
        try:
            response = scraper.get(test_url, timeout=10)
            end_time = time.time()

            if response.status_code == 200:
                proxy_stats[proxy_id]['success'] += 1
                proxy_stats[proxy_id]['total_time'] += (end_time - start_time)
                print(f"✅ {proxy_id}: Success in {end_time - start_time:.2f}s")
            else:
                proxy_stats[proxy_id]['failure'] += 1
                print(f"❌ {proxy_id}: HTTP {response.status_code}")

        except Exception as e:
            proxy_stats[proxy_id]['failure'] += 1
            print(f"❌ {proxy_id}: {e}")

    # Calculate statistics
    for proxy_id, stats in proxy_stats.items():
        total = stats['success'] + stats['failure']
        success_rate = (stats['success'] / total) * 100 if total > 0 else 0
        avg_time = stats['total_time'] / stats['success'] if stats['success'] > 0 else 0

        print(f"\n{proxy_id}:")
        print(f"  Success rate: {success_rate:.1f}%")
        print(f"  Average response time: {avg_time:.2f}s")

    return proxy_stats

# Monitor proxy health
scraper = cloudscraper.create_scraper(
    rotating_proxies=[
        'http://proxy1.example.com:8080',
        'http://proxy2.example.com:8080'
    ],
    proxy_options={'rotation_strategy': 'smart'}
)

stats = monitor_proxy_health(scraper)
```
## Proxy Integration with Other Features

### Proxies with CAPTCHA Solving

Configure how proxies interact with CAPTCHA solving services:
```python
# Forward proxy information to the CAPTCHA service (recommended)
scraper = cloudscraper.create_scraper(
    rotating_proxies=[
        'http://user:pass@proxy1.example.com:8080',
        'http://user:pass@proxy2.example.com:8080'
    ],
    captcha={
        'provider': '2captcha',
        'api_key': 'your_api_key',
        'no_proxy': False  # Send proxy info to 2captcha for accuracy
    }
)

# Don't forward the proxy to the CAPTCHA service (faster but less accurate)
scraper = cloudscraper.create_scraper(
    rotating_proxies=proxy_list,
    captcha={
        'provider': '2captcha',
        'api_key': 'your_api_key',
        'no_proxy': True  # Solve the CAPTCHA without a proxy
    }
)
```
### Proxies with Stealth Mode

Combine proxy rotation with stealth techniques:
```python
# Proxies + stealth mode for maximum anonymity
scraper = cloudscraper.create_scraper(
    rotating_proxies=[
        'http://proxy1.example.com:8080',
        'http://proxy2.example.com:8080',
        'http://proxy3.example.com:8080'
    ],
    proxy_options={
        'rotation_strategy': 'random',  # Random proxy selection
        'ban_time': 600
    },
    enable_stealth=True,
    stealth_options={
        'min_delay': 2.0,
        'max_delay': 6.0,
        'human_like_delays': True,
        'randomize_headers': True
    }
)

# Each request uses a different proxy + stealth techniques
for i in range(5):
    response = scraper.get('https://httpbin.org/headers')
    data = response.json()
    print(f"Request {i+1}:")
    print(f"  Origin: {data.get('origin', 'N/A')}")
    print(f"  User-Agent: {data['headers'].get('User-Agent', '')[:50]}...")
```
### Geographic Proxy Distribution

Use proxies from different geographic locations:
```python
# Geographic proxy distribution
geo_proxies = [
    'http://user:pass@us-proxy1.example.com:8080',    # US East
    'http://user:pass@us-proxy2.example.com:8080',    # US West
    'http://user:pass@eu-proxy1.example.com:8080',    # Europe
    'http://user:pass@asia-proxy1.example.com:8080',  # Asia
]

scraper = cloudscraper.create_scraper(
    rotating_proxies=geo_proxies,
    proxy_options={
        'rotation_strategy': 'random',  # Random geographic distribution
        'ban_time': 300
    }
)

# Test geographic distribution
for i in range(8):
    response = scraper.get('https://httpbin.org/ip')
    print(f"Request {i+1}: {response.json()['origin']}")
```
## Proxy Authentication

### Basic Authentication

HTTP Basic authentication for proxy access:
```python
import urllib.parse

import cloudscraper

# Username/password in the URL
authenticated_proxies = [
    'http://username:password@proxy1.example.com:8080',
    'http://user2:pass2@proxy2.example.com:8080'
]

scraper = cloudscraper.create_scraper(rotating_proxies=authenticated_proxies)

# URL-encode special characters in credentials
username = 'user@domain.com'
password = 'p@ssw0rd!'
encoded_user = urllib.parse.quote(username, safe='')  # safe='' also encodes '/'
encoded_pass = urllib.parse.quote(password, safe='')

proxy_url = f'http://{encoded_user}:{encoded_pass}@proxy.example.com:8080'
scraper = cloudscraper.create_scraper(rotating_proxies=[proxy_url])
```
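When many authenticated proxies are involved, a small helper keeps the credential encoding consistent. A sketch (the helper name is illustrative, not part of CloudScraper):

```python
import urllib.parse

def build_proxy_url(user: str, password: str, host: str, port: int,
                    scheme: str = 'http') -> str:
    """Build a proxy URL with percent-encoded credentials (illustrative helper)."""
    u = urllib.parse.quote(user, safe='')
    p = urllib.parse.quote(password, safe='')
    return f'{scheme}://{u}:{p}@{host}:{port}'

credentials = [
    ('user@domain.com', 'p@ssw0rd!', 'proxy1.example.com', 8080),
    ('user2', 'pa/ss', 'proxy2.example.com', 8080),
]
proxy_list = [build_proxy_url(*c) for c in credentials]
```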
### Advanced Proxy Authentication

Custom authentication methods for enterprise proxies:
```python
# Custom authentication (if supported by the proxy)
import requests
from requests.auth import HTTPProxyAuth

import cloudscraper

# Manual proxy configuration with custom auth
session = requests.Session()
session.proxies = {'http': 'http://proxy.example.com:8080'}
session.auth = HTTPProxyAuth('username', 'password')

scraper = cloudscraper.create_scraper(sess=session)

# Or with a custom Proxy-Authorization header
scraper = cloudscraper.create_scraper()
scraper.headers.update({
    'Proxy-Authorization': 'Basic base64encodedcreds'  # placeholder credentials
})
scraper.proxies = {'http': 'http://proxy.example.com:8080'}
```
## Troubleshooting Proxies

### Common Proxy Issues

Handle common proxy-related problems:
```python
import requests

import cloudscraper

# Comprehensive proxy error handling
def robust_proxy_request(scraper, url, max_retries=3):
    """Make a request with proxy error handling."""
    for attempt in range(max_retries):
        try:
            response = scraper.get(url, timeout=30)
            if response.status_code == 200:
                return response
            else:
                print(f"HTTP {response.status_code} on attempt {attempt + 1}")

        except requests.exceptions.ProxyError as e:
            print(f"Proxy error on attempt {attempt + 1}: {e}")
            # The proxy manager will automatically try the next proxy

        except requests.exceptions.ConnectTimeout as e:
            print(f"Connection timeout on attempt {attempt + 1}: {e}")

        except requests.exceptions.ReadTimeout as e:
            print(f"Read timeout on attempt {attempt + 1}: {e}")

        except Exception as e:
            print(f"Unexpected error on attempt {attempt + 1}: {e}")

    raise Exception(f"Failed after {max_retries} attempts")

# Usage with robust error handling
scraper = cloudscraper.create_scraper(
    rotating_proxies=proxy_list,  # list defined in earlier examples
    proxy_options={'rotation_strategy': 'smart', 'ban_time': 300}
)

try:
    response = robust_proxy_request(scraper, 'https://httpbin.org/ip')
    print(f"Success: {response.json()['origin']}")
except Exception as e:
    print(f"All proxy attempts failed: {e}")
```
### Proxy Testing and Validation

Test proxy functionality before use:
```python
import cloudscraper

def test_proxy_list(proxy_list, test_url='https://httpbin.org/ip'):
    """Test a list of proxies for functionality."""
    working_proxies = []
    failed_proxies = []

    for proxy_url in proxy_list:
        try:
            test_scraper = cloudscraper.create_scraper(
                proxies={'http': proxy_url, 'https': proxy_url}
            )

            response = test_scraper.get(test_url, timeout=10)
            if response.status_code == 200:
                origin_ip = response.json().get('origin', 'Unknown')
                working_proxies.append((proxy_url, origin_ip))
                print(f"✅ {proxy_url} -> {origin_ip}")
            else:
                failed_proxies.append((proxy_url, f"HTTP {response.status_code}"))
                print(f"❌ {proxy_url} -> HTTP {response.status_code}")

        except Exception as e:
            failed_proxies.append((proxy_url, str(e)))
            print(f"❌ {proxy_url} -> {e}")

    return working_proxies, failed_proxies

# Test the proxy list before using it
proxy_list = [
    'http://proxy1.example.com:8080',
    'http://proxy2.example.com:8080',
    'http://broken-proxy.example.com:8080'
]

working, failed = test_proxy_list(proxy_list)
print(f"\nWorking proxies: {len(working)}")
print(f"Failed proxies: {len(failed)}")

# Use only the working proxies
if working:
    working_proxy_urls = [proxy[0] for proxy in working]
    scraper = cloudscraper.create_scraper(rotating_proxies=working_proxy_urls)
```
### Proxy Performance Optimization

Optimize proxy settings for different use cases:
```python
# High-throughput scraping
fast_scraper = cloudscraper.create_scraper(
    rotating_proxies=proxy_list,
    proxy_options={
        'rotation_strategy': 'random',  # Distribute load
        'ban_time': 60                  # Quick recovery from bans
    },
    enable_stealth=True,
    stealth_options={
        'min_delay': 0.1,  # Minimal delays
        'max_delay': 0.5
    }
)

# Cautious scraping for sensitive sites
careful_scraper = cloudscraper.create_scraper(
    rotating_proxies=proxy_list,
    proxy_options={
        'rotation_strategy': 'smart',  # Use best-performing proxies
        'ban_time': 1800               # Long ban time for failures
    },
    enable_stealth=True,
    stealth_options={
        'min_delay': 5.0,  # Conservative delays
        'max_delay': 15.0
    }
)
```