Enhanced Python module to bypass Cloudflare's anti-bot page with support for v1, v2, v3 challenges, Turnstile, proxy rotation, and stealth mode.
—
Quality: Pending — best-practices review has not yet been performed.
Impact: Pending — no eval scenarios have been run.
Intelligent proxy rotation with multiple strategies, automatic failure detection, temporary banning of failed proxies, and success rate tracking for optimal performance. CloudScraper's proxy management helps distribute requests and avoid IP-based blocking.
Core class that handles proxy rotation, failure tracking, and intelligent selection based on performance metrics.
import random
import time


class ProxyManager:
    """Rotate proxies with failure tracking and strategy-based selection.

    Strategies:
    - 'sequential': cycle through proxies in order
    - 'random': pick uniformly among available (non-banned) proxies
    - 'smart': prefer the proxy with the best observed success rate
    """

    def __init__(self, proxies, proxy_rotation_strategy: str = 'sequential', ban_time: int = 300):
        """
        Initialize proxy manager with rotation strategy.

        Parameters:
        - proxies: list|dict, proxy URLs or dict mapping schemes to proxies
        - proxy_rotation_strategy: str, rotation strategy ('sequential', 'random', 'smart')
        - ban_time: int, time in seconds to ban failed proxies
        """
        if isinstance(proxies, dict):
            # A single requests-style mapping becomes a one-entry pool.
            proxies = [proxies]
        self.proxies = list(proxies or [])
        self.rotation_strategy = proxy_rotation_strategy
        self.ban_time = ban_time
        self._index = 0        # cursor for sequential rotation
        self._banned = {}      # proxy key -> ban expiry (epoch seconds)
        self._stats = {}       # proxy key -> {'success': int, 'failure': int}

    def _as_proxy_dict(self, proxy) -> dict:
        """Normalize a pool entry (str or dict) to a requests proxy dict."""
        return self._format_proxy(proxy) if isinstance(proxy, str) else proxy

    def _key(self, proxy: dict) -> str:
        """Stable identity used for stats and ban bookkeeping."""
        return str(proxy)

    def _available(self) -> list:
        """Pool entries whose ban (if any) has expired; expired bans are purged."""
        now = time.time()
        for key, until in list(self._banned.items()):
            if until <= now:
                del self._banned[key]
        return [p for p in self.proxies
                if self._key(self._as_proxy_dict(p)) not in self._banned]

    def _success_rate(self, proxy) -> float:
        """Observed success rate for a pool entry; untried proxies score 1.0."""
        stats = self._stats.get(self._key(self._as_proxy_dict(proxy)))
        if not stats:
            return 1.0
        total = stats['success'] + stats['failure']
        return stats['success'] / total if total else 1.0

    def get_proxy(self) -> dict:
        """
        Get next proxy based on configured strategy.

        Returns:
        dict: Proxy configuration for requests (e.g., {'http': 'proxy_url', 'https': 'proxy_url'})

        Raises:
        - Exception: If no proxies are available
        """
        candidates = self._available()
        if not candidates:
            raise Exception('No proxies available')
        if self.rotation_strategy == 'random':
            chosen = random.choice(candidates)
        elif self.rotation_strategy == 'smart':
            chosen = max(candidates, key=self._success_rate)
        else:  # 'sequential' (default)
            chosen = candidates[self._index % len(candidates)]
            self._index += 1
        return self._as_proxy_dict(chosen)

    def report_success(self, proxy: dict):
        """
        Report successful proxy usage for smart rotation.

        Parameters:
        - proxy: dict, proxy configuration that succeeded
        """
        stats = self._stats.setdefault(self._key(proxy), {'success': 0, 'failure': 0})
        stats['success'] += 1

    def report_failure(self, proxy: dict):
        """
        Report failed proxy usage for temporary banning.

        Parameters:
        - proxy: dict, proxy configuration that failed
        """
        key = self._key(proxy)
        stats = self._stats.setdefault(key, {'success': 0, 'failure': 0})
        stats['failure'] += 1
        # Temporarily exclude this proxy from rotation.
        self._banned[key] = time.time() + self.ban_time

    def add_proxy(self, proxy: str):
        """
        Add a new proxy to the rotation pool.

        Parameters:
        - proxy: str, proxy URL to add to the pool
        """
        if proxy not in self.proxies:
            self.proxies.append(proxy)

    def remove_proxy(self, proxy: str):
        """
        Remove a proxy from the rotation pool.

        Parameters:
        - proxy: str, proxy URL to remove from the pool
        """
        if proxy in self.proxies:
            self.proxies.remove(proxy)
        # Drop any lingering ban entry for the removed proxy.
        self._banned.pop(self._key(self._as_proxy_dict(proxy)), None)

    def get_stats(self) -> dict:
        """
        Get proxy usage statistics and performance metrics.

        Returns:
        dict: Statistics including success rates, failure counts, and ban status
        """
        now = time.time()
        report = {}
        for proxy in self.proxies:
            key = self._key(self._as_proxy_dict(proxy))
            stats = self._stats.get(key, {'success': 0, 'failure': 0})
            total = stats['success'] + stats['failure']
            report[key] = {
                'success': stats['success'],
                'failure': stats['failure'],
                'success_rate': stats['success'] / total if total else None,
                'banned': self._banned.get(key, 0) > now,
            }
        return report

    def _format_proxy(self, proxy: str) -> dict:
        """
        Format proxy URL as a requests-compatible proxy dict.

        Parameters:
        - proxy: str, proxy URL

        Returns:
        dict: Formatted proxy configuration for requests
        """
        return {'http': proxy, 'https': proxy}
"""Simple proxy setup with list of proxy URLs:
# Single proxy
scraper = cloudscraper.create_scraper(
proxies={'http': 'http://proxy.example.com:8080', 'https': 'http://proxy.example.com:8080'}
)
# Proxy rotation with list of URLs
proxy_list = [
'http://user:pass@proxy1.example.com:8080',
'http://user:pass@proxy2.example.com:8080',
'http://user:pass@proxy3.example.com:8080'
]
scraper = cloudscraper.create_scraper(
rotating_proxies=proxy_list
)
# Default sequential rotation
response1 = scraper.get('https://httpbin.org/ip') # Uses proxy1
response2 = scraper.get('https://httpbin.org/ip') # Uses proxy2
response3 = scraper.get('https://httpbin.org/ip') # Uses proxy3
response4 = scraper.get('https://httpbin.org/ip') # Uses proxy1 again
Comprehensive proxy setup with rotation strategies and failure handling:
# Advanced proxy configuration
scraper = cloudscraper.create_scraper(
rotating_proxies=[
'http://user:pass@proxy1.example.com:8080',
'http://user:pass@proxy2.example.com:8080',
'http://user:pass@proxy3.example.com:8080',
'http://user:pass@proxy4.example.com:8080'
],
proxy_options={
'rotation_strategy': 'smart', # Intelligent rotation based on success rate
'ban_time': 600 # Ban failed proxies for 10 minutes
}
)
# Conservative proxy settings for sensitive sites
scraper = cloudscraper.create_scraper(
rotating_proxies=proxy_list,
proxy_options={
'rotation_strategy': 'random', # Random selection
'ban_time': 1800 # Ban failed proxies for 30 minutes
}
)
Rotate through proxies in order, returning to the first after reaching the end:
# Sequential rotation (default)
scraper = cloudscraper.create_scraper(
rotating_proxies=[
'http://proxy1.example.com:8080',
'http://proxy2.example.com:8080',
'http://proxy3.example.com:8080'
],
proxy_options={'rotation_strategy': 'sequential'}
)
# Predictable order: proxy1 -> proxy2 -> proxy3 -> proxy1 -> ...
for i in range(6):
response = scraper.get('https://httpbin.org/ip')
print(f"Request {i+1}: {response.json()['origin']}")Randomly select from available proxies for each request:
# Random rotation
scraper = cloudscraper.create_scraper(
rotating_proxies=[
'http://proxy1.example.com:8080',
'http://proxy2.example.com:8080',
'http://proxy3.example.com:8080'
],
proxy_options={'rotation_strategy': 'random'}
)
# Unpredictable order - good for avoiding patterns
for i in range(6):
response = scraper.get('https://httpbin.org/ip')
print(f"Request {i+1}: {response.json()['origin']}")Intelligent rotation based on proxy success rates and performance metrics:
# Smart rotation (recommended)
scraper = cloudscraper.create_scraper(
rotating_proxies=[
'http://proxy1.example.com:8080', # Fast, reliable proxy
'http://proxy2.example.com:8080', # Slower proxy
'http://proxy3.example.com:8080', # Sometimes fails
],
proxy_options={
'rotation_strategy': 'smart', # Prefer better-performing proxies
'ban_time': 300 # 5 minute ban for failures
}
)
# Smart rotation learns which proxies work better
for i in range(10):
try:
response = scraper.get('https://httpbin.org/ip', timeout=10)
print(f"Request {i+1}: Success with {response.json()['origin']}")
except Exception as e:
print(f"Request {i+1}: Failed - {e}")Standard HTTP and HTTPS proxy configurations:
# HTTP proxy
http_proxies = [
'http://proxy.example.com:8080',
'http://user:password@proxy.example.com:8080'
]
# HTTPS proxy
https_proxies = [
'https://proxy.example.com:8080',
'https://user:password@proxy.example.com:8080'
]
# Both HTTP and HTTPS
mixed_proxies = [
'http://proxy1.example.com:8080',
'https://proxy2.example.com:8080'
]
scraper = cloudscraper.create_scraper(rotating_proxies=mixed_proxies)
SOCKS4 and SOCKS5 proxy support:
# SOCKS proxies
socks_proxies = [
'socks4://proxy.example.com:1080',
'socks5://user:pass@proxy.example.com:1080',
'socks5://proxy.example.com:1080'
]
scraper = cloudscraper.create_scraper(rotating_proxies=socks_proxies)
# Note: Requires PySocks or requests[socks]
# pip install requests[socks]
Alternative proxy specification using dictionary format:
# Dictionary format for complex proxy setups
proxy_configs = [
{
'http': 'http://user:pass@proxy1.example.com:8080',
'https': 'https://user:pass@proxy1.example.com:8080'
},
{
'http': 'socks5://proxy2.example.com:1080',
'https': 'socks5://proxy2.example.com:1080'
}
]
# Note: Pass as rotating_proxies parameter
scraper = cloudscraper.create_scraper(rotating_proxies=proxy_configs)
Failed proxies are temporarily banned to avoid repeated failures:
# Configure ban behavior
scraper = cloudscraper.create_scraper(
rotating_proxies=[
'http://reliable-proxy.example.com:8080',
'http://unreliable-proxy.example.com:8080',
'http://slow-proxy.example.com:8080'
],
proxy_options={
'rotation_strategy': 'smart',
'ban_time': 900 # Ban failed proxies for 15 minutes
},
debug=True # See proxy selection and banning
)
# Automatic handling of proxy failures
for i in range(10):
try:
response = scraper.get('https://httpbin.org/delay/2', timeout=5)
print(f"Success: {response.json()['origin']}")
except Exception as e:
print(f"Failed: {e}")
# Failed proxy is automatically banned
Direct interaction with the proxy manager:
# Access proxy manager directly
scraper = cloudscraper.create_scraper(
rotating_proxies=['http://proxy1.com:8080', 'http://proxy2.com:8080']
)
# Get current proxy
current_proxy = scraper.proxy_manager.get_proxy()
print(f"Current proxy: {current_proxy}")
# Report success/failure manually
try:
response = scraper.get('https://httpbin.org/ip')
scraper.proxy_manager.report_success(current_proxy)
except Exception:
scraper.proxy_manager.report_failure(current_proxy)
Monitor proxy performance and health:
def monitor_proxy_health(scraper, test_url='https://httpbin.org/ip', rounds=10):
    """Monitor proxy performance over multiple requests.

    Parameters:
    - scraper: cloudscraper session exposing a `proxy_manager` attribute
    - test_url: str, endpoint used to probe each proxy
    - rounds: int, number of test requests to issue

    Returns:
    dict: per-proxy stats with 'success', 'failure', and 'total_time' keys
    """
    import time  # local import keeps the snippet self-contained

    proxy_stats = {}
    for i in range(rounds):
        current_proxy = scraper.proxy_manager.get_proxy()
        proxy_id = str(current_proxy)
        # setdefault replaces the explicit membership test + init.
        stats = proxy_stats.setdefault(
            proxy_id, {'success': 0, 'failure': 0, 'total_time': 0}
        )
        start_time = time.time()
        try:
            response = scraper.get(test_url, timeout=10)
            elapsed = time.time() - start_time
            if response.status_code == 200:
                stats['success'] += 1
                stats['total_time'] += elapsed
                print(f"✅ {proxy_id}: Success in {elapsed:.2f}s")
            else:
                stats['failure'] += 1
                print(f"❌ {proxy_id}: HTTP {response.status_code}")
        except Exception as e:
            stats['failure'] += 1
            print(f"❌ {proxy_id}: {e}")
    # Summarize per-proxy success rate and average latency.
    for proxy_id, stats in proxy_stats.items():
        total = stats['success'] + stats['failure']
        success_rate = (stats['success'] / total) * 100 if total > 0 else 0
        # Average is over successful requests only; failures have no timing.
        avg_time = stats['total_time'] / stats['success'] if stats['success'] > 0 else 0
        print(f"\n{proxy_id}:")
        print(f" Success rate: {success_rate:.1f}%")
        print(f" Average response time: {avg_time:.2f}s")
    return proxy_stats
# Monitor proxy health
import time
scraper = cloudscraper.create_scraper(
rotating_proxies=[
'http://proxy1.example.com:8080',
'http://proxy2.example.com:8080'
],
proxy_options={'rotation_strategy': 'smart'}
)
stats = monitor_proxy_health(scraper)
Configure how proxies interact with CAPTCHA solving services:
# Forward proxy information to CAPTCHA service (recommended)
scraper = cloudscraper.create_scraper(
rotating_proxies=[
'http://user:pass@proxy1.example.com:8080',
'http://user:pass@proxy2.example.com:8080'
],
captcha={
'provider': '2captcha',
'api_key': 'your_api_key',
'no_proxy': False # Send proxy info to 2captcha for accuracy
}
)
# Don't forward proxy to CAPTCHA service (faster but less accurate)
scraper = cloudscraper.create_scraper(
rotating_proxies=proxy_list,
captcha={
'provider': '2captcha',
'api_key': 'your_api_key',
'no_proxy': True # Solve CAPTCHA without proxy
}
)
Combine proxy rotation with stealth techniques:
# Proxies + stealth mode for maximum anonymity
scraper = cloudscraper.create_scraper(
rotating_proxies=[
'http://proxy1.example.com:8080',
'http://proxy2.example.com:8080',
'http://proxy3.example.com:8080'
],
proxy_options={
'rotation_strategy': 'random', # Random proxy selection
'ban_time': 600
},
enable_stealth=True,
stealth_options={
'min_delay': 2.0,
'max_delay': 6.0,
'human_like_delays': True,
'randomize_headers': True
}
)
# Each request uses different proxy + stealth techniques
for i in range(5):
response = scraper.get('https://httpbin.org/headers')
headers = response.json()['headers']
print(f"Request {i+1}:")
print(f" Origin: {response.json().get('origin', 'N/A')}")
print(f" User-Agent: {headers.get('User-Agent', '')[:50]}...")Use proxies from different geographic locations:
# Geographic proxy distribution
geo_proxies = [
'http://user:pass@us-proxy1.example.com:8080', # US East
'http://user:pass@us-proxy2.example.com:8080', # US West
'http://user:pass@eu-proxy1.example.com:8080', # Europe
'http://user:pass@asia-proxy1.example.com:8080', # Asia
]
scraper = cloudscraper.create_scraper(
rotating_proxies=geo_proxies,
proxy_options={
'rotation_strategy': 'random', # Random geographic distribution
'ban_time': 300
}
)
# Test geographic distribution
for i in range(8):
response = scraper.get('https://httpbin.org/ip')
print(f"Request {i+1}: {response.json()['origin']}")HTTP Basic authentication for proxy access:
# Username/password in URL
authenticated_proxies = [
'http://username:password@proxy1.example.com:8080',
'http://user2:pass2@proxy2.example.com:8080'
]
scraper = cloudscraper.create_scraper(rotating_proxies=authenticated_proxies)
# URL encoding for special characters in credentials
import urllib.parse
username = 'user@domain.com'
password = 'p@ssw0rd!'
encoded_user = urllib.parse.quote(username)
encoded_pass = urllib.parse.quote(password)
proxy_url = f'http://{encoded_user}:{encoded_pass}@proxy.example.com:8080'
scraper = cloudscraper.create_scraper(rotating_proxies=[proxy_url])
Custom authentication methods for enterprise proxies:
# Custom authentication headers (if supported by proxy)
import requests
from requests.auth import HTTPProxyAuth
# Manual proxy configuration with custom auth
session = requests.Session()
session.proxies = {'http': 'http://proxy.example.com:8080'}
session.auth = HTTPProxyAuth('username', 'password')
scraper = cloudscraper.create_scraper(sess=session)
# Or with custom authentication headers
scraper = cloudscraper.create_scraper()
scraper.headers.update({
'Proxy-Authorization': 'Basic base64encodedcreds'
})
scraper.proxies = {'http': 'http://proxy.example.com:8080'}
Handle common proxy-related problems:
# Comprehensive proxy error handling
def robust_proxy_request(scraper, url, max_retries=3):
    """Make a GET request with proxy-aware error handling and retries.

    Parameters:
    - scraper: cloudscraper session (proxy rotation handled internally)
    - url: str, target URL
    - max_retries: int, number of attempts before giving up

    Returns:
    Response: the first response with HTTP status 200

    Raises:
    - Exception: if every attempt fails or returns a non-200 status
    """
    for attempt in range(max_retries):
        try:
            response = scraper.get(url, timeout=30)
            if response.status_code == 200:
                return response
            print(f"HTTP {response.status_code} on attempt {attempt + 1}")
        except requests.exceptions.ProxyError as e:
            print(f"Proxy error on attempt {attempt + 1}: {e}")
            # Proxy manager will automatically try next proxy
        except requests.exceptions.ConnectTimeout as e:
            print(f"Connection timeout on attempt {attempt + 1}: {e}")
        except requests.exceptions.ReadTimeout as e:
            print(f"Read timeout on attempt {attempt + 1}: {e}")
        except Exception as e:
            print(f"Unexpected error on attempt {attempt + 1}: {e}")
    raise Exception(f"Failed after {max_retries} attempts")
# Usage with robust error handling
scraper = cloudscraper.create_scraper(
rotating_proxies=proxy_list,
proxy_options={'rotation_strategy': 'smart', 'ban_time': 300}
)
try:
response = robust_proxy_request(scraper, 'https://httpbin.org/ip')
print(f"Success: {response.json()['origin']}")
except Exception as e:
print(f"All proxy attempts failed: {e}")Test proxy functionality before use:
def test_proxy_list(proxy_list, test_url='https://httpbin.org/ip'):
"""Test a list of proxies for functionality."""
working_proxies = []
failed_proxies = []
for proxy_url in proxy_list:
try:
test_scraper = cloudscraper.create_scraper(
proxies={'http': proxy_url, 'https': proxy_url}
)
response = test_scraper.get(test_url, timeout=10)
if response.status_code == 200:
origin_ip = response.json().get('origin', 'Unknown')
working_proxies.append((proxy_url, origin_ip))
print(f"✅ {proxy_url} -> {origin_ip}")
else:
failed_proxies.append((proxy_url, f"HTTP {response.status_code}"))
print(f"❌ {proxy_url} -> HTTP {response.status_code}")
except Exception as e:
failed_proxies.append((proxy_url, str(e)))
print(f"❌ {proxy_url} -> {e}")
return working_proxies, failed_proxies
# Test proxy list before using
proxy_list = [
'http://proxy1.example.com:8080',
'http://proxy2.example.com:8080',
'http://broken-proxy.example.com:8080'
]
working, failed = test_proxy_list(proxy_list)
print(f"\nWorking proxies: {len(working)}")
print(f"Failed proxies: {len(failed)}")
# Use only working proxies
if working:
working_proxy_urls = [proxy[0] for proxy in working]
scraper = cloudscraper.create_scraper(rotating_proxies=working_proxy_urls)
Optimize proxy settings for different use cases:
# High-throughput scraping
fast_scraper = cloudscraper.create_scraper(
rotating_proxies=proxy_list,
proxy_options={
'rotation_strategy': 'random', # Distribute load
'ban_time': 60 # Quick recovery from bans
},
enable_stealth=True,
stealth_options={
'min_delay': 0.1, # Minimal delays
'max_delay': 0.5
}
)
# Cautious scraping for sensitive sites
careful_scraper = cloudscraper.create_scraper(
rotating_proxies=proxy_list,
proxy_options={
'rotation_strategy': 'smart', # Use best-performing proxies
'ban_time': 1800 # Long ban time for failures
},
enable_stealth=True,
stealth_options={
'min_delay': 5.0, # Conservative delays
'max_delay': 15.0
}
)
Install with Tessl CLI
npx tessl i tessl/pypi-cloudscraper