Enhanced Python module to bypass Cloudflare's anti-bot page with support for v1, v2, v3 challenges, Turnstile, proxy rotation, and stealth mode.
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Advanced techniques for avoiding detection including human-like behavior simulation, request timing randomization, header manipulation, and browser fingerprint rotation. Stealth mode helps CloudScraper appear more like a regular browser.
Main class implementing anti-detection techniques and human-like behavior simulation to reduce the likelihood of being detected as an automated client.
class StealthMode:
    """Apply anti-detection techniques and human-like behavior simulation
    to a CloudScraper session, reducing the likelihood of being detected
    as an automated client.

    NOTE(review): this is the documented API surface (stub); the real
    implementation lives in the cloudscraper package.
    """

    def __init__(self, cloudscraper):
        """
        Initialize stealth mode for a CloudScraper instance.

        Parameters:
        - cloudscraper: CloudScraper instance to apply stealth techniques to
        """

    def apply_stealth_techniques(self, method: str, url: str, **kwargs):
        """
        Apply stealth modifications to request parameters.

        Parameters:
        - method: str, HTTP method (GET, POST, etc.)
        - url: str, target URL
        - **kwargs: request parameters to modify

        Returns:
        dict: Modified request parameters with stealth techniques applied
        """

    def set_delay_range(self, min_delay: float, max_delay: float):
        """
        Configure random delay range between requests.

        Parameters:
        - min_delay: float, minimum delay in seconds
        - max_delay: float, maximum delay in seconds
        """

    def enable_human_like_delays(self, enabled: bool):
        """
        Enable or disable human-like delay patterns.

        Parameters:
        - enabled: bool, whether to use human-like delays
        """

    def enable_randomize_headers(self, enabled: bool):
        """
        Enable or disable header randomization.

        Parameters:
        - enabled: bool, whether to randomize headers
        """

    def enable_browser_quirks(self, enabled: bool):
        """
        Enable or disable browser-specific behavioral quirks.

        Parameters:
        - enabled: bool, whether to apply browser quirks
        """

    def _apply_human_like_delay(self):
        """
        Apply random delay to mimic human behavior between requests.
        Internal method called automatically during request processing.
        """

    def _randomize_headers(self, kwargs):
        """
        Apply header randomization to avoid fingerprinting.

        Parameters:
        - kwargs: dict, request parameters to modify headers in
        """

    def _apply_browser_quirks(self, kwargs):
        """
        Apply browser-specific behavioral quirks to request parameters.

        Parameters:
        - kwargs: dict, request parameters to modify with browser quirks
        """

# Enable stealth mode with default settings for general anti-detection:
# Enable stealth mode with defaults
scraper = cloudscraper.create_scraper(enable_stealth=True)
# Stealth mode disabled
scraper = cloudscraper.create_scraper(enable_stealth=False)
# Basic stealth configuration
scraper = cloudscraper.create_scraper(
enable_stealth=True,
stealth_options={
'min_delay': 1.0,
'max_delay': 3.0,
'human_like_delays': True,
'randomize_headers': True,
'browser_quirks': True
}
)
Comprehensive stealth setup with fine-tuned parameters for maximum anti-detection effectiveness:
# Maximum stealth configuration
scraper = cloudscraper.create_scraper(
enable_stealth=True,
stealth_options={
'min_delay': 2.0, # Minimum 2 seconds between requests
'max_delay': 8.0, # Maximum 8 seconds between requests
'human_like_delays': True, # Use realistic human timing patterns
'randomize_headers': True, # Randomize headers to avoid fingerprinting
'browser_quirks': True # Apply browser-specific behaviors
},
browser={
'browser': 'chrome', # Consistent browser fingerprint
'platform': 'windows', # Consistent platform
'mobile': False # Desktop only
}
)
# Conservative stealth for sensitive sites
scraper = cloudscraper.create_scraper(
enable_stealth=True,
stealth_options={
'min_delay': 5.0, # Slower, more human-like
'max_delay': 15.0, # Long maximum delays
'human_like_delays': True,
'randomize_headers': True,
'browser_quirks': True
}
)
Implement human-like timing patterns to avoid detection based on request frequency:
# Configure delay patterns
scraper = cloudscraper.create_scraper(
enable_stealth=True,
stealth_options={
'min_delay': 2.0,
'max_delay': 6.0,
'human_like_delays': True # Realistic timing patterns
}
)
# Manual delay configuration
scraper.stealth_mode.set_delay_range(1.5, 4.5)
scraper.stealth_mode.enable_human_like_delays(True)
# Different timing for different scenarios
fast_scraper = cloudscraper.create_scraper(
enable_stealth=True,
stealth_options={'min_delay': 0.5, 'max_delay': 2.0}
)
slow_scraper = cloudscraper.create_scraper(
enable_stealth=True,
stealth_options={'min_delay': 5.0, 'max_delay': 20.0}
)
# Stealth timing in action
scraper = cloudscraper.create_scraper(
enable_stealth=True,
stealth_options={'min_delay': 2.0, 'max_delay': 5.0},
debug=True # See timing information
)
# Each request will have random delays
response1 = scraper.get('https://example.com/page1') # Waits 2-5 seconds before the request
response2 = scraper.get('https://example.com/page2') # Waits 2-5 seconds before the request
response3 = scraper.get('https://example.com/page3') # Waits 2-5 seconds before the request
Randomize HTTP headers to avoid consistent fingerprinting patterns:
# Enable header randomization
scraper = cloudscraper.create_scraper(
enable_stealth=True,
stealth_options={
'randomize_headers': True # Randomize order and values
}
)
# Selective header randomization
scraper.stealth_mode.enable_randomize_headers(True)
# View applied headers with debug mode
scraper = cloudscraper.create_scraper(
enable_stealth=True,
stealth_options={'randomize_headers': True},
debug=True
)
response = scraper.get('https://httpbin.org/headers')
print(response.json()) # See randomized headers
Apply browser-specific behavioral patterns to mimic real browser usage:
# Enable browser quirks
scraper = cloudscraper.create_scraper(
enable_stealth=True,
stealth_options={
'browser_quirks': True # Apply browser-specific behaviors
},
browser={
'browser': 'chrome', # Target specific browser quirks
'platform': 'windows'
}
)
# Browser-specific configurations
chrome_quirks = cloudscraper.create_scraper(
enable_stealth=True,
stealth_options={'browser_quirks': True},
browser={'browser': 'chrome', 'platform': 'windows'}
)
firefox_quirks = cloudscraper.create_scraper(
enable_stealth=True,
stealth_options={'browser_quirks': True},
browser={'browser': 'firefox', 'platform': 'linux'}
)
Configure automatic session refresh to maintain session health and avoid detection:
# Session refresh configuration
scraper = cloudscraper.create_scraper(
enable_stealth=True,
session_refresh_interval=3600, # Refresh every hour
auto_refresh_on_403=True, # Auto-refresh on 403 errors
max_403_retries=3 # Maximum retry attempts
)
# Conservative session management
scraper = cloudscraper.create_scraper(
enable_stealth=True,
session_refresh_interval=1800, # Refresh every 30 minutes
auto_refresh_on_403=True,
max_403_retries=5
)
# Aggressive session refresh for problematic sites
scraper = cloudscraper.create_scraper(
enable_stealth=True,
session_refresh_interval=900, # Refresh every 15 minutes
auto_refresh_on_403=True,
max_403_retries=10
)
Control request frequency to avoid triggering rate limits:
# Request throttling configuration
scraper = cloudscraper.create_scraper(
enable_stealth=True,
min_request_interval=1.0, # Minimum 1 second between requests
max_concurrent_requests=1, # Only 1 request at a time
rotate_tls_ciphers=True # Rotate TLS fingerprint
)
# More aggressive throttling
scraper = cloudscraper.create_scraper(
enable_stealth=True,
min_request_interval=3.0, # Minimum 3 seconds between requests
max_concurrent_requests=1,
rotate_tls_ciphers=True
)
Rotate TLS cipher suites to avoid consistent cryptographic fingerprinting:
# Enable TLS cipher rotation
scraper = cloudscraper.create_scraper(
enable_stealth=True,
rotate_tls_ciphers=True, # Rotate cipher suites
debug=True # See cipher rotation
)
# Manual cipher configuration
scraper = cloudscraper.create_scraper(
enable_stealth=True,
cipherSuite='TLS_AES_256_GCM_SHA384:TLS_AES_128_GCM_SHA256',
rotate_tls_ciphers=True
)
Combine stealth mode with user agent rotation for enhanced anti-detection:
# User agent rotation with stealth
scraper = cloudscraper.create_scraper(
enable_stealth=True,
browser={
'browser': 'chrome', # Base browser type
'platform': 'windows', # Base platform
'mobile': False, # Device type
'desktop': True
},
stealth_options={
'randomize_headers': True, # This affects user agent selection
'browser_quirks': True
}
)
# Different user agents for different sessions
browsers = ['chrome', 'firefox']
for browser in browsers:
scraper = cloudscraper.create_scraper(
enable_stealth=True,
browser={'browser': browser, 'platform': 'windows'},
stealth_options={'randomize_headers': True}
)
response = scraper.get('https://httpbin.org/user-agent')
print(f"{browser}: {response.json()['user-agent']}")
Implement realistic browsing patterns to avoid detection:
def realistic_browsing_session(scraper, base_url, pages):
    """Simulate realistic browsing behavior.

    Fetches the homepage first, then visits ``pages`` in shuffled order,
    sleeping random human-like intervals between requests.

    Parameters:
    - scraper: CloudScraper instance used to perform the requests
    - base_url: str, site root to start from
    - pages: list of page paths; shuffled IN PLACE as a side effect
    """
    import random
    import time

    # Start with homepage
    resp = scraper.get(base_url)
    print(f"Visited homepage: {resp.status_code}")

    # Random delay like reading the page
    time.sleep(random.uniform(5, 15))

    # Visit pages in random order with realistic delays
    random.shuffle(pages)
    root = base_url.rstrip('/')
    for page in pages:
        # Random delay between page visits
        time.sleep(random.uniform(2, 8))
        resp = scraper.get(f"{root}/{page.lstrip('/')}")
        print(f"Visited {page}: {resp.status_code}")
        # Occasional longer delays (like reading content)
        if random.random() < 0.3:  # 30% chance
            time.sleep(random.uniform(10, 30))
# Use with stealth scraper
scraper = cloudscraper.create_scraper(
enable_stealth=True,
stealth_options={
'min_delay': 1.0,
'max_delay': 4.0,
'human_like_delays': True
}
)
pages = ['/about', '/products', '/contact', '/blog']
realistic_browsing_session(scraper, 'https://example.com', pages)
Monitor stealth techniques with debug logging:
scraper = cloudscraper.create_scraper(
enable_stealth=True,
stealth_options={
'min_delay': 2.0,
'max_delay': 5.0,
'human_like_delays': True,
'randomize_headers': True,
'browser_quirks': True
},
debug=True
)
response = scraper.get('https://example.com')
# Debug output shows:
# "⏱️ Request throttling: sleeping 1.2s"
# "🔐 Rotated TLS cipher suite (rotation #3)"
# "🎭 Applied stealth header randomization"
# "🚶 Human-like delay: 3.4s"
Test stealth effectiveness against detection systems:
def test_stealth_effectiveness(urls, stealth_configs):
    """Test different stealth configurations against a set of URLs.

    Parameters:
    - urls: iterable of URL strings to fetch
    - stealth_configs: dict mapping a configuration name to the kwargs
      passed to cloudscraper.create_scraper()

    Returns:
    dict: {config_name: {url: result}} where result always contains
    'status', 'success' and 'blocked'; failed requests additionally
    carry an 'error' message.
    """
    results = {}
    for config_name, config in stealth_configs.items():
        print(f"\nTesting {config_name}...")
        scraper = cloudscraper.create_scraper(**config)
        config_results = {}
        for url in urls:
            try:
                response = scraper.get(url, timeout=30)
                config_results[url] = {
                    'status': response.status_code,
                    'success': 200 <= response.status_code < 400,
                    'blocked': response.status_code in (403, 429, 503)
                }
                print(f" {url}: {response.status_code}")
            except Exception as e:
                # Keep the schema consistent with the success branch so
                # consumers can always read the 'blocked' key.
                config_results[url] = {
                    'status': None,
                    'success': False,
                    'blocked': False,  # unknown — the request never completed
                    'error': str(e)
                }
                print(f" {url}: Error - {e}")
        results[config_name] = config_results
    return results
# Test configurations
stealth_configs = {
'no_stealth': {
'enable_stealth': False
},
'basic_stealth': {
'enable_stealth': True
},
'maximum_stealth': {
'enable_stealth': True,
'stealth_options': {
'min_delay': 3.0,
'max_delay': 8.0,
'human_like_delays': True,
'randomize_headers': True,
'browser_quirks': True
},
'session_refresh_interval': 1800,
'rotate_tls_ciphers': True
}
}
test_urls = [
'https://example.com',
'https://httpbin.org/headers',
'https://protected-site.com'
]
results = test_stealth_effectiveness(test_urls, stealth_configs)
Configure stealth settings based on target site characteristics:
# High-security financial sites
financial_scraper = cloudscraper.create_scraper(
enable_stealth=True,
stealth_options={
'min_delay': 5.0,
'max_delay': 15.0,
'human_like_delays': True,
'randomize_headers': True,
'browser_quirks': True
},
session_refresh_interval=900, # 15 minutes
rotate_tls_ciphers=True
)
# E-commerce sites
ecommerce_scraper = cloudscraper.create_scraper(
enable_stealth=True,
stealth_options={
'min_delay': 2.0,
'max_delay': 6.0,
'human_like_delays': True,
'randomize_headers': True
},
session_refresh_interval=3600 # 1 hour
)
# News/content sites
content_scraper = cloudscraper.create_scraper(
enable_stealth=True,
stealth_options={
'min_delay': 1.0,
'max_delay': 3.0,
'human_like_delays': True
}
)
Balance stealth effectiveness with performance requirements:
# Maximum performance (minimal stealth)
fast_scraper = cloudscraper.create_scraper(
enable_stealth=True,
stealth_options={
'min_delay': 0.1,
'max_delay': 0.5,
'human_like_delays': False,
'randomize_headers': False
}
)
# Balanced performance and stealth
balanced_scraper = cloudscraper.create_scraper(
enable_stealth=True,
stealth_options={
'min_delay': 1.0,
'max_delay': 3.0,
'human_like_delays': True,
'randomize_headers': True
}
)
# Maximum stealth (slower performance)
stealth_scraper = cloudscraper.create_scraper(
enable_stealth=True,
stealth_options={
'min_delay': 5.0,
'max_delay': 20.0,
'human_like_delays': True,
'randomize_headers': True,
'browser_quirks': True
},
rotate_tls_ciphers=True
)
Handle detection-related errors with stealth mode:
try:
scraper = cloudscraper.create_scraper(
enable_stealth=True,
stealth_options={'min_delay': 2.0, 'max_delay': 5.0}
)
response = scraper.get('https://protected-site.com')
except cloudscraper.CloudflareLoopProtection:
print("Multiple challenges detected - increase stealth settings")
# Retry with more aggressive stealth
scraper = cloudscraper.create_scraper(
enable_stealth=True,
stealth_options={
'min_delay': 10.0,
'max_delay': 30.0,
'human_like_delays': True,
'randomize_headers': True,
'browser_quirks': True
}
)
response = scraper.get('https://protected-site.com')
except Exception as e:
if '403' in str(e) or '429' in str(e):
print("Access denied - consider more aggressive stealth settings")
raise
Install with Tessl CLI
npx tessl i tessl/pypi-cloudscraper