tessl/pypi-cloudscraper

Enhanced Python module to bypass Cloudflare's anti-bot page with support for v1, v2, v3 challenges, Turnstile, proxy rotation, and stealth mode.

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview
Eval results
Files

user-agent.mddocs/

User Agent and Browser Emulation

Comprehensive browser fingerprinting and user agent management with support for multiple browsers, platforms, and device types. CloudScraper ships an extensive browsers database and falls back to built-in user agents automatically in frozen executable environments (e.g. PyInstaller).

Capabilities

User_Agent Class

Main class for managing user agent strings, HTTP headers, and TLS cipher suites to emulate different browsers and devices.

class User_Agent:
    def __init__(self, browser=None, **kwargs):
        """
        Initialize user agent manager.
        
        Parameters:
        - browser: str|dict, browser configuration
        - **kwargs: additional configuration options
        """

    def loadUserAgent(self, **kwargs):
        """
        Load user agent configuration from browsers database.
        
        Parameters:
        - browser: str|dict, browser type and options
        - platform: str, target platform (windows, linux, darwin, android, ios)
        - desktop: bool, include desktop user agents (default: True)
        - mobile: bool, include mobile user agents (default: True)
        - custom: str, custom user agent string
        - allow_brotli: bool, enable Brotli compression
        """

    def filterAgents(self, user_agents: dict) -> dict:
        """
        Filter user agents based on platform and device preferences.
        
        Parameters:
        - user_agents: dict, complete user agents database
        
        Returns:
        dict: Filtered user agents matching criteria
        """

    def tryMatchCustom(self, user_agents: dict) -> bool:
        """
        Try to match custom user agent string with known browsers.
        
        Parameters:
        - user_agents: dict, user agents database
        
        Returns:
        bool: True if custom user agent was matched and configured
        """

    @property
    def headers(self) -> dict:
        """HTTP headers including User-Agent for the selected browser."""

    @property
    def cipherSuite(self) -> list:
        """TLS cipher suite configuration for the selected browser."""
    
    # Configuration attributes (set during initialization)
    browser: str         # Selected browser name
    platform: str        # Selected platform name
    desktop: bool        # Whether desktop user agents are enabled
    mobile: bool         # Whether mobile user agents are enabled
    custom: str          # Custom user agent string (if provided)
    platforms: list      # Available platforms ['linux', 'windows', 'darwin', 'android', 'ios']
    browsers: list       # Available browsers ['chrome', 'firefox']
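
The filtering performed by filterAgents can be sketched as follows. This is a minimal illustration only: the shape of the database used here (keyed by device type, then platform) and the function itself are assumptions, not cloudscraper's actual code.

```python
def filter_agents(user_agents, platform=None, desktop=True, mobile=True):
    """Illustrative re-implementation of the filterAgents idea: keep only
    the entries matching the device and platform preferences."""
    filtered = {}
    device_types = (['desktop'] if desktop else []) + (['mobile'] if mobile else [])
    for device in device_types:
        for plat, agents in user_agents.get(device, {}).items():
            if platform is None or plat == platform:
                filtered.setdefault(plat, []).extend(agents)
    return filtered

# Toy database with the assumed shape
db = {
    'desktop': {'windows': ['UA-win-1'], 'linux': ['UA-lin-1']},
    'mobile':  {'android': ['UA-and-1']},
}

print(filter_agents(db, platform='windows'))  # {'windows': ['UA-win-1']}
print(filter_agents(db, desktop=False))       # {'android': ['UA-and-1']}
```

The real database (browsers.json) is larger and may be organized differently, but the selection principle is the same: intersect the device-type and platform preferences, then choose a user agent from what remains.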

Basic Browser Selection

Simple browser configuration for common use cases:

import cloudscraper

# Default browser (random selection)
scraper = cloudscraper.create_scraper()

# Specific browser
scraper = cloudscraper.create_scraper(browser='chrome')
scraper = cloudscraper.create_scraper(browser='firefox')

# Browser with platform
scraper = cloudscraper.create_scraper(
    browser={
        'browser': 'chrome',
        'platform': 'windows'
    }
)

# Check selected user agent
print(scraper.headers['User-Agent'])
# Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36

Advanced Browser Configuration

Comprehensive browser fingerprinting with detailed device and platform options:

# Desktop-only Chrome on Windows
desktop_scraper = cloudscraper.create_scraper(
    browser={
        'browser': 'chrome',
        'platform': 'windows', 
        'desktop': True,
        'mobile': False
    }
)

# Mobile-only Firefox on Android
mobile_scraper = cloudscraper.create_scraper(
    browser={
        'browser': 'firefox',
        'platform': 'android',
        'desktop': False,
        'mobile': True
    }
)

# Any Firefox on macOS (mobile or desktop)
mac_scraper = cloudscraper.create_scraper(
    browser={
        'browser': 'firefox',
        'platform': 'darwin',  # macOS
        'desktop': True,
        'mobile': True
    }
)

Browser Types and Platforms

Supported Browsers

CloudScraper emulates two browser families (Chrome and Firefox), each across five platforms:

supported_browsers = ['chrome', 'firefox']

# Chrome variants across platforms
chrome_configs = [
    {'browser': 'chrome', 'platform': 'windows'},
    {'browser': 'chrome', 'platform': 'linux'},
    {'browser': 'chrome', 'platform': 'darwin'},   # macOS
    {'browser': 'chrome', 'platform': 'android'},
    {'browser': 'chrome', 'platform': 'ios'}
]

# Firefox variants
firefox_configs = [
    {'browser': 'firefox', 'platform': 'windows'},
    {'browser': 'firefox', 'platform': 'linux'},
    {'browser': 'firefox', 'platform': 'darwin'},
    {'browser': 'firefox', 'platform': 'android'},
    {'browser': 'firefox', 'platform': 'ios'}
]

# Test different browser configurations
for config in chrome_configs:
    scraper = cloudscraper.create_scraper(browser=config)
    print(f"{config['browser']} on {config['platform']}: {scraper.headers['User-Agent'][:50]}...")

Platform-Specific Configuration

Target specific operating systems and devices:

platforms = ['windows', 'linux', 'darwin', 'android', 'ios']

# Windows-specific browser emulation
windows_scraper = cloudscraper.create_scraper(
    browser={
        'browser': 'chrome',
        'platform': 'windows',
        'desktop': True,
        'mobile': False
    }
)

# Linux server environment
linux_scraper = cloudscraper.create_scraper(
    browser={
        'browser': 'firefox', 
        'platform': 'linux',
        'desktop': True,
        'mobile': False
    }
)

# iOS mobile device
ios_scraper = cloudscraper.create_scraper(
    browser={
        'browser': 'chrome',  # Chrome on iOS
        'platform': 'ios',
        'desktop': False,
        'mobile': True
    }
)

# Android device
android_scraper = cloudscraper.create_scraper(
    browser={
        'browser': 'chrome',
        'platform': 'android',
        'desktop': False,
        'mobile': True
    }
)

Device Type Selection

Control desktop vs mobile device emulation:

# Desktop only (no mobile user agents)
desktop_only = cloudscraper.create_scraper(
    browser={
        'browser': 'chrome',
        'desktop': True,
        'mobile': False
    }
)

# Mobile only (no desktop user agents)
mobile_only = cloudscraper.create_scraper(
    browser={
        'browser': 'chrome',
        'desktop': False,
        'mobile': True
    }
)

# Both desktop and mobile (default)
mixed_devices = cloudscraper.create_scraper(
    browser={
        'browser': 'chrome',
        'desktop': True,   # Default
        'mobile': True     # Default
    }
)

# Error: cannot disable both
try:
    invalid_scraper = cloudscraper.create_scraper(
        browser={
            'desktop': False,
            'mobile': False
        }
    )
except RuntimeError as e:
    print(f"Error: {e}")
    # "Sorry you can't have mobile and desktop disabled at the same time."

Custom User Agents

Custom User Agent Strings

Use custom user agent strings with automatic browser matching:

# Custom user agent with automatic browser detection
custom_ua = "MyBot/1.0 (compatible; MSIE 9.0; Windows NT 6.1)"
scraper = cloudscraper.create_scraper(
    browser={'custom': custom_ua}
)

print(scraper.headers['User-Agent'])  # Uses custom user agent
print(scraper.cipherSuite)            # Automatically selected cipher suite

# Custom user agent with Chrome characteristics
chrome_custom = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) MyApp/1.0 Chrome/120.0.0.0 Safari/537.36"
scraper = cloudscraper.create_scraper(
    browser={'custom': chrome_custom}
)

# If custom UA matches known browser, uses browser's headers and ciphers
# Otherwise, uses generic headers and ciphers
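
The matching step can be approximated as a case-insensitive scan of the custom string for known browser names. This is an illustrative sketch of the idea behind tryMatchCustom, not the library's exact logic:

```python
KNOWN_BROWSERS = ['chrome', 'firefox']  # names taken from the supported-browsers list

def try_match_custom(custom_ua):
    """Return the first known browser whose name appears in the custom UA,
    else None. On a match, the matched browser's headers and TLS cipher
    suite can be reused; otherwise generic fallbacks apply."""
    lowered = custom_ua.lower()
    for name in KNOWN_BROWSERS:
        if name in lowered:
            return name
    return None

print(try_match_custom("Mozilla/5.0 ... Chrome/120.0.0.0 Safari/537.36"))  # chrome
print(try_match_custom("CustomScraper/1.0"))                               # None
```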

Fallback Behavior for Custom User Agents

Handle custom user agents that don't match known browsers:

# Custom user agent that doesn't match any known browser
unknown_ua = "CustomScraper/1.0"
scraper = cloudscraper.create_scraper(
    browser={'custom': unknown_ua}
)

# Uses generic headers and cipher suite
print("Headers:")
for key, value in scraper.headers.items():
    print(f"  {key}: {value}")

print(f"\nCipher Suite: {scraper.cipherSuite}")

# Expected output uses fallback configuration:
# User-Agent: CustomScraper/1.0
# Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
# Accept-Language: en-US,en;q=0.9
# Accept-Encoding: gzip, deflate, br

Browser Headers and Fingerprinting

HTTP Headers by Browser

Different browsers send different HTTP headers:

# Compare headers between browsers
browsers = ['chrome', 'firefox']

for browser in browsers:
    scraper = cloudscraper.create_scraper(browser=browser)
    print(f"\n{browser.upper()} Headers:")
    for key, value in scraper.headers.items():
        print(f"  {key}: {value}")

# Chrome typical headers:
# User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36...
# Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
# Accept-Language: en-US,en;q=0.9
# Accept-Encoding: gzip, deflate, br

# Firefox typical headers:
# User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:120.0) Gecko/20100101 Firefox/120.0
# Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8
# Accept-Language: en-US,en;q=0.5
# Accept-Encoding: gzip, deflate, br

TLS Cipher Suites

Browser-specific TLS cipher suite configurations:

# Compare cipher suites between browsers
browsers = ['chrome', 'firefox']

for browser in browsers:
    scraper = cloudscraper.create_scraper(browser=browser)
    print(f"\n{browser.upper()} Cipher Suite:")
    print("  " + "\n  ".join(scraper.cipherSuite[:5]))  # First 5 ciphers
    print(f"  ... and {len(scraper.cipherSuite) - 5} more")

# Chrome uses different cipher preferences than Firefox
# This helps CloudScraper appear as the correct browser type

Browser Fingerprint Consistency

Maintain consistent browser fingerprints across requests:

# Consistent browser fingerprint
scraper = cloudscraper.create_scraper(
    browser={
        'browser': 'chrome',
        'platform': 'windows',
        'desktop': True,
        'mobile': False
    }
)

# All requests use the same user agent and TLS fingerprint
for i in range(5):
    response = scraper.get('https://httpbin.org/headers')
    headers = response.json()['headers']
    print(f"Request {i+1}: {headers['User-Agent'][:30]}...")
    # Same user agent every time for consistency

User Agent Rotation

Manual User Agent Rotation

Change user agents between sessions:

def create_scraper_with_random_browser():
    """Create scraper with randomly selected browser configuration."""
    import random
    
    browsers = ['chrome', 'firefox']
    platforms = ['windows', 'linux', 'darwin']
    
    config = {
        'browser': random.choice(browsers),
        'platform': random.choice(platforms),
        'desktop': True,
        'mobile': random.choice([True, False])
    }
    
    scraper = cloudscraper.create_scraper(browser=config)
    print(f"Created scraper: {config['browser']} on {config['platform']}")
    print(f"User-Agent: {scraper.headers['User-Agent'][:50]}...")
    
    return scraper

# Create multiple scrapers with different fingerprints
scrapers = []
for i in range(3):
    scraper = create_scraper_with_random_browser()
    scrapers.append(scraper)
    print()

User Agent Refresh

Refresh user agent during long-running sessions:

# User agent refresh capability
scraper = cloudscraper.create_scraper(browser='chrome')
print(f"Initial UA: {scraper.headers['User-Agent'][:50]}...")

# Refresh user agent (generates a new one)
scraper.user_agent.loadUserAgent(browser='firefox')
scraper.headers.update(scraper.user_agent.headers)  # update in place to keep other session headers

print(f"Refreshed UA: {scraper.headers['User-Agent'][:50]}...")

# Or create new scraper with different browser
old_cookies = scraper.cookies  # Preserve session cookies
scraper = cloudscraper.create_scraper(browser='firefox')
scraper.cookies.update(old_cookies)  # Restore cookies

Executable Environment Support

PyInstaller and Frozen Applications

CloudScraper includes comprehensive fallback for executable environments:

# Automatic detection and fallback for executables
import sys

if getattr(sys, 'frozen', False):
    print("Running in executable environment")
    # CloudScraper automatically uses built-in fallback user agents
    scraper = cloudscraper.create_scraper()
else:
    print("Running in normal Python environment")
    # CloudScraper uses full browsers.json database
    scraper = cloudscraper.create_scraper()

# Works in both environments with appropriate fallbacks
response = scraper.get('https://httpbin.org/headers')
print(f"User-Agent: {response.json()['headers']['User-Agent'][:50]}...")

Built-in Fallback User Agents

Comprehensive hardcoded user agents for when browsers.json is unavailable:

# Fallback user agents are automatically used when needed
# Covers major browsers and platforms:

fallback_browsers = {
    'chrome_windows': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    'chrome_linux': "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    'chrome_mac': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    'firefox_windows': "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:120.0) Gecko/20100101 Firefox/120.0",
    'firefox_linux': "Mozilla/5.0 (X11; Linux x86_64; rv:120.0) Gecko/20100101 Firefox/120.0",
    'chrome_android': "Mozilla/5.0 (Linux; Android 10; SM-G973F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Mobile Safari/537.36",
    'chrome_ios': "Mozilla/5.0 (iPhone; CPU iPhone OS 17_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/120.0.0.0 Mobile/15E148 Safari/604.1"
}

# These are automatically used when browsers.json is not available
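
Selecting a fallback can be sketched as a keyed lookup with a default. The key format, platform alias, and default choice below are assumptions for illustration, not cloudscraper's actual lookup code:

```python
def pick_fallback(browser, platform, fallbacks):
    """Pick a hardcoded user agent for a browser/platform pair, defaulting
    to Chrome on Windows when the exact combination is not covered."""
    aliases = {'darwin': 'mac'}  # assumed alias: 'darwin' maps to the '_mac' keys
    key = f"{browser}_{aliases.get(platform, platform)}"
    return fallbacks.get(key, fallbacks['chrome_windows'])

# Abbreviated stand-in for the fallback table above
fallback_browsers = {
    'chrome_windows': 'UA chrome win',
    'chrome_mac': 'UA chrome mac',
    'firefox_linux': 'UA firefox lin',
}

print(pick_fallback('chrome', 'darwin', fallback_browsers))  # UA chrome mac
print(pick_fallback('firefox', 'ios', fallback_browsers))    # UA chrome win (default)
```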

Manual Executable Configuration

Manually configure for executable environments:

# Force fallback mode for testing
import os
import cloudscraper.user_agent

# Temporarily hide browsers.json (shipped alongside the user_agent module) to test fallback
browsers_json_path = os.path.join(
    os.path.dirname(cloudscraper.user_agent.__file__), 'browsers.json'
)
if os.path.exists(browsers_json_path):
    os.rename(browsers_json_path, browsers_json_path + '.backup')

try:
    # This will use fallback user agents
    scraper = cloudscraper.create_scraper(browser='chrome')
    print(f"Fallback UA: {scraper.headers['User-Agent']}")

finally:
    # Restore browsers.json
    if os.path.exists(browsers_json_path + '.backup'):
        os.rename(browsers_json_path + '.backup', browsers_json_path)

Compression and Encoding

Brotli Compression Support

Configure Brotli compression support based on browser capabilities:

# Enable Brotli (default for browsers that support it)
scraper = cloudscraper.create_scraper(
    browser='chrome',
    allow_brotli=True  # Default
)
print(scraper.headers['Accept-Encoding'])
# "gzip, deflate, br" (includes Brotli)

# Disable Brotli
scraper = cloudscraper.create_scraper(
    browser='chrome',
    allow_brotli=False
)
print(scraper.headers['Accept-Encoding'])
# "gzip, deflate" (no Brotli)

# Brotli support varies by browser and version
firefox_scraper = cloudscraper.create_scraper(
    browser='firefox',
    allow_brotli=True
)
print(firefox_scraper.headers['Accept-Encoding'])

Content Encoding Handling

Automatic handling of compressed responses:

# CloudScraper automatically handles compressed responses
scraper = cloudscraper.create_scraper(allow_brotli=True)

# Request compressed content
response = scraper.get('https://httpbin.org/gzip')
print(f"Content-Encoding: {response.headers.get('Content-Encoding', 'none')}")
print(f"Content length: {len(response.text)} characters")

# Brotli compression (if supported)
response = scraper.get('https://httpbin.org/brotli')
print(f"Content-Encoding: {response.headers.get('Content-Encoding', 'none')}")
print(f"Decompressed: {len(response.text)} characters")
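
What "automatic handling" means in miniature: the HTTP stack inspects the Content-Encoding response header and decompresses the body before it reaches response.text. The same round trip with only the standard library:

```python
import gzip

original = b'{"compressed": true}'
wire_bytes = gzip.compress(original)    # what a server sends with Content-Encoding: gzip
assert wire_bytes != original           # the payload is opaque on the wire
restored = gzip.decompress(wire_bytes)  # what the client does transparently
print(restored.decode())                # {"compressed": true}
```

Brotli works the same way via the brotli package, which is why allow_brotli only controls whether "br" is advertised in Accept-Encoding.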

User Agent Debugging and Testing

User Agent Information

Inspect user agent configuration and selection:

# Detailed user agent information
scraper = cloudscraper.create_scraper(
    browser={
        'browser': 'chrome',
        'platform': 'windows',
        'desktop': True,
        'mobile': False
    }
)

print("User Agent Configuration:")
print(f"  Browser: {scraper.user_agent.browser}")
print(f"  Platform: {scraper.user_agent.platform}")
print(f"  Desktop: {scraper.user_agent.desktop}")
print(f"  Mobile: {scraper.user_agent.mobile}")

print(f"\nSelected User-Agent:")
print(f"  {scraper.headers['User-Agent']}")

print(f"\nAll Headers:")
for key, value in scraper.headers.items():
    print(f"  {key}: {value}")

print(f"\nCipher Suite ({len(scraper.cipherSuite)} ciphers):")
for i, cipher in enumerate(scraper.cipherSuite[:3]):
    print(f"  {i+1}. {cipher}")
print(f"  ... and {len(scraper.cipherSuite) - 3} more")

User Agent Testing

Test user agent selection and variation:

def test_user_agent_variety(configurations, instances_per_config=3):
    """Test user agent variety across different configurations."""
    results = {}
    
    for config_name, config in configurations.items():
        print(f"\nTesting {config_name}:")
        user_agents = set()
        
        # Create multiple scraper instances to see variety
        for i in range(instances_per_config):
            scraper = cloudscraper.create_scraper(browser=config)
            ua = scraper.headers['User-Agent']
            user_agents.add(ua)
            print(f"  {i+1}: {ua[:60]}...")
        
        results[config_name] = {
            'unique_user_agents': len(user_agents),
            'user_agents': list(user_agents)
        }
        print(f"  Unique UAs: {len(user_agents)}")
    
    return results

# Test different configurations
test_configs = {
    'chrome_any': {'browser': 'chrome'},
    'firefox_any': {'browser': 'firefox'},
    'chrome_windows': {'browser': 'chrome', 'platform': 'windows'},
    'chrome_mobile': {'browser': 'chrome', 'desktop': False, 'mobile': True},
    'random_browser': None  # Uses random selection
}

results = test_user_agent_variety(test_configs)

# Analyze variety
for config, result in results.items():
    if result['unique_user_agents'] > 1:
        print(f"\n{config} provides good variety ({result['unique_user_agents']} unique UAs)")
    else:
        print(f"\n{config} provides consistent fingerprint (1 unique UA)")

Browser Fingerprint Validation

Validate that browser fingerprints are realistic:

# Test browser fingerprint against real browser detection
def validate_browser_fingerprint(scraper, test_url='https://httpbin.org/headers'):
    """Validate browser fingerprint components."""
    response = scraper.get(test_url)
    headers = response.json()['headers']
    
    ua = headers.get('User-Agent', '')
    accept = headers.get('Accept', '')
    accept_encoding = headers.get('Accept-Encoding', '')
    accept_language = headers.get('Accept-Language', '')
    
    print("Browser Fingerprint Validation:")
    print(f"  User-Agent: {ua[:50]}...")
    print(f"  Accept: {accept}")
    print(f"  Accept-Encoding: {accept_encoding}")
    print(f"  Accept-Language: {accept_language}")
    
    # Basic validation checks
    checks = {
        'has_user_agent': bool(ua),
        'has_accept': bool(accept),
        'has_accept_encoding': bool(accept_encoding),
        'chrome_like': 'Chrome' in ua and 'Safari' in ua,
        'firefox_like': 'Firefox' in ua and 'Gecko' in ua,
        'supports_modern_encoding': 'gzip' in accept_encoding,
        'supports_brotli': 'br' in accept_encoding
    }
    
    print(f"\nValidation Results:")
    for check, result in checks.items():
        status = "✅" if result else "❌"
        print(f"  {status} {check}")
    
    return checks

# Validate different browser configurations  
browsers = ['chrome', 'firefox']
for browser in browsers:
    print(f"\n{'='*50}")
    print(f"Validating {browser.upper()} fingerprint")
    print('='*50)
    
    scraper = cloudscraper.create_scraper(browser=browser)
    validate_browser_fingerprint(scraper)

Install with Tessl CLI

npx tessl i tessl/pypi-cloudscraper
