CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-ddgs

A metasearch library that aggregates results from diverse web search services

Pending
Overview
Eval results
Files

docs/config-utils.md

Configuration and Utilities

Configuration options, proxy support, utility functions, and result processing capabilities. This module provides the foundational configuration and utility functionality that supports the DDGS search operations.

Package Version

__version__ = "9.5.5"

Access the package version:

from ddgs import __version__
print(__version__)  # "9.5.5"

Capabilities

DDGS Configuration

Main configuration class for initializing DDGS with proxy, timeout, and SSL verification settings.

class DDGS:
    # Class-level worker count; get_executor() is documented as configuring its
    # pool with this value. None presumably defers to ThreadPoolExecutor's own
    # default worker count -- TODO confirm against the implementation.
    threads: int | None = None
    # Class-level slot for the cached executor returned by get_executor();
    # None until an executor exists (presumably created lazily -- verify).
    _executor: ThreadPoolExecutor | None = None
    
    def __init__(
        self,
        proxy: str | None = None,
        timeout: int | None = 5,
        verify: bool = True
    ) -> None:
        """
        Initialize DDGS search coordinator.
        
        Parameters:
        - proxy: Proxy URL for requests. Supports:
          - HTTP proxies: "http://proxy.example.com:8080"
          - SOCKS proxies: "socks5://127.0.0.1:9050"
          - Tor Browser shortcut: "tb" (expands to "socks5h://127.0.0.1:9150")
          - Environment variable: Uses DDGS_PROXY if proxy=None
        - timeout: Request timeout in seconds (default: 5). The annotation also
          allows None; presumably that means "no timeout" -- TODO confirm.
        - verify: SSL certificate verification (default: True)
        """
    
    def __enter__(self) -> 'DDGS':
        """
        Context manager entry point.

        Returns:
        The DDGS object that ``with DDGS(...) as ddgs:`` binds to ``ddgs``.
        """
    
    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        """
        Context manager exit point.

        Parameters:
        - exc_type, exc_val, exc_tb: Standard exception details supplied by the
          ``with`` statement (all None when the block exits normally).

        Returns:
        None, so an exception raised inside the ``with`` block is not
        suppressed and propagates to the caller.
        """
    
    @classmethod
    def get_executor(cls) -> ThreadPoolExecutor:
        """
        Get cached ThreadPoolExecutor for concurrent searches.

        This is a classmethod, so it is called on the class itself
        (``DDGS.get_executor()``) rather than on an instance.
        
        Returns:
        ThreadPoolExecutor instance configured with cls.threads workers
        """

Usage Examples:

from ddgs import DDGS

# Basic configuration
with DDGS() as ddgs:
    results = ddgs.text("query")

# Custom timeout and proxy
with DDGS(proxy="socks5://127.0.0.1:9050", timeout=15) as ddgs:
    results = ddgs.text("query")

# Tor Browser proxy shortcut ("tb" expands to "socks5h://127.0.0.1:9150").
# SSL verification is left at its default (enabled): turning certificate
# checks off for a sensitive query would undermine the privacy the proxy
# is meant to provide.
with DDGS(proxy="tb") as ddgs:
    results = ddgs.text("sensitive query")

# Environment-based proxy configuration
import os
os.environ['DDGS_PROXY'] = 'http://proxy.example.com:8080'
with DDGS() as ddgs:  # Uses environment proxy
    results = ddgs.text("query")

# Configure global thread pool
# NOTE: get_executor() returns a cached executor, so set DDGS.threads before
# the first search; presumably a later change does not resize an existing
# pool -- confirm against the implementation.
DDGS.threads = 20  # Use 20 threads for concurrent searches
with DDGS() as ddgs:
    results = ddgs.text("query", backend="all")  # Uses more threads

JSON Utilities

Utility functions for JSON serialization and deserialization with optimized performance.

def json_dumps(obj: Any) -> str:
    """
    Serialize object to JSON string.
    
    Uses orjson library if available for better performance,
    falls back to standard json library.
    
    Parameters:
    - obj: Object to serialize (for example, the results of a search call)
    
    Returns:
    JSON string representation. The signature declares ``str`` for both
    back ends.

    Note:
    orjson.dumps natively returns bytes, so the orjson path presumably
    decodes its output before returning -- confirm against the implementation.
    """

def json_loads(obj: str | bytes) -> Any:
    """
    Deserialize JSON string to Python object.
    
    Uses orjson library if available for better performance,
    falls back to standard json library.
    
    Parameters:
    - obj: JSON document to deserialize; both ``str`` and ``bytes`` are
      accepted, so text and binary payloads can be passed without conversion
    
    Returns:
    Deserialized Python object
    """

Usage Examples:

from ddgs import DDGS
from ddgs.utils import json_dumps, json_loads

# Serialize search results
# (a DDGS instance must exist before calling .text())
with DDGS() as ddgs:
    results = ddgs.text("query")
json_string = json_dumps(results)

# Deserialize JSON data
data = json_loads(json_string)

# Use with file operations
# An explicit encoding keeps the file portable: JSON text is UTF-8, while
# open() otherwise falls back to a platform-dependent default.
with open('results.json', 'w', encoding='utf-8') as f:
    f.write(json_dumps(results))

with open('results.json', 'r', encoding='utf-8') as f:
    loaded_results = json_loads(f.read())

Proxy Configuration

Advanced proxy configuration options and utilities.

def _expand_proxy_tb_alias(proxy: str | None) -> str | None:
    """
    Expand Tor Browser proxy alias to full SOCKS URL.

    Internal helper (leading underscore). Callers normally just pass
    ``proxy="tb"`` to ``DDGS``, which is documented as accepting the alias.
    
    Parameters:
    - proxy: Proxy string, may contain "tb" alias; None is allowed
    
    Returns:
    Expanded proxy URL or None. Presumably any value other than "tb" is
    returned unchanged and None is passed through as None -- TODO confirm.
    
    Example:
    "tb" -> "socks5h://127.0.0.1:9150"
    """

Proxy Support:

# Example proxy values, written as {value passed to DDGS(proxy=...): resulting proxy URL}.
# Only "tb" is an alias that gets expanded; the remaining entries are full
# proxy URLs and map to themselves in this table.
proxies = {
    # Tor Browser default (alias expanded to the SOCKS URL)
    "tb": "socks5h://127.0.0.1:9150",
    
    # Common SOCKS proxy
    "socks5://127.0.0.1:9050": "socks5://127.0.0.1:9050",
    
    # HTTP proxy
    "http://proxy.company.com:8080": "http://proxy.company.com:8080",
    
    # HTTPS proxy
    "https://secure-proxy.com:443": "https://secure-proxy.com:443"
}

# Environment variable support: DDGS_PROXY is used when proxy=None
import os
os.environ['DDGS_PROXY'] = 'socks5://127.0.0.1:9050'

# Proxy authentication (if supported by proxy server): credentials are
# embedded in the URL as username:password@host
authenticated_proxy = "http://username:password@proxy.example.com:8080"

Threading Configuration

Control concurrent search execution with thread pool configuration.

# Class-level thread configuration
DDGS.threads = 10  # Set global thread pool size

# Access thread pool executor (cached, shared at class level)
executor = DDGS.get_executor()

# Manual thread pool management
from concurrent.futures import ThreadPoolExecutor

# Custom executor for advanced use cases
# NOTE: the DDGS constructor documented on this page takes only proxy, timeout
# and verify -- there is no parameter for handing it an executor -- so a pool
# created here serves your own tasks and is independent of DDGS.threads.
with ThreadPoolExecutor(max_workers=15) as custom_executor:
    # Use custom executor for specific operations
    pass

Threading Examples:

# Configure for high-performance searching
DDGS.threads = 25  # More threads for concurrent searches

with DDGS(timeout=30) as ddgs:
    # One search, fanned out to every backend
    text_results = ddgs.text("AI research", backend="all", max_results=100)
    # Uses thread pool for concurrent engine queries

# Conservative configuration for limited resources
# (an alternative to the configuration above: get_executor() caches its pool,
# so presumably DDGS.threads must be chosen before the first search in a
# process -- TODO confirm)
DDGS.threads = 5

with DDGS(timeout=10) as ddgs:
    results = ddgs.text("query", backend="google,bing")

Environment Variables

Environment variable support for default configuration.

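# Set in shell environment (shell commands, not Python)
export DDGS_PROXY="socks5://127.0.0.1:9050"
export DDGS_TIMEOUT="15"
export DDGS_VERIFY="false"
# NOTE: the DDGS constructor documentation on this page only describes an
# environment fallback for DDGS_PROXY; confirm that DDGS_TIMEOUT and
# DDGS_VERIFY are honored before relying on them.

# Use environment variables (Python)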
import os

# Proxy from environment
os.environ['DDGS_PROXY'] = 'http://proxy.example.com:8080'

# DDGS automatically uses environment proxy if proxy=None
with DDGS() as ddgs:
    results = ddgs.text("query")  # Uses environment proxy

# Override environment with explicit parameters
with DDGS(proxy="tb", timeout=20) as ddgs:
    results = ddgs.text("query")  # Uses "tb" proxy, ignores environment

Performance Optimization

Configuration options for optimizing search performance.

# High-performance configuration
# (this and the memory-efficient configuration below are alternatives; pick one
# per process, since the executor is cached and presumably is not resized when
# DDGS.threads changes later -- TODO confirm)
DDGS.threads = 20  # More concurrent threads

with DDGS(
    timeout=30,        # Longer timeout for complex searches
    verify=True        # Keep SSL verification for security
) as ddgs:
    # Use all available backends for maximum coverage
    results = ddgs.text(
        "comprehensive query",
        backend="all",
        max_results=200
    )

# Memory-efficient configuration for limited resources
DDGS.threads = 3   # Fewer threads

with DDGS(timeout=10) as ddgs:
    # Restrict the search to specific backends to reduce resource usage
    results = ddgs.text(
        "focused query",
        backend="wikipedia,google",
        max_results=20
    )

SSL and Security Configuration

SSL certificate verification and security settings.

# Disable SSL verification (not recommended for production)
with DDGS(verify=False) as ddgs:
    results = ddgs.text("query")

# Enable SSL verification with custom timeout
with DDGS(verify=True, timeout=20) as ddgs:
    results = ddgs.text("query")

# Secure configuration with Tor
with DDGS(proxy="tb", verify=True, timeout=30) as ddgs:
    results = ddgs.text("sensitive query")

Result Processing Utilities

Internal utilities for processing and normalizing search results.

# Text normalization utilities (internal)
def _normalize_url(url: str) -> str:
    """
    Normalize URL format for consistent results.

    Internal helper (leading underscore); not part of the public API.

    Parameters:
    - url: URL string to normalize

    Returns:
    Normalized URL string
    """

def _normalize_text(raw: str) -> str:
    """
    Clean and normalize text content from HTML.

    Internal helper (leading underscore); not part of the public API.

    Parameters:
    - raw: Text content taken from HTML, before cleaning

    Returns:
    Cleaned, normalized text
    """
    
def _normalize_date(date: int | str) -> str:
    """
    Normalize date formats to consistent string representation.

    Internal helper (leading underscore); not part of the public API.

    Parameters:
    - date: Date as an int or a string. An int is presumably a Unix
      timestamp -- TODO confirm against the implementation.

    Returns:
    Date as a string in a consistent format
    """

Error Configuration

Configure error handling behavior and retry logic.

# Timeout configuration affects exception behavior
short_timeout_ddgs = DDGS(timeout=5)   # More likely to raise TimeoutException
long_timeout_ddgs = DDGS(timeout=30)   # Less likely to timeout

# Verify configuration affects SSL errors
secure_ddgs = DDGS(verify=True)    # Will raise SSL errors for invalid certs
insecure_ddgs = DDGS(verify=False) # Ignores SSL certificate issues

Configuration Best Practices

Production Configuration

# Recommended production settings
with DDGS(
    proxy=None,        # Use environment variable or None
    timeout=15,        # Reasonable timeout
    verify=True        # Always verify SSL in production
) as ddgs:
    results = ddgs.text("query", backend="auto")

Development Configuration

# Development/testing settings
with DDGS(
    proxy="tb",        # Use Tor for privacy during testing
    timeout=30,        # Longer timeout for debugging
    verify=False       # May disable for local testing
) as ddgs:
    results = ddgs.text("test query", backend="all")

High-Volume Configuration

# Settings for high-volume search applications
import time

from ddgs import DDGS

DDGS.threads = 30

# Queries to process; replace with your own list
query_list = ["first query", "second query"]

# Results keyed by query, so each batch survives the next iteration
all_results = {}

with DDGS(
    timeout=20,
    verify=True
) as ddgs:
    # Batch processing with rate limiting
    for query in query_list:
        results = ddgs.text(query, max_results=50)
        all_results[query] = results
        time.sleep(1)  # Rate limiting

Resource-Constrained Configuration

# Minimal resource usage
DDGS.threads = 2

with DDGS(timeout=8) as ddgs:
    results = ddgs.text(
        "query", 
        backend="wikipedia",  # Single, reliable backend
        max_results=10
    )

Install with Tessl CLI

npx tessl i tessl/pypi-ddgs

docs

cli.md

config-utils.md

core-search.md

exceptions.md

index.md

tile.json