tessl/pypi-geopy

Python Geocoding Toolbox providing comprehensive geocoding services and geodesic distance calculations

—

Pending

Overview

Eval results

Files

Rate Limiting

Name: tessl/pypi-geopy
Author: tessl

Geopy provides built-in rate limiting functionality to manage API quotas and prevent service abuse with configurable delays, retry logic, and error handling for both synchronous and asynchronous operations.

Capabilities

Synchronous Rate Limiter

Rate limiting wrapper for synchronous geocoding functions.

from geopy.extra.rate_limiter import RateLimiter

class RateLimiter:
    """
    Throttling wrapper around a synchronous geocoding callable.

    Spaces out consecutive calls by a minimum delay and retries
    calls that fail with an error.
    """

    def __init__(
        self,
        func,
        min_delay_seconds=0.0,
        max_retries=2,
        error_wait_seconds=5.0,
        swallow_exceptions=True,
        return_value_on_exception=None,
    ):
        """
        Set up the rate limiter.

        Parameters:
        - func: Callable to throttle (e.g., geolocator.geocode)
        - min_delay_seconds (float): Shortest allowed gap between two calls
        - max_retries (int): How many times a failed call is retried
        - error_wait_seconds (float): Pause after a recoverable error before retrying
        - swallow_exceptions (bool): If true, the final exception is suppressed
          instead of being raised
        - return_value_on_exception: Value returned in place of a swallowed exception
        """

    def __call__(self, *args, **kwargs):
        """
        Invoke the wrapped function, subject to rate limiting.

        Returns:
        The wrapped function's result, or return_value_on_exception on failure
        """

Asynchronous Rate Limiter

Rate limiting wrapper for asynchronous geocoding functions.

from geopy.extra.rate_limiter import AsyncRateLimiter

class AsyncRateLimiter:
    """
    Throttling wrapper around an asynchronous geocoding callable.

    Offers the same options as RateLimiter, for functions used with async/await.
    """

    def __init__(
        self,
        func,
        min_delay_seconds=0.0,
        max_retries=2,
        error_wait_seconds=5.0,
        swallow_exceptions=True,
        return_value_on_exception=None,
    ):
        """
        Set up the async rate limiter.

        Parameters: identical to RateLimiter, except that func is an async function
        """

    async def __call__(self, *args, **kwargs):
        """
        Await the wrapped async function, subject to rate limiting.

        Returns:
        The wrapped function's result, or return_value_on_exception on failure
        """

Usage Examples

Basic Rate Limiting

from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter

# Build the geocoder, then wrap its geocode method so that consecutive
# calls are kept at least one second apart
geolocator = Nominatim(user_agent="rate_limited_app")
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1.0)

# The wrapped callable is invoked exactly like geolocator.geocode
addresses = [
    "New York, NY",
    "Los Angeles, CA",
    "Chicago, IL",
    "Houston, TX",
    "Phoenix, AZ",
]

results = []
for address in addresses:
    # The rate limiter inserts the configured delay between calls
    location = geocode(address)
    results.append((address, location))
    if not location:
        print(f"○ {address} -> No results")
        continue
    print(f"✓ {address} -> {location.address}")

Advanced Rate Limiting Configuration

from geopy.geocoders import GoogleV3
from geopy.extra.rate_limiter import RateLimiter
from geopy.exc import GeocoderServiceError

# Initialize geocoder (requires API key)
geolocator = GoogleV3(api_key="your_api_key")

# Configure rate limiter with advanced options
geocode = RateLimiter(
    geolocator.geocode,
    min_delay_seconds=0.5,          # Half-second minimum delay
    max_retries=5,                  # Retry up to 5 times
    error_wait_seconds=10.0,        # Wait 10 seconds after errors
    swallow_exceptions=False,       # Raise the final exception instead of suppressing it
    return_value_on_exception=None  # Only used when exceptions are swallowed; no effect here
)

# Test with potentially problematic addresses
test_addresses = [
    "1600 Amphitheatre Parkway, Mountain View, CA",  # Valid
    "Invalid Address 123456789",                     # Invalid
    "Times Square, New York, NY",                    # Valid
    "",                                              # Empty
    "Valid Street Address, San Francisco, CA"       # Valid
]

for address in test_addresses:
    try:
        location = geocode(address)
    except Exception as e:
        # Because swallow_exceptions=False, a call that still fails after
        # the retries surfaces here rather than returning a placeholder
        print(f"✗ Error: {address} -> {e}")
    else:
        if location:
            print(f"✓ Found: {address} -> {location.address}")
        else:
            print(f"○ No results: {address}")

Rate Limiting with Custom Retry Logic

from geopy.geocoders import Bing
from geopy.extra.rate_limiter import RateLimiter
from geopy.exc import GeocoderRateLimited, GeocoderServiceError
import time

class SmartRateLimiter:
    """Rate limiter with intelligent backoff strategy.

    The delay between calls grows by ``backoff_factor`` (capped at
    ``max_delay``) whenever the service reports rate limiting, and shrinks
    by the same factor (never below ``base_delay``) after each success.

    Parameters:
    - func: Function to wrap (e.g., geolocator.geocode)
    - base_delay (float): Smallest delay between calls, in seconds
    - max_delay (float): Upper bound for the delay, in seconds
    - backoff_factor (float): Multiplier applied when backing off or recovering
    - max_retries (int): Total number of attempts per call (at least 1)
    """

    def __init__(self, func, base_delay=1.0, max_delay=60.0, backoff_factor=2.0,
                 max_retries=5):
        if max_retries < 1:
            raise ValueError("max_retries must be at least 1")
        self.func = func
        self.base_delay = base_delay
        self.max_delay = max_delay
        self.backoff_factor = backoff_factor
        self.max_retries = max_retries
        self.last_call_time = 0
        self.current_delay = base_delay

    def _wait_for_slot(self):
        """Sleep until at least ``current_delay`` has passed since the last call."""
        time_since_last = time.time() - self.last_call_time
        if time_since_last < self.current_delay:
            sleep_time = self.current_delay - time_since_last
            print(f"Rate limiting: waiting {sleep_time:.1f}s")
            time.sleep(sleep_time)

    def __call__(self, *args, **kwargs):
        """Execute with smart rate limiting.

        Returns the wrapped function's result. Re-raises the last
        GeocoderRateLimited / GeocoderServiceError once all attempts are
        used up; any other exception propagates immediately.
        """
        for attempt in range(self.max_retries):
            is_last_attempt = attempt == self.max_retries - 1
            self._wait_for_slot()

            try:
                self.last_call_time = time.time()
                result = self.func(*args, **kwargs)
            except GeocoderRateLimited as e:
                # Must be handled before GeocoderServiceError, which is
                # listed second so the more specific handler wins
                if is_last_attempt:
                    raise

                # Increase delay on rate limiting
                self.current_delay = min(
                    self.max_delay,
                    self.current_delay * self.backoff_factor
                )

                # Prefer the wait the exception carries, when it has one
                wait_time = e.retry_after or self.current_delay
                print(f"Rate limited, waiting {wait_time}s (attempt {attempt + 1})")
                time.sleep(wait_time)
            except GeocoderServiceError:
                if is_last_attempt:
                    raise

                print(f"Service error, retrying in {self.current_delay}s")
                time.sleep(self.current_delay)
            else:
                # Success - reduce delay
                self.current_delay = max(
                    self.base_delay,
                    self.current_delay / self.backoff_factor
                )
                return result

        # Defensive: every attempt above either returns or raises
        raise RuntimeError("Max retries exceeded")

# Usage
geolocator = Bing(api_key="your_api_key")
smart_geocode = SmartRateLimiter(geolocator.geocode, base_delay=0.5)

# Repeat the city list so there are enough calls to exercise the limiter
addresses = ["New York", "London", "Tokyo", "Sydney", "Berlin"] * 3

for i, address in enumerate(addresses):
    try:
        result = smart_geocode(address)
        if result:
            print(f"{i+1:2d}. {address} -> {'Found'}")
        else:
            print(f"{i+1:2d}. {address} -> {'Not found'}")
    except Exception as e:
        print(f"{i+1:2d}. {address} -> Error: {e}")

Async Rate Limiting

import asyncio
from geopy.geocoders import Nominatim
from geopy.adapters import AioHTTPAdapter
from geopy.extra.rate_limiter import AsyncRateLimiter

async def async_rate_limiting_example():
    """Geocode a few addresses one after another through AsyncRateLimiter."""

    addresses = [
        "San Francisco, CA",
        "Seattle, WA",
        "Portland, OR",
        "Denver, CO",
        "Austin, TX",
    ]

    async with Nominatim(
        user_agent="async_rate_limited_app",
        adapter_factory=AioHTTPAdapter,
    ) as geolocator:

        # Wrap the async geocode method with the rate limiter
        limiter_options = {
            "min_delay_seconds": 1.0,   # 1-second delay between calls
            "max_retries": 3,           # Retry up to 3 times
            "error_wait_seconds": 5.0,  # Wait 5 seconds after errors
        }
        geocode = AsyncRateLimiter(geolocator.geocode, **limiter_options)

        # Requests are issued sequentially: each call is awaited before the
        # next one starts, and the rate limiter spaces them out
        results = []
        for address in addresses:
            result = await geocode(address)
            results.append((address, result))
            if not result:
                print(f"○ {address} -> No results")
                continue
            print(f"✓ {address} -> {result.address}")

        return results

# Run async example
asyncio.run(async_rate_limiting_example())

Batch Processing with Rate Limiting

from geopy.geocoders import OpenCage
from geopy.extra.rate_limiter import RateLimiter
import time
import csv

class BatchGeocoder:
    """Batch geocoder with rate limiting and progress tracking.

    Wraps ``geocoder.geocode`` in a RateLimiter and keeps running counters
    in ``self.stats`` ('processed', 'successful', 'no_results', 'errors').
    """

    def __init__(self, geocoder, requests_per_second=1.0, max_retries=3):
        """
        Parameters:
        - geocoder: Object whose ``geocode`` method is rate limited
        - requests_per_second (float): Target request rate; must be positive
        - max_retries (int): Passed through to RateLimiter

        Raises:
        ValueError if requests_per_second is not positive
        """
        if requests_per_second <= 0:
            raise ValueError("requests_per_second must be positive")

        self.delay = 1.0 / requests_per_second
        self.geocode = RateLimiter(
            geocoder.geocode,
            min_delay_seconds=self.delay,
            max_retries=max_retries,
            error_wait_seconds=5.0,
            # Let the final failure propagate so process_batch can record it
            # as an error instead of mistaking it for "no results"
            swallow_exceptions=False,
        )
        self.stats = {
            'processed': 0,
            'successful': 0,
            'no_results': 0,
            'errors': 0
        }

    def process_batch(self, addresses, progress_callback=None):
        """Process batch of addresses with rate limiting.

        Parameters:
        - addresses: Addresses to geocode, processed in order
        - progress_callback: Optional callable invoked as
          ``progress_callback(done, total, elapsed_seconds)`` after every
          10th address and after the last one

        Returns:
        A list with one dict per address holding 'input', 'status'
        ('success', 'no_results' or 'error'), 'address', 'latitude' and
        'longitude'; error rows additionally carry 'error'.
        """
        results = []
        # Only needed for progress reporting, so only computed when requested
        total = len(addresses) if progress_callback else None
        start_time = time.time()

        for done, address in enumerate(addresses, start=1):
            try:
                result = self.geocode(address)

                if result:
                    self.stats['successful'] += 1
                    results.append({
                        'input': address,
                        'status': 'success',
                        'address': result.address,
                        'latitude': result.latitude,
                        'longitude': result.longitude
                    })
                else:
                    self.stats['no_results'] += 1
                    results.append({
                        'input': address,
                        'status': 'no_results',
                        'address': None,
                        'latitude': None,
                        'longitude': None
                    })

            except Exception as e:
                # Best-effort batch: record the failure and keep going
                self.stats['errors'] += 1
                results.append({
                    'input': address,
                    'status': 'error',
                    'error': str(e),
                    'address': None,
                    'latitude': None,
                    'longitude': None
                })

            self.stats['processed'] += 1

            # Report every 10 addresses, plus once at the very end so that
            # short or uneven batches still get a final update
            if progress_callback and (done % 10 == 0 or done == total):
                elapsed = time.time() - start_time
                progress_callback(done, total, elapsed)

        return results

    def print_stats(self):
        """Print processing statistics (nothing is printed before any address is processed)."""
        total = self.stats['processed']
        if total == 0:
            return

        print(f"\nBatch Processing Statistics:")
        print(f"Total processed: {total}")
        for label, key in (
            ("Successful", 'successful'),
            ("No results", 'no_results'),
            ("Errors", 'errors'),
        ):
            count = self.stats[key]
            print(f"{label}: {count} ({count/total*100:.1f}%)")

def progress_callback(current, total, elapsed):
    """Print one progress line: items done, percent, throughput and ETA.

    Throughput is reported as 0 when no time has elapsed, and the ETA as 0
    when the throughput is 0.
    """
    if elapsed > 0:
        rate = current / elapsed
    else:
        rate = 0

    if rate > 0:
        remaining = (total - current) / rate
    else:
        remaining = 0

    print(f"Progress: {current}/{total} ({current/total*100:.1f}%) - "
          f"{rate:.2f} req/s - ETA: {remaining:.0f}s")

# Example usage
addresses = [
    "1600 Amphitheatre Parkway, Mountain View, CA",
    "1 Apple Park Way, Cupertino, CA",
    "350 Fifth Avenue, New York, NY", 
    "Times Square, New York, NY",
    "Golden Gate Bridge, San Francisco, CA",
    "Space Needle, Seattle, WA",
    "Willis Tower, Chicago, IL",
    "Hollywood Sign, Los Angeles, CA",
    "Mount Rushmore, South Dakota",
    "Statue of Liberty, New York, NY"
] * 2  # Duplicate for larger test

# Initialize batch geocoder (requires API key for OpenCage)
# geolocator = OpenCage(api_key="your_api_key")
# This example's imports only bring in OpenCage, so import Nominatim here
# before using it; otherwise the next line fails with NameError
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent="batch_app")  # Using free service for example

batch_geocoder = BatchGeocoder(
    geolocator, 
    requests_per_second=0.5,  # Conservative rate for free service
    max_retries=3
)

# Process batch
print(f"Starting batch processing of {len(addresses)} addresses...")
results = batch_geocoder.process_batch(addresses, progress_callback)

# Print statistics
batch_geocoder.print_stats()

# Save results to CSV; rows without an 'error' key get an empty cell
with open('batch_results.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.DictWriter(f, fieldnames=['input', 'status', 'address', 'latitude', 'longitude', 'error'])
    writer.writeheader()
    writer.writerows(results)

print("Results saved to batch_results.csv")

Rate Limiting with Multiple Services

from geopy.geocoders import Nominatim, Photon
from geopy.extra.rate_limiter import RateLimiter
import time

class MultiServiceGeocoder:
    """Geocoder that uses multiple services with individual rate limiting.

    ``self.services`` maps a service key to a dict with the entries
    'geocoder', 'rate_limiter' (the callable actually used for lookups)
    and 'name' (used in printed messages).
    """

    def __init__(self):
        """Create the geocoders and wrap each geocode method in its own RateLimiter."""
        nominatim = Nominatim(user_agent="multi_service_app")
        photon = Photon(user_agent="multi_service_app")

        # Each limiter is built directly around the function it throttles.
        # swallow_exceptions=False lets a failure reach geocode_with_fallback,
        # which reports it as an error and moves on to the next service.
        self.services = {
            'nominatim': {
                'geocoder': nominatim,
                'rate_limiter': RateLimiter(
                    nominatim.geocode,
                    min_delay_seconds=1.0,  # Nominatim's required delay
                    max_retries=2,
                    swallow_exceptions=False
                ),
                'name': 'OpenStreetMap Nominatim'
            },
            'photon': {
                'geocoder': photon,
                'rate_limiter': RateLimiter(
                    photon.geocode,
                    min_delay_seconds=0.1,  # Photon is more permissive
                    max_retries=2,
                    swallow_exceptions=False
                ),
                'name': 'Photon'
            }
        }

    def _try_service(self, service_key, query):
        """Query one service; return its result, or None on no result or error."""
        service_data = self.services[service_key]
        name = service_data['name']

        print(f"Trying {name}...")
        try:
            result = service_data['rate_limiter'](query)
        except Exception as e:
            # Best-effort: a failing service must not stop the fallback chain
            print(f"✗ Error with {name}: {e}")
            return None

        if result:
            print(f"✓ Success with {name}")
        else:
            print(f"○ No results from {name}")
        return result

    def geocode_with_fallback(self, query, preferred_service='nominatim'):
        """Geocode with service fallback.

        The preferred service is tried first (when it is a known key), then
        the remaining services in the order they appear in ``self.services``.

        Returns:
        ``(result, service_key)`` for the first service that yields a
        result, or ``(None, None)`` if none does.
        """
        candidates = []
        if preferred_service in self.services:
            candidates.append(preferred_service)
        candidates.extend(
            key for key in self.services if key != preferred_service
        )

        for service_key in candidates:
            result = self._try_service(service_key, query)
            if result:
                return result, service_key

        return None, None

    def batch_geocode(self, addresses, preferred_service='nominatim'):
        """Batch geocode with service fallback.

        Returns:
        A list with one dict per address holding 'address', 'result',
        'service_used' and 'success'.
        """
        results = []

        for i, address in enumerate(addresses):
            print(f"\n{i+1}/{len(addresses)}: Processing '{address}'")

            result, used_service = self.geocode_with_fallback(
                address,
                preferred_service
            )

            results.append({
                'address': address,
                'result': result,
                'service_used': used_service,
                'success': result is not None
            })

            # Brief pause between addresses to be respectful
            time.sleep(0.5)

        return results

# Example usage
multi_geocoder = MultiServiceGeocoder()

test_addresses = [
    "Paris, France",
    "Tokyo, Japan",
    "New York City, USA",
    "Invalid Address 123456",
    "London, UK",
]

print("Starting multi-service batch geocoding...")
results = multi_geocoder.batch_geocode(test_addresses, preferred_service='nominatim')

# Summarize how many addresses were resolved
print(f"\n=== Results Summary ===")
successful = len([r for r in results if r['success']])
print(f"Success rate: {successful}/{len(results)} ({successful/len(results)*100:.1f}%)")

# Count how often each service supplied the answer
service_usage = {}
for result in results:
    used = result['service_used']
    if not used:
        continue
    service_usage[used] = service_usage.get(used, 0) + 1

print("Service usage:")
for service, count in service_usage.items():
    service_name = multi_geocoder.services[service]['name']
    print(f"  {service_name}: {count}")

Custom Rate Limiting Strategies

from geopy.geocoders import Nominatim
import time
import random

class AdaptiveRateLimiter:
    """Rate limiter that adapts based on success/failure patterns.

    ``current_delay`` is lowered by 10% while at least 80% of the last
    (up to 10) calls succeeded with a result, and raised by 50% otherwise;
    it always stays within ``[min_delay, max_delay]``. Adaptation starts
    once 5 results have been recorded.
    """

    def __init__(self, func, initial_delay=1.0, min_delay=0.1, max_delay=10.0):
        """
        Parameters:
        - func: Function to wrap (e.g., geolocator.geocode)
        - initial_delay (float): Starting delay between calls, in seconds
        - min_delay (float): Lower bound for the delay
        - max_delay (float): Upper bound for the delay
        """
        self.func = func
        self.current_delay = initial_delay
        self.min_delay = min_delay
        self.max_delay = max_delay
        self.last_call_time = 0

        # Success/failure tracking
        self.recent_results = []  # Track last 10 results
        self.success_threshold = 0.8  # 80% success rate target

    def __call__(self, *args, **kwargs):
        """Execute with adaptive rate limiting.

        Returns the wrapped function's result; any exception it raises is
        recorded as a failure and re-raised.
        """
        # Add some jitter to avoid thundering herd. It is applied to this
        # wait only: folding it into current_delay would make the adapted
        # delay drift randomly from call to call.
        wait_target = self.current_delay * random.uniform(0.8, 1.2)
        wait_target = max(self.min_delay, min(self.max_delay, wait_target))

        time_since_last = time.time() - self.last_call_time
        if time_since_last < wait_target:
            time.sleep(wait_target - time_since_last)

        # Make the call
        self.last_call_time = time.time()

        try:
            result = self.func(*args, **kwargs)
        except Exception:
            self._record_result(False, False)
            raise

        self._record_result(True, result is not None)
        return result

    def _record_result(self, call_successful, has_result):
        """Record result and adapt delay."""
        # Track overall success (call didn't fail + got result)
        self.recent_results.append(call_successful and has_result)

        # Keep only the 10 most recent results
        del self.recent_results[:-10]

        # Adapt delay based on success rate
        if len(self.recent_results) >= 5:  # Need some data
            success_rate = sum(self.recent_results) / len(self.recent_results)

            if success_rate >= self.success_threshold:
                # Good success rate - can speed up
                self.current_delay *= 0.9
            else:
                # Poor success rate - slow down
                self.current_delay *= 1.5

        # Keep the delay inside the configured bounds
        self.current_delay = max(self.min_delay,
                                 min(self.max_delay, self.current_delay))

# Example usage
geolocator = Nominatim(user_agent="adaptive_app")
adaptive_geocode = AdaptiveRateLimiter(geolocator.geocode, initial_delay=1.0)

# Known places mixed with strings that should not resolve, so that the
# limiter sees both outcomes
test_addresses = [
    "New York, NY",           # Good
    "Los Angeles, CA",        # Good
    "Invalid123456",          # Bad
    "Chicago, IL",            # Good
    "BadAddress!!!",          # Bad
    "Houston, TX",            # Good
    "Phoenix, AZ",            # Good
    "Philadelphia, PA",       # Good
    "Another Bad Address",    # Bad
    "San Antonio, TX",        # Good
]

print("Testing adaptive rate limiting...")
for i, address in enumerate(test_addresses):
    try:
        # Time the whole call, including any wait imposed by the limiter
        start_time = time.time()
        result = adaptive_geocode(address)
        elapsed = time.time() - start_time

        if result:
            status = "✓ Found"
        else:
            status = "○ No results"
        print(f"{i+1:2d}. {address:<20} -> {status} "
              f"(delay: {adaptive_geocode.current_delay:.2f}s, "
              f"total: {elapsed:.2f}s)")

    except Exception as e:
        print(f"{i+1:2d}. {address:<20} -> ✗ Error: {e} "
              f"(delay: {adaptive_geocode.current_delay:.2f}s)")

print(f"\nFinal delay: {adaptive_geocode.current_delay:.2f}s")
if adaptive_geocode.recent_results:
    success_rate = sum(adaptive_geocode.recent_results) / len(adaptive_geocode.recent_results)
    print(f"Recent success rate: {success_rate:.1%}")

Install with Tessl CLI