Python Geocoding Toolbox providing comprehensive geocoding services and geodesic distance calculations
—
Geopy provides built-in rate limiting to manage API quotas and prevent service abuse. It offers configurable delays, retry logic, and error handling for both synchronous and asynchronous operations.
Rate limiting wrapper for synchronous geocoding functions.
from geopy.extra.rate_limiter import RateLimiter
class RateLimiter:
"""
Rate limiting wrapper for synchronous geocoding functions.
Automatically handles delays between requests and retries on errors.
"""
def __init__(self, func, min_delay_seconds=0.0, max_retries=2,
error_wait_seconds=5.0, swallow_exceptions=True,
return_value_on_exception=None):
"""
Initialize rate limiter.
Parameters:
- func: Function to wrap (e.g., geolocator.geocode)
- min_delay_seconds (float): Minimum delay between calls
- max_retries (int): Number of retry attempts on errors
- error_wait_seconds (float): Wait time after recoverable errors
- swallow_exceptions (bool): Whether to suppress final exceptions
- return_value_on_exception: Return value when exceptions are swallowed
"""
def __call__(self, *args, **kwargs):
"""
Execute rate-limited function call.
Returns:
Function result or return_value_on_exception on failure
"""Rate limiting wrapper for asynchronous geocoding functions.
from geopy.extra.rate_limiter import AsyncRateLimiter
class AsyncRateLimiter:
    """
    Rate limiting wrapper for asynchronous geocoding functions.

    Same functionality as RateLimiter but for async/await functions.
    This is an API summary: the methods below carry signatures and
    documentation only.
    """

    def __init__(self, func, min_delay_seconds=0.0, max_retries=2,
                 error_wait_seconds=5.0, swallow_exceptions=True,
                 return_value_on_exception=None):
        """
        Initialize async rate limiter.

        Parameters: Same as RateLimiter but for async functions

        NOTE(review): upstream geopy appears to declare every option after
        ``func`` as keyword-only -- confirm against the geopy documentation.
        """

    async def __call__(self, *args, **kwargs):
        """
        Execute rate-limited async function call.

        Returns:
        Function result or return_value_on_exception on failure
        """

from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter
# Initialize geocoder
geolocator = Nominatim(user_agent="rate_limited_app")

# Create rate-limited geocoder with 1-second minimum delay
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1.0)

# Use like normal geocoder but with automatic rate limiting
addresses = [
    "New York, NY",
    "Los Angeles, CA",
    "Chicago, IL",
    "Houston, TX",
    "Phoenix, AZ",
]

results = []
for address in addresses:
    # The wrapper is configured with min_delay_seconds=1.0, so consecutive
    # calls are spaced at least one second apart.
    location = geocode(address)
    results.append((address, location))
    if location:
        print(f"✓ {address} -> {location.address}")
    else:
        # A falsy result means the service returned no match (or, with the
        # default swallow_exceptions=True, that an error was suppressed).
        print(f"○ {address} -> No results")

from geopy.geocoders import GoogleV3
from geopy.extra.rate_limiter import RateLimiter
from geopy.exc import GeocoderServiceError
# Initialize geocoder (requires API key)
geolocator = GoogleV3(api_key="your_api_key")

# Configure rate limiter with advanced options
geocode = RateLimiter(
    geolocator.geocode,
    min_delay_seconds=0.5,           # Half-second minimum delay
    max_retries=5,                   # Retry up to 5 times
    error_wait_seconds=10.0,         # Wait 10 seconds after errors
    swallow_exceptions=False,        # Don't suppress exceptions
    # Only returned when exceptions are swallowed; with
    # swallow_exceptions=False the final error is raised instead.
    return_value_on_exception=None,
)

# Test with potentially problematic addresses
test_addresses = [
    "1600 Amphitheatre Parkway, Mountain View, CA",  # Valid
    "Invalid Address 123456789",                     # Invalid
    "Times Square, New York, NY",                    # Valid
    "",                                              # Empty
    "Valid Street Address, San Francisco, CA",       # Valid
]

for address in test_addresses:
    # Exceptions are not swallowed by the limiter above, so each lookup is
    # guarded individually to keep one failure from aborting the loop.
    try:
        location = geocode(address)
        if location:
            print(f"✓ Found: {address} -> {location.address}")
        else:
            print(f"○ No results: {address}")
    except Exception as e:
        print(f"✗ Error: {address} -> {e}")

from geopy.geocoders import Bing
from geopy.extra.rate_limiter import RateLimiter
from geopy.exc import GeocoderRateLimited, GeocoderServiceError
import time
class SmartRateLimiter:
    """Rate limiter with intelligent backoff strategy.

    Wraps ``func`` so that consecutive calls are spaced by an adaptive delay.
    The delay grows by ``backoff_factor`` (capped at ``max_delay``) when the
    wrapped call raises ``GeocoderRateLimited`` and shrinks by the same factor
    (never below ``base_delay``) after each successful call.
    """

    def __init__(self, func, base_delay=1.0, max_delay=60.0,
                 backoff_factor=2.0, max_retries=5):
        """Store the wrapped callable and the backoff configuration.

        Parameters:
        - func: Callable to wrap (e.g., geolocator.geocode)
        - base_delay (float): Smallest delay between calls, in seconds
        - max_delay (float): Largest delay the backoff may reach, in seconds
        - backoff_factor (float): Multiplier/divisor applied to the delay
        - max_retries (int): Total attempts per call; must be at least 1

        Raises:
        ValueError: if ``max_retries`` is smaller than 1
        """
        if max_retries < 1:
            raise ValueError("max_retries must be at least 1")
        self.func = func
        self.base_delay = base_delay
        self.max_delay = max_delay
        self.backoff_factor = backoff_factor
        self.max_retries = max_retries
        # None means "no call made yet"; timestamps come from the monotonic
        # clock so a wall-clock adjustment cannot produce a huge sleep.
        self.last_call_time = None
        self.current_delay = base_delay

    def _wait_for_slot(self):
        """Sleep until ``current_delay`` seconds have passed since the last call."""
        if self.last_call_time is None:
            return
        time_since_last = time.monotonic() - self.last_call_time
        if time_since_last < self.current_delay:
            sleep_time = self.current_delay - time_since_last
            print(f"Rate limiting: waiting {sleep_time:.1f}s")
            time.sleep(sleep_time)

    def __call__(self, *args, **kwargs):
        """Execute with smart rate limiting.

        Returns:
        Whatever the wrapped callable returns.

        Raises:
        GeocoderRateLimited / GeocoderServiceError: re-raised unchanged when
        the final attempt fails. Any other exception propagates immediately.
        """
        for attempt in range(1, self.max_retries + 1):
            self._wait_for_slot()
            self.last_call_time = time.monotonic()
            try:
                result = self.func(*args, **kwargs)
            except GeocoderRateLimited as e:
                if attempt == self.max_retries:
                    raise
                # Increase delay on rate limiting
                self.current_delay = min(
                    self.max_delay,
                    self.current_delay * self.backoff_factor
                )
                # Prefer the wait the exception carries, when it has one.
                wait_time = e.retry_after if e.retry_after else self.current_delay
                print(f"Rate limited, waiting {wait_time}s (attempt {attempt})")
                time.sleep(wait_time)
            except GeocoderServiceError:
                if attempt == self.max_retries:
                    raise
                print(f"Service error, retrying in {self.current_delay}s")
                time.sleep(self.current_delay)
            else:
                # Success - reduce delay
                self.current_delay = max(
                    self.base_delay,
                    self.current_delay / self.backoff_factor
                )
                return result
        # Only reachable if max_retries was lowered below 1 after construction.
        raise RuntimeError("Max retries exceeded")
# Usage
geolocator = Bing(api_key="your_api_key")
smart_geocode = SmartRateLimiter(geolocator.geocode, base_delay=0.5)

addresses = ["New York", "London", "Tokyo", "Sydney", "Berlin"] * 3  # Test many

for i, address in enumerate(addresses):
    # SmartRateLimiter re-raises once its retries are used up, so each lookup
    # is guarded to let the remaining addresses proceed.
    try:
        result = smart_geocode(address)
        print(f"{i+1:2d}. {address} -> {'Found' if result else 'Not found'}")
    except Exception as e:
        print(f"{i+1:2d}. {address} -> Error: {e}")

import asyncio
from geopy.geocoders import Nominatim
from geopy.adapters import AioHTTPAdapter
from geopy.extra.rate_limiter import AsyncRateLimiter
async def async_rate_limiting_example():
    """Async rate limiting example.

    Geocodes a fixed list of addresses one after another through an
    AsyncRateLimiter and prints one line per address.

    Returns:
    List of (address, result) tuples in input order.
    """
    async with Nominatim(
        user_agent="async_rate_limited_app",
        adapter_factory=AioHTTPAdapter
    ) as geolocator:
        # Create async rate limiter
        geocode = AsyncRateLimiter(
            geolocator.geocode,
            min_delay_seconds=1.0,   # 1-second delay between calls
            max_retries=3,           # Retry up to 3 times
            error_wait_seconds=5.0   # Wait 5 seconds after errors
        )

        addresses = [
            "San Francisco, CA",
            "Seattle, WA",
            "Portland, OR",
            "Denver, CO",
            "Austin, TX",
        ]

        # Sequential processing with rate limiting
        # Note: Even though we await each call, the rate limiter
        # ensures proper delays between requests
        results = []
        for address in addresses:
            result = await geocode(address)
            results.append((address, result))
            if result:
                print(f"✓ {address} -> {result.address}")
            else:
                print(f"○ {address} -> No results")

    return results
# Run async example
asyncio.run(async_rate_limiting_example())

from geopy.geocoders import OpenCage
from geopy.extra.rate_limiter import RateLimiter
import time
import csv
class BatchGeocoder:
    """Batch geocoder with rate limiting and progress tracking.

    Counters in ``stats`` accumulate across every ``process_batch`` call made
    on the same instance.
    """

    def __init__(self, geocoder, requests_per_second=1.0, max_retries=3):
        """Wrap ``geocoder.geocode`` in a RateLimiter.

        Parameters:
        - geocoder: Object exposing a ``geocode`` method
        - requests_per_second (float): Target request rate; must be positive
        - max_retries (int): Retry attempts forwarded to the RateLimiter

        Raises:
        ValueError: if ``requests_per_second`` is not positive
        """
        if requests_per_second <= 0:
            raise ValueError("requests_per_second must be positive")
        self.delay = 1.0 / requests_per_second
        self.geocode = RateLimiter(
            geocoder.geocode,
            min_delay_seconds=self.delay,
            max_retries=max_retries,
            error_wait_seconds=5.0,
            # Let the final exception through: process_batch records it as an
            # 'error' row. Swallowing it here would make failures look like
            # 'no_results' and leave the 'errors' counter at zero.
            swallow_exceptions=False,
        )
        self.stats = {
            'processed': 0,
            'successful': 0,
            'no_results': 0,
            'errors': 0
        }

    def process_batch(self, addresses, progress_callback=None):
        """Process batch of addresses with rate limiting.

        Parameters:
        - addresses: Sized sequence of query strings
        - progress_callback: Optional callable(current, total, elapsed),
          invoked after every 10th address and after the last one

        Returns:
        List of dicts with keys 'input', 'status', 'address', 'latitude',
        'longitude'; rows with status 'error' also carry an 'error' message.
        """
        results = []
        total = len(addresses)
        start_time = time.monotonic()

        for position, address in enumerate(addresses, start=1):
            try:
                result = self.geocode(address)
            except Exception as e:
                self.stats['errors'] += 1
                results.append({
                    'input': address,
                    'status': 'error',
                    'error': str(e),
                    'address': None,
                    'latitude': None,
                    'longitude': None
                })
            else:
                if result:
                    self.stats['successful'] += 1
                    results.append({
                        'input': address,
                        'status': 'success',
                        'address': result.address,
                        'latitude': result.latitude,
                        'longitude': result.longitude
                    })
                else:
                    self.stats['no_results'] += 1
                    results.append({
                        'input': address,
                        'status': 'no_results',
                        'address': None,
                        'latitude': None,
                        'longitude': None
                    })

            self.stats['processed'] += 1

            # Progress callback: every 10 items, plus the final item so a
            # trailing partial group is still reported.
            if progress_callback and (position % 10 == 0 or position == total):
                elapsed = time.monotonic() - start_time
                progress_callback(position, total, elapsed)

        return results

    def print_stats(self):
        """Print processing statistics; prints nothing if nothing was processed."""
        total = self.stats['processed']
        if total == 0:
            return

        print(f"\nBatch Processing Statistics:")
        print(f"Total processed: {total}")
        print(f"Successful: {self.stats['successful']} ({self.stats['successful']/total*100:.1f}%)")
        print(f"No results: {self.stats['no_results']} ({self.stats['no_results']/total*100:.1f}%)")
        print(f"Errors: {self.stats['errors']} ({self.stats['errors']/total*100:.1f}%)")
def progress_callback(current, total, elapsed):
    """Progress callback function.

    Prints one line with the completed count, percentage, request rate and
    estimated seconds remaining.

    Parameters:
    - current (int): Number of items processed so far
    - total (int): Total number of items
    - elapsed (float): Seconds elapsed since processing started
    """
    # Each ratio falls back to 0 when its denominator is 0, so the callback
    # never raises ZeroDivisionError.
    rate = current / elapsed if elapsed > 0 else 0
    remaining = (total - current) / rate if rate > 0 else 0
    percent = current / total * 100 if total else 0.0
    print(f"Progress: {current}/{total} ({percent:.1f}%) - "
          f"{rate:.2f} req/s - ETA: {remaining:.0f}s")
# Example usage
# This example's import block only brings in OpenCage, but the free
# Nominatim service is what actually gets used below.
from geopy.geocoders import Nominatim

addresses = [
    "1600 Amphitheatre Parkway, Mountain View, CA",
    "1 Apple Park Way, Cupertino, CA",
    "350 Fifth Avenue, New York, NY",
    "Times Square, New York, NY",
    "Golden Gate Bridge, San Francisco, CA",
    "Space Needle, Seattle, WA",
    "Willis Tower, Chicago, IL",
    "Hollywood Sign, Los Angeles, CA",
    "Mount Rushmore, South Dakota",
    "Statue of Liberty, New York, NY"
] * 2  # Duplicate for larger test

# Initialize batch geocoder (requires API key for OpenCage)
# geolocator = OpenCage(api_key="your_api_key")
geolocator = Nominatim(user_agent="batch_app")  # Using free service for example

batch_geocoder = BatchGeocoder(
    geolocator,
    requests_per_second=0.5,  # Conservative rate for free service
    max_retries=3
)

# Process batch
print(f"Starting batch processing of {len(addresses)} addresses...")
results = batch_geocoder.process_batch(addresses, progress_callback)

# Print statistics
batch_geocoder.print_stats()

# Save results to CSV
# Only 'error' rows carry an 'error' key; DictWriter fills the missing
# column with its default empty value for the other rows.
with open('batch_results.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.DictWriter(f, fieldnames=['input', 'status', 'address', 'latitude', 'longitude', 'error'])
    writer.writeheader()
    writer.writerows(results)

print("Results saved to batch_results.csv")

from geopy.geocoders import Nominatim, Photon
from geopy.extra.rate_limiter import RateLimiter
import time
class MultiServiceGeocoder:
    """Geocoder that uses multiple services with individual rate limiting.

    ``services`` maps a service key to a dict holding the 'geocoder', its
    'rate_limiter' wrapper and a display 'name'.
    """

    def __init__(self):
        """Create the geocoders and wrap each one in its own RateLimiter."""
        nominatim = Nominatim(user_agent="multi_service_app")
        photon = Photon(user_agent="multi_service_app")

        # Each limiter is built directly around the function it limits, so no
        # limiter ever exists in a half-initialised state with func=None.
        self.services = {
            'nominatim': {
                'geocoder': nominatim,
                'rate_limiter': RateLimiter(
                    nominatim.geocode,
                    min_delay_seconds=1.0,  # Nominatim's required delay
                    max_retries=2
                ),
                'name': 'OpenStreetMap Nominatim'
            },
            'photon': {
                'geocoder': photon,
                'rate_limiter': RateLimiter(
                    photon.geocode,
                    min_delay_seconds=0.1,  # Photon is more permissive
                    max_retries=2
                ),
                'name': 'Photon'
            }
        }

    def _try_service(self, service_key, query):
        """Query one service; return its result, or None on no match or error."""
        service_data = self.services[service_key]
        try:
            print(f"Trying {service_data['name']}...")
            result = service_data['rate_limiter'](query)
        except Exception as e:
            print(f"✗ Error with {service_data['name']}: {e}")
            return None
        if result:
            print(f"✓ Success with {service_data['name']}")
            return result
        print(f"○ No results from {service_data['name']}")
        return None

    def geocode_with_fallback(self, query, preferred_service='nominatim'):
        """Geocode with service fallback.

        Tries ``preferred_service`` first (when it is a known key), then the
        remaining services in the order they appear in ``services``.

        Returns:
        (result, service_key) for the first service that yields a truthy
        result, or (None, None) when none does.
        """
        ordered_keys = []
        if preferred_service in self.services:
            ordered_keys.append(preferred_service)
        ordered_keys.extend(
            key for key in self.services if key != preferred_service
        )

        for service_key in ordered_keys:
            result = self._try_service(service_key, query)
            if result:
                return result, service_key

        return None, None

    def batch_geocode(self, addresses, preferred_service='nominatim'):
        """Batch geocode with service fallback.

        Returns:
        List of dicts with keys 'address', 'result', 'service_used' and
        'success', one per input address and in input order.
        """
        results = []
        total = len(addresses)

        for position, address in enumerate(addresses, start=1):
            print(f"\n{position}/{total}: Processing '{address}'")

            result, used_service = self.geocode_with_fallback(
                address,
                preferred_service
            )

            results.append({
                'address': address,
                'result': result,
                'service_used': used_service,
                'success': result is not None
            })

            # Brief pause between addresses to be respectful; nothing follows
            # the last address, so no pause is needed after it.
            if position < total:
                time.sleep(0.5)

        return results
# Example usage
multi_geocoder = MultiServiceGeocoder()

test_addresses = [
    "Paris, France",
    "Tokyo, Japan",
    "New York City, USA",
    "Invalid Address 123456",
    "London, UK"
]

print("Starting multi-service batch geocoding...")
results = multi_geocoder.batch_geocode(test_addresses, preferred_service='nominatim')

# Analyze results
print(f"\n=== Results Summary ===")
successful = sum(1 for r in results if r['success'])
print(f"Success rate: {successful}/{len(results)} ({successful/len(results)*100:.1f}%)")

# Count how many addresses each service resolved; failed addresses have
# service_used set to None and are skipped.
service_usage = {}
for result in results:
    if result['service_used']:
        service_usage[result['service_used']] = service_usage.get(result['service_used'], 0) + 1

print("Service usage:")
for service, count in service_usage.items():
    service_name = multi_geocoder.services[service]['name']
    print(f"  {service_name}: {count}")

from geopy.geocoders import Nominatim
import time
import random
class AdaptiveRateLimiter:
    """Rate limiter that adapts based on success/failure patterns.

    A call counts as a success only when the wrapped function returns without
    raising and its result is not None. Once at least 5 of the last 10
    outcomes are known, the delay shrinks by 10% while the success rate meets
    ``success_threshold`` and grows by 50% otherwise.
    """

    def __init__(self, func, initial_delay=1.0, min_delay=0.1, max_delay=10.0):
        """Store the wrapped callable and the delay bounds.

        Parameters:
        - func: Callable to wrap (e.g., geolocator.geocode)
        - initial_delay (float): Starting delay between calls, in seconds
        - min_delay (float): Lower bound for the adapted delay
        - max_delay (float): Upper bound for the adapted delay
        """
        self.func = func
        self.current_delay = initial_delay
        self.min_delay = min_delay
        self.max_delay = max_delay
        # None means "no call made yet"; timestamps come from the monotonic
        # clock so a wall-clock adjustment cannot distort the wait.
        self.last_call_time = None

        # Success/failure tracking
        self.recent_results = []  # Track last 10 results
        self.success_threshold = 0.8  # 80% success rate target

    def __call__(self, *args, **kwargs):
        """Execute with adaptive rate limiting.

        Returns:
        Whatever the wrapped callable returns.

        Raises:
        Any exception from the wrapped callable, re-raised after the failure
        has been recorded.
        """
        # Wait for current delay
        if self.last_call_time is not None:
            time_since_last = time.monotonic() - self.last_call_time
            if time_since_last < self.current_delay:
                time.sleep(self.current_delay - time_since_last)

        # Make the call
        self.last_call_time = time.monotonic()
        try:
            result = self.func(*args, **kwargs)
        except Exception:
            self._record_result(False, False)
            raise
        self._record_result(True, result is not None)
        return result

    def _record_result(self, call_successful, has_result):
        """Record result and adapt delay."""
        # Track overall success (call didn't fail + got result)
        overall_success = call_successful and has_result

        # Keep only recent results
        self.recent_results.append(overall_success)
        if len(self.recent_results) > 10:
            self.recent_results.pop(0)

        # Adapt delay based on success rate
        if len(self.recent_results) >= 5:  # Need some data
            success_rate = sum(self.recent_results) / len(self.recent_results)

            if success_rate >= self.success_threshold:
                # Good success rate - can speed up
                self.current_delay = max(
                    self.min_delay,
                    self.current_delay * 0.9
                )
            else:
                # Poor success rate - slow down
                self.current_delay = min(
                    self.max_delay,
                    self.current_delay * 1.5
                )

        # Add some jitter to avoid thundering herd; applied on every call,
        # then clamped back into [min_delay, max_delay].
        jitter = random.uniform(0.8, 1.2)
        self.current_delay *= jitter
        self.current_delay = max(self.min_delay,
                                 min(self.max_delay, self.current_delay))
# Example usage
geolocator = Nominatim(user_agent="adaptive_app")
adaptive_geocode = AdaptiveRateLimiter(geolocator.geocode, initial_delay=1.0)
# Mix of good and bad addresses to test adaptation
test_addresses = [
"New York, NY", # Good
"Los Angeles, CA", # Good
"Invalid123456", # Bad
"Chicago, IL", # Good
"BadAddress!!!", # Bad
"Houston, TX", # Good
"Phoenix, AZ", # Good
"Philadelphia, PA", # Good
"Another Bad Address", # Bad
"San Antonio, TX", # Good
]
print("Testing adaptive rate limiting...")
for i, address in enumerate(test_addresses):
try:
start_time = time.time()
result = adaptive_geocode(address)
elapsed = time.time() - start_time
status = "✓ Found" if result else "○ No results"
print(f"{i+1:2d}. {address:<20} -> {status} "
f"(delay: {adaptive_geocode.current_delay:.2f}s, "
f"total: {elapsed:.2f}s)")
except Exception as e:
print(f"{i+1:2d}. {address:<20} -> ✗ Error: {e} "
f"(delay: {adaptive_geocode.current_delay:.2f}s)")
print(f"\nFinal delay: {adaptive_geocode.current_delay:.2f}s")
if adaptive_geocode.recent_results:
success_rate = sum(adaptive_geocode.recent_results) / len(adaptive_geocode.recent_results)
print(f"Recent success rate: {success_rate:.1%}")Install with Tessl CLI
npx tessl i tessl/pypi-geopy