# Rate Limiting

Geopy provides built-in rate limiting functionality to manage API quotas and prevent service abuse, with configurable delays, retry logic, and error handling for both synchronous and asynchronous operations.

## Capabilities

### Synchronous Rate Limiter

Rate limiting wrapper for synchronous geocoding functions.

```python { .api }
11
from geopy.extra.rate_limiter import RateLimiter
12
13
class RateLimiter:
    """
    Rate limiting wrapper for synchronous geocoding functions.

    Automatically handles delays between requests and retries on errors.
    """

    def __init__(self, func, min_delay_seconds=0.0, max_retries=2,
                 error_wait_seconds=5.0, swallow_exceptions=True,
                 return_value_on_exception=None):
        """
        Initialize rate limiter.

        Pass every option after ``func`` by keyword, as all the usage
        examples in this document do.

        Parameters:
        - func: Function to wrap (e.g., geolocator.geocode)
        - min_delay_seconds (float): Minimum delay between calls
        - max_retries (int): Number of retry attempts on errors
        - error_wait_seconds (float): Wait time after recoverable errors
        - swallow_exceptions (bool): Whether to suppress final exceptions
        - return_value_on_exception: Return value when exceptions are swallowed
        """

    def __call__(self, *args, **kwargs):
        """
        Execute rate-limited function call.

        Positional and keyword arguments are forwarded to the wrapped function.

        Returns:
            Function result, or return_value_on_exception when the call
            ultimately fails and swallow_exceptions is enabled
        """
41
```

### Asynchronous Rate Limiter

Rate limiting wrapper for asynchronous geocoding functions.

```python { .api }
48
from geopy.extra.rate_limiter import AsyncRateLimiter
49
50
class AsyncRateLimiter:
    """
    Rate limiting wrapper for asynchronous geocoding functions.

    Same functionality as RateLimiter but for async/await functions.
    """

    def __init__(self, func, min_delay_seconds=0.0, max_retries=2,
                 error_wait_seconds=5.0, swallow_exceptions=True,
                 return_value_on_exception=None):
        """
        Initialize async rate limiter.

        Parameters (same meaning as for RateLimiter):
        - func: Async function to wrap (e.g., geolocator.geocode of a
          geocoder created with an async adapter)
        - min_delay_seconds (float): Minimum delay between calls
        - max_retries (int): Number of retry attempts on errors
        - error_wait_seconds (float): Wait time after recoverable errors
        - swallow_exceptions (bool): Whether to suppress final exceptions
        - return_value_on_exception: Return value when exceptions are swallowed
        """

    async def __call__(self, *args, **kwargs):
        """
        Execute rate-limited async function call.

        Must be awaited. Positional and keyword arguments are forwarded to
        the wrapped function.

        Returns:
            Function result, or return_value_on_exception when the call
            ultimately fails and swallow_exceptions is enabled
        """
72
```

## Usage Examples

### Basic Rate Limiting

```python
79
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter

# Initialize geocoder
geolocator = Nominatim(user_agent="rate_limited_app")

# Create rate-limited geocoder with 1-second minimum delay
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1.0)

# Use like normal geocoder but with automatic rate limiting
addresses = [
    "New York, NY",
    "Los Angeles, CA",
    "Chicago, IL",
    "Houston, TX",
    "Phoenix, AZ"
]

results = []
for address in addresses:
    # The limiter enforces the 1-second minimum gap between calls
    location = geocode(address)
    results.append((address, location))
    if location:
        print(f"✓ {address} -> {location.address}")
    else:
        print(f"○ {address} -> No results")
106
```

### Advanced Rate Limiting Configuration

```python
111
from geopy.geocoders import GoogleV3
from geopy.extra.rate_limiter import RateLimiter
from geopy.exc import GeocoderServiceError

# Initialize geocoder (requires API key)
geolocator = GoogleV3(api_key="your_api_key")

# Configure rate limiter with advanced options
geocode = RateLimiter(
    geolocator.geocode,
    min_delay_seconds=0.5,          # Half-second minimum delay
    max_retries=5,                  # Retry up to 5 times
    error_wait_seconds=10.0,        # Wait 10 seconds after errors
    swallow_exceptions=False,       # Don't suppress exceptions
    return_value_on_exception=None  # Only used when swallow_exceptions=True
)

# Test with potentially problematic addresses
test_addresses = [
    "1600 Amphitheatre Parkway, Mountain View, CA",  # Valid
    "Invalid Address 123456789",                     # Invalid
    "Times Square, New York, NY",                    # Valid
    "",                                              # Empty
    "Valid Street Address, San Francisco, CA"        # Valid
]

for address in test_addresses:
    try:
        location = geocode(address)
    except GeocoderServiceError as e:
        # With swallow_exceptions=False the limiter re-raises once its
        # retries are exhausted, so failures surface here.
        print(f"✗ Error: {address} -> {e}")
    else:
        if location:
            print(f"✓ Found: {address} -> {location.address}")
        else:
            print(f"○ No results: {address}")
146
```

### Rate Limiting with Custom Retry Logic

```python
151
from geopy.geocoders import Bing
152
from geopy.extra.rate_limiter import RateLimiter
153
from geopy.exc import GeocoderRateLimited, GeocoderServiceError
154
import time
155
156
class SmartRateLimiter:
    """Rate limiter with intelligent backoff strategy.

    Wraps a callable, enforces a minimum gap between calls, and adjusts that
    gap: it grows by ``backoff_factor`` (capped at ``max_delay``) whenever the
    service reports rate limiting, and shrinks back toward ``base_delay``
    after each successful call.
    """

    def __init__(self, func, base_delay=1.0, max_delay=60.0, backoff_factor=2.0,
                 max_retries=5):
        """
        Parameters:
        - func: Function to wrap (e.g., geolocator.geocode)
        - base_delay (float): Smallest gap, in seconds, between calls
        - max_delay (float): Upper bound for the adaptive gap
        - backoff_factor (float): Multiplier applied on rate limiting and
          divisor applied on success
        - max_retries (int): Total attempts per call (at least 1)

        Raises:
            ValueError: if max_retries is less than 1
        """
        if max_retries < 1:
            raise ValueError("max_retries must be at least 1")

        self.func = func
        self.base_delay = base_delay
        self.max_delay = max_delay
        self.backoff_factor = backoff_factor
        self.max_retries = max_retries
        # -inf means "never called", so the first call is never delayed.
        self.last_call_time = float("-inf")
        self.current_delay = base_delay

    def _wait_for_slot(self):
        """Sleep until current_delay seconds have passed since the last call."""
        # monotonic() is immune to wall-clock adjustments, unlike time().
        time_since_last = time.monotonic() - self.last_call_time
        if time_since_last < self.current_delay:
            sleep_time = self.current_delay - time_since_last
            print(f"Rate limiting: waiting {sleep_time:.1f}s")
            time.sleep(sleep_time)

    def __call__(self, *args, **kwargs):
        """Execute with smart rate limiting.

        Returns:
            Whatever the wrapped function returns.

        Raises:
            GeocoderRateLimited / GeocoderServiceError: re-raised when the
            last attempt fails. Any other exception propagates immediately.
        """
        for attempt in range(1, self.max_retries + 1):
            self._wait_for_slot()
            self.last_call_time = time.monotonic()

            try:
                result = self.func(*args, **kwargs)

            except GeocoderRateLimited as e:
                if attempt == self.max_retries:
                    raise

                # Increase delay on rate limiting
                self.current_delay = min(
                    self.max_delay,
                    self.current_delay * self.backoff_factor
                )

                # Prefer the wait carried by the exception when it has one.
                wait_time = e.retry_after if e.retry_after else self.current_delay
                print(f"Rate limited, waiting {wait_time}s (attempt {attempt})")
                time.sleep(wait_time)

            except GeocoderServiceError:
                if attempt == self.max_retries:
                    raise

                print(f"Service error, retrying in {self.current_delay}s")
                time.sleep(self.current_delay)

            else:
                # Success - reduce delay
                self.current_delay = max(
                    self.base_delay,
                    self.current_delay / self.backoff_factor
                )
                return result

        # Defensive only: with max_retries >= 1 the last attempt always
        # returns or raises above.
        raise RuntimeError("Max retries exceeded")
212
213
# Usage
geolocator = Bing(api_key="your_api_key")
smart_geocode = SmartRateLimiter(geolocator.geocode, base_delay=0.5)

addresses = ["New York", "London", "Tokyo", "Sydney", "Berlin"] * 3  # Test many

for i, address in enumerate(addresses):
    try:
        result = smart_geocode(address)
        print(f"{i+1:2d}. {address} -> {'Found' if result else 'Not found'}")
    except Exception as e:
        # Demo loop: report the failure and move on to the next address.
        print(f"{i+1:2d}. {address} -> Error: {e}")
225
```

### Async Rate Limiting

```python
230
import asyncio
231
from geopy.geocoders import Nominatim
232
from geopy.adapters import AioHTTPAdapter
233
from geopy.extra.rate_limiter import AsyncRateLimiter
234
235
async def async_rate_limiting_example():
    """Async rate limiting example.

    Geocodes a fixed list of addresses one at a time through an
    AsyncRateLimiter and returns a list of (address, result) tuples.
    """

    async with Nominatim(
        user_agent="async_rate_limited_app",
        adapter_factory=AioHTTPAdapter
    ) as geolocator:

        # Create async rate limiter
        geocode = AsyncRateLimiter(
            geolocator.geocode,
            min_delay_seconds=1.0,   # 1-second delay between calls
            max_retries=3,           # Retry up to 3 times
            error_wait_seconds=5.0   # Wait 5 seconds after errors
        )

        addresses = [
            "San Francisco, CA",
            "Seattle, WA",
            "Portland, OR",
            "Denver, CO",
            "Austin, TX"
        ]

        # Sequential processing with rate limiting
        # Note: Even though we await each call, the rate limiter
        # ensures proper delays between requests
        results = []
        for address in addresses:
            result = await geocode(address)
            results.append((address, result))
            if result:
                print(f"✓ {address} -> {result.address}")
            else:
                print(f"○ {address} -> No results")

        return results

# Run async example
asyncio.run(async_rate_limiting_example())
275
```

### Batch Processing with Rate Limiting

```python
280
from geopy.geocoders import OpenCage
281
from geopy.extra.rate_limiter import RateLimiter
282
import time
283
import csv
284
285
class BatchGeocoder:
    """Batch geocoder with rate limiting and progress tracking.

    Wraps ``geocoder.geocode`` in a RateLimiter and keeps running counters in
    ``self.stats`` ('processed', 'successful', 'no_results', 'errors').
    """

    def __init__(self, geocoder, requests_per_second=1.0, max_retries=3):
        """
        Parameters:
        - geocoder: Object exposing a ``geocode`` method
        - requests_per_second (float): Target request rate; must be positive
        - max_retries (int): Passed through to RateLimiter

        Raises:
            ValueError: if requests_per_second is not positive
        """
        if requests_per_second <= 0:
            raise ValueError("requests_per_second must be positive")

        self.delay = 1.0 / requests_per_second
        self.geocode = RateLimiter(
            geocoder.geocode,
            min_delay_seconds=self.delay,
            max_retries=max_retries,
            error_wait_seconds=5.0,
            # Let failures propagate so process_batch can record them as
            # 'error' rows; swallowing them would report every failure as
            # 'no_results' and leave the 'errors' counter at zero.
            swallow_exceptions=False
        )
        self.stats = {
            'processed': 0,
            'successful': 0,
            'no_results': 0,
            'errors': 0
        }

    @staticmethod
    def _row(address, status, result=None):
        """Build one result record; location fields are None without a result."""
        return {
            'input': address,
            'status': status,
            'address': result.address if result else None,
            'latitude': result.latitude if result else None,
            'longitude': result.longitude if result else None
        }

    def process_batch(self, addresses, progress_callback=None):
        """Process batch of addresses with rate limiting.

        Parameters:
        - addresses: Addresses to geocode (must support len() when a
          progress callback is supplied)
        - progress_callback: Optional callable(done, total, elapsed_seconds),
          invoked after every 10th address and after the last one

        Returns:
            List of dicts with keys 'input', 'status', 'address', 'latitude',
            'longitude'; rows with status 'error' also carry 'error'.
        """
        results = []
        total = len(addresses) if progress_callback else None
        start_time = time.monotonic()

        for done, address in enumerate(addresses, start=1):
            try:
                result = self.geocode(address)
            except Exception as e:
                # Best-effort batch: one bad address must not abort the run.
                self.stats['errors'] += 1
                row = self._row(address, 'error')
                row['error'] = str(e)
                results.append(row)
            else:
                if result:
                    self.stats['successful'] += 1
                    results.append(self._row(address, 'success', result))
                else:
                    self.stats['no_results'] += 1
                    results.append(self._row(address, 'no_results'))

            self.stats['processed'] += 1

            # Progress callback: every 10 items, plus once at the very end so
            # batches whose size is not a multiple of 10 still report 100%.
            if progress_callback and (done % 10 == 0 or done == total):
                elapsed = time.monotonic() - start_time
                progress_callback(done, total, elapsed)

        return results

    def print_stats(self):
        """Print processing statistics (prints nothing if no address was processed)."""
        total = self.stats['processed']
        if total == 0:
            return

        print("\nBatch Processing Statistics:")
        print(f"Total processed: {total}")
        print(f"Successful: {self.stats['successful']} ({self.stats['successful']/total*100:.1f}%)")
        print(f"No results: {self.stats['no_results']} ({self.stats['no_results']/total*100:.1f}%)")
        print(f"Errors: {self.stats['errors']} ({self.stats['errors']/total*100:.1f}%)")
364
365
def progress_callback(current, total, elapsed):
    """Progress callback function.

    Prints items done, percentage, throughput, and an ETA extrapolated from
    the average rate so far.

    Parameters:
    - current (int): Number of items processed so far
    - total (int): Total number of items
    - elapsed (float): Seconds elapsed since processing started
    """
    rate = current / elapsed if elapsed > 0 else 0
    remaining = (total - current) / rate if rate > 0 else 0
    # Guard the percentage the same way as the rate: an empty batch must not
    # raise ZeroDivisionError.
    percent = current / total * 100 if total else 0.0
    print(f"Progress: {current}/{total} ({percent:.1f}%) - "
          f"{rate:.2f} req/s - ETA: {remaining:.0f}s")
371
372
# Example usage
# Nominatim is used below, but this snippet's import line only brings in
# OpenCage, so import it here.
from geopy.geocoders import Nominatim

addresses = [
    "1600 Amphitheatre Parkway, Mountain View, CA",
    "1 Apple Park Way, Cupertino, CA",
    "350 Fifth Avenue, New York, NY",
    "Times Square, New York, NY",
    "Golden Gate Bridge, San Francisco, CA",
    "Space Needle, Seattle, WA",
    "Willis Tower, Chicago, IL",
    "Hollywood Sign, Los Angeles, CA",
    "Mount Rushmore, South Dakota",
    "Statue of Liberty, New York, NY"
] * 2  # Duplicate for larger test

# Initialize batch geocoder (requires API key for OpenCage)
# geolocator = OpenCage(api_key="your_api_key")
geolocator = Nominatim(user_agent="batch_app")  # Using free service for example

batch_geocoder = BatchGeocoder(
    geolocator,
    requests_per_second=0.5,  # Conservative rate for free service
    max_retries=3
)

# Process batch
print(f"Starting batch processing of {len(addresses)} addresses...")
results = batch_geocoder.process_batch(addresses, progress_callback)

# Print statistics
batch_geocoder.print_stats()

# Save results to CSV
# 'error' is only present on failed rows; DictWriter leaves it blank elsewhere.
with open('batch_results.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.DictWriter(f, fieldnames=['input', 'status', 'address', 'latitude', 'longitude', 'error'])
    writer.writeheader()
    writer.writerows(results)

print("Results saved to batch_results.csv")
410
```

### Rate Limiting with Multiple Services

```python
415
from geopy.geocoders import Nominatim, Photon
416
from geopy.extra.rate_limiter import RateLimiter
417
import time
418
419
class MultiServiceGeocoder:
    """Geocoder that uses multiple services with individual rate limiting.

    ``self.services`` maps a service key to a dict with the entries
    'geocoder', 'rate_limiter' (a callable taking the query), and 'name'
    (the label used in printed messages).
    """

    def __init__(self):
        """Create the geocoders and wrap each one in its own RateLimiter."""
        nominatim = Nominatim(user_agent="multi_service_app")
        photon = Photon(user_agent="multi_service_app")

        # Build each limiter directly around the bound geocode method rather
        # than constructing it with func=None and patching `.func` afterwards.
        self.services = {
            'nominatim': {
                'geocoder': nominatim,
                'rate_limiter': RateLimiter(
                    nominatim.geocode,
                    min_delay_seconds=1.0,  # Nominatim's required delay
                    max_retries=2
                ),
                'name': 'OpenStreetMap Nominatim'
            },
            'photon': {
                'geocoder': photon,
                'rate_limiter': RateLimiter(
                    photon.geocode,
                    min_delay_seconds=0.1,  # Photon is more permissive
                    max_retries=2
                ),
                'name': 'Photon'
            }
        }

    def _try_service(self, service_key, query):
        """Query one service; return its result, or None on no-result or error."""
        service_data = self.services[service_key]
        print(f"Trying {service_data['name']}...")
        try:
            result = service_data['rate_limiter'](query)
        except Exception as e:
            # Deliberately broad: any failure should fall through to the
            # next service instead of aborting the lookup.
            print(f"✗ Error with {service_data['name']}: {e}")
            return None

        if result:
            print(f"✓ Success with {service_data['name']}")
            return result

        print(f"○ No results from {service_data['name']}")
        return None

    def geocode_with_fallback(self, query, preferred_service='nominatim'):
        """Geocode with service fallback.

        Tries ``preferred_service`` first (if it is a known key), then the
        remaining services in the order they appear in ``self.services``.

        Returns:
            (result, service_key) for the first service that yields a result,
            or (None, None) when none does.
        """
        order = [key for key in self.services if key != preferred_service]
        if preferred_service in self.services:
            order.insert(0, preferred_service)

        for service_key in order:
            result = self._try_service(service_key, query)
            if result:
                return result, service_key

        return None, None

    def batch_geocode(self, addresses, preferred_service='nominatim',
                      pause_seconds=0.5):
        """Batch geocode with service fallback.

        Parameters:
        - addresses: Sequence of query strings
        - preferred_service (str): Key of the service to try first
        - pause_seconds (float): Extra pause after each address, on top of
          the per-service rate limiting; 0 disables it

        Returns:
            List of dicts with keys 'address', 'result', 'service_used',
            'success'.
        """
        results = []

        for i, address in enumerate(addresses):
            print(f"\n{i+1}/{len(addresses)}: Processing '{address}'")

            result, used_service = self.geocode_with_fallback(
                address,
                preferred_service
            )

            results.append({
                'address': address,
                'result': result,
                'service_used': used_service,
                'success': result is not None
            })

            # Brief pause between addresses to be respectful
            if pause_seconds > 0:
                time.sleep(pause_seconds)

        return results
507
508
# Example usage
from collections import Counter

multi_geocoder = MultiServiceGeocoder()

test_addresses = [
    "Paris, France",
    "Tokyo, Japan",
    "New York City, USA",
    "Invalid Address 123456",
    "London, UK"
]

print("Starting multi-service batch geocoding...")
results = multi_geocoder.batch_geocode(test_addresses, preferred_service='nominatim')

# Analyze results
print("\n=== Results Summary ===")
successful = sum(1 for r in results if r['success'])
print(f"Success rate: {successful}/{len(results)} ({successful/len(results)*100:.1f}%)")

# Count how many addresses each service resolved (failed lookups have no service)
service_usage = Counter(
    result['service_used'] for result in results if result['service_used']
)

print("Service usage:")
for service, count in service_usage.items():
    service_name = multi_geocoder.services[service]['name']
    print(f"  {service_name}: {count}")
536
```

### Custom Rate Limiting Strategies

```python
541
from geopy.geocoders import Nominatim
542
import time
543
import random
544
545
class AdaptiveRateLimiter:
    """Rate limiter that adapts based on success/failure patterns.

    Keeps the outcome of the last 10 calls and, once at least 5 are recorded,
    shortens the delay when the success rate meets ``success_threshold`` and
    lengthens it otherwise. The delay always stays within
    [min_delay, max_delay].
    """

    def __init__(self, func, initial_delay=1.0, min_delay=0.1, max_delay=10.0):
        """
        Parameters:
        - func: Function to wrap (e.g., geolocator.geocode)
        - initial_delay (float): Starting delay between calls, in seconds
        - min_delay (float): Lower bound for the adaptive delay
        - max_delay (float): Upper bound for the adaptive delay
        """
        self.func = func
        self.current_delay = initial_delay
        self.min_delay = min_delay
        self.max_delay = max_delay
        # -inf means "never called", so the first call is never delayed.
        self.last_call_time = float("-inf")

        # Success/failure tracking
        self.recent_results = []  # Track last 10 results
        self.success_threshold = 0.8  # 80% success rate target

    def __call__(self, *args, **kwargs):
        """Execute with adaptive rate limiting.

        Returns:
            Whatever the wrapped function returns.

        Raises:
            Any exception from the wrapped function, after recording the
            call as a failure.
        """
        # Wait for current delay. monotonic() is used for the interval
        # because it is unaffected by wall-clock adjustments.
        time_since_last = time.monotonic() - self.last_call_time
        if time_since_last < self.current_delay:
            time.sleep(self.current_delay - time_since_last)

        # Make the call
        self.last_call_time = time.monotonic()

        try:
            result = self.func(*args, **kwargs)
        except Exception:
            self._record_result(False, False)
            raise

        self._record_result(True, result is not None)
        return result

    def _record_result(self, call_successful, has_result):
        """Record result and adapt delay."""
        # Track overall success (call didn't fail + got result)
        overall_success = call_successful and has_result

        # Keep only recent results
        self.recent_results.append(overall_success)
        if len(self.recent_results) > 10:
            self.recent_results.pop(0)

        # Adapt delay based on success rate
        if len(self.recent_results) >= 5:  # Need some data
            success_rate = sum(self.recent_results) / len(self.recent_results)

            if success_rate >= self.success_threshold:
                # Good success rate - can speed up
                self.current_delay = max(
                    self.min_delay,
                    self.current_delay * 0.9
                )
            else:
                # Poor success rate - slow down
                self.current_delay = min(
                    self.max_delay,
                    self.current_delay * 1.5
                )

        # Add some jitter to avoid thundering herd
        jitter = random.uniform(0.8, 1.2)
        self.current_delay *= jitter
        # Re-clamp: the jitter may have pushed the delay outside the bounds.
        self.current_delay = max(self.min_delay,
                                 min(self.max_delay, self.current_delay))
611
612
# Example usage
geolocator = Nominatim(user_agent="adaptive_app")
adaptive_geocode = AdaptiveRateLimiter(geolocator.geocode, initial_delay=1.0)

# Mix of good and bad addresses to test adaptation
test_addresses = [
    "New York, NY",           # Good
    "Los Angeles, CA",        # Good
    "Invalid123456",          # Bad
    "Chicago, IL",            # Good
    "BadAddress!!!",          # Bad
    "Houston, TX",            # Good
    "Phoenix, AZ",            # Good
    "Philadelphia, PA",       # Good
    "Another Bad Address",    # Bad
    "San Antonio, TX",        # Good
]

print("Testing adaptive rate limiting...")
for i, address in enumerate(test_addresses):
    try:
        # Time the whole call, including any wait imposed by the limiter
        start_time = time.time()
        result = adaptive_geocode(address)
        elapsed = time.time() - start_time

        status = "✓ Found" if result else "○ No results"
        print(f"{i+1:2d}. {address:<20} -> {status} "
              f"(delay: {adaptive_geocode.current_delay:.2f}s, "
              f"total: {elapsed:.2f}s)")

    except Exception as e:
        print(f"{i+1:2d}. {address:<20} -> ✗ Error: {e} "
              f"(delay: {adaptive_geocode.current_delay:.2f}s)")

print(f"\nFinal delay: {adaptive_geocode.current_delay:.2f}s")
if adaptive_geocode.recent_results:
    success_rate = sum(adaptive_geocode.recent_results) / len(adaptive_geocode.recent_results)
    print(f"Recent success rate: {success_rate:.1%}")
650
```