0
# Configuration and Utilities
1
2
This module covers configuration options, proxy support, utility functions, and result-processing helpers: the foundational configuration and utility functionality that supports DDGS search operations.
3
4
## Package Version
5
6
```python { .api }
7
__version__ = "9.5.5"
8
```
9
10
Access the package version:
11
12
```python
13
from ddgs import __version__
14
print(__version__) # "9.5.5"
15
```
16
17
## Capabilities
18
19
### DDGS Configuration
20
21
Main configuration class for initializing DDGS with proxy, timeout, and SSL verification settings.
22
23
```python { .api }
24
class DDGS:
25
threads: int | None = None
26
_executor: ThreadPoolExecutor | None = None
27
28
def __init__(
29
self,
30
proxy: str | None = None,
31
timeout: int | None = 5,
32
verify: bool = True
33
):
34
"""
35
Initialize DDGS search coordinator.
36
37
Parameters:
38
- proxy: Proxy URL for requests. Supports:
39
- HTTP proxies: "http://proxy.example.com:8080"
40
- SOCKS proxies: "socks5://127.0.0.1:9050"
41
- Tor Browser shortcut: "tb" (expands to "socks5h://127.0.0.1:9150")
42
- Environment variable: Uses DDGS_PROXY if proxy=None
43
- timeout: Request timeout in seconds (default: 5)
44
- verify: SSL certificate verification (default: True)
45
"""
46
47
def __enter__(self) -> 'DDGS':
48
"""Context manager entry point."""
49
50
def __exit__(self, exc_type, exc_val, exc_tb) -> None:
51
"""Context manager exit point."""
52
53
@classmethod
54
def get_executor(cls) -> ThreadPoolExecutor:
55
"""
56
Get cached ThreadPoolExecutor for concurrent searches.
57
58
Returns:
59
ThreadPoolExecutor instance configured with cls.threads workers
60
"""
61
```
62
63
**Usage Examples:**
64
65
```python
66
# Basic configuration
67
with DDGS() as ddgs:
68
results = ddgs.text("query")
69
70
# Custom timeout and proxy
71
with DDGS(proxy="socks5://127.0.0.1:9050", timeout=15) as ddgs:
72
results = ddgs.text("query")
73
74
# Tor Browser proxy shortcut (note: verify=False disables SSL certificate verification)
75
with DDGS(proxy="tb", verify=False) as ddgs:
76
results = ddgs.text("sensitive query")
77
78
# Environment-based proxy configuration
79
import os
80
os.environ['DDGS_PROXY'] = 'http://proxy.example.com:8080'
81
with DDGS() as ddgs: # Uses environment proxy
82
results = ddgs.text("query")
83
84
# Configure global thread pool
85
DDGS.threads = 20 # Use 20 threads for concurrent searches
86
with DDGS() as ddgs:
87
results = ddgs.text("query", backend="all") # Uses more threads
88
```
89
90
### JSON Utilities
91
92
Utility functions for JSON serialization and deserialization with optimized performance.
93
94
```python { .api }
95
def json_dumps(obj: Any) -> str:
96
"""
97
Serialize object to JSON string.
98
99
Uses orjson library if available for better performance,
100
falls back to standard json library.
101
102
Parameters:
103
- obj: Object to serialize
104
105
Returns:
106
JSON string representation
107
"""
108
109
def json_loads(obj: str | bytes) -> Any:
110
"""
111
Deserialize JSON string to Python object.
112
113
Uses orjson library if available for better performance,
114
falls back to standard json library.
115
116
Parameters:
117
- obj: JSON string or bytes to deserialize
118
119
Returns:
120
Deserialized Python object
121
"""
122
```
123
124
**Usage Examples:**
125
126
```python
127
from ddgs.utils import json_dumps, json_loads
128
129
# Serialize search results (assumes `ddgs` is an already-initialized DDGS instance)
130
results = ddgs.text("query")
131
json_string = json_dumps(results)
132
133
# Deserialize JSON data
134
data = json_loads(json_string)
135
136
# Use with file operations
137
with open('results.json', 'w') as f:
138
f.write(json_dumps(results))
139
140
with open('results.json', 'r') as f:
141
loaded_results = json_loads(f.read())
142
```
143
144
### Proxy Configuration
145
146
Advanced proxy configuration options and utilities.
147
148
```python { .api }
149
def _expand_proxy_tb_alias(proxy: str | None) -> str | None:
150
"""
151
Expand Tor Browser proxy alias to full SOCKS URL.
152
153
Parameters:
154
- proxy: Proxy string, may contain "tb" alias
155
156
Returns:
157
Expanded proxy URL or None
158
159
Example:
160
"tb" -> "socks5h://127.0.0.1:9150"
161
"""
162
```
163
164
**Proxy Support:**
165
166
```python
167
# Accepted proxy strings and what they resolve to ("tb" is the only alias; full URLs map to themselves)
168
proxies = {
169
# Tor Browser default
170
"tb": "socks5h://127.0.0.1:9150",
171
172
# Common SOCKS proxy
173
"socks5://127.0.0.1:9050": "socks5://127.0.0.1:9050",
174
175
# HTTP proxy
176
"http://proxy.company.com:8080": "http://proxy.company.com:8080",
177
178
# HTTPS proxy
179
"https://secure-proxy.com:443": "https://secure-proxy.com:443"
180
}
181
182
# Environment variable support
183
import os
184
os.environ['DDGS_PROXY'] = 'socks5://127.0.0.1:9050'
185
186
# Proxy authentication (if supported by proxy server)
187
authenticated_proxy = "http://username:password@proxy.example.com:8080"
188
```
189
190
### Threading Configuration
191
192
Control concurrent search execution with thread pool configuration.
193
194
```python { .api }
195
# Class-level thread configuration
196
DDGS.threads = 10 # Set global thread pool size
197
198
# Access thread pool executor
199
executor = DDGS.get_executor()
200
201
# Manual thread pool management
202
from concurrent.futures import ThreadPoolExecutor
203
204
# Custom executor for advanced use cases
205
with ThreadPoolExecutor(max_workers=15) as custom_executor:
206
# Use custom executor for specific operations
207
pass
208
```
209
210
**Threading Examples:**
211
212
```python
213
# Configure for high-performance searching
214
DDGS.threads = 25 # More threads for concurrent searches
215
216
with DDGS(timeout=30) as ddgs:
217
# Perform multiple searches concurrently
218
text_results = ddgs.text("AI research", backend="all", max_results=100)
219
# Uses thread pool for concurrent engine queries
220
221
# Conservative configuration for limited resources
222
DDGS.threads = 5
223
224
with DDGS(timeout=10) as ddgs:
225
results = ddgs.text("query", backend="google,bing")
226
```
227
228
### Environment Variables
229
230
Environment variable support for default configuration.
231
232
```bash
233
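# Set in shell environment
# NOTE: DDGS_PROXY is the only variable the DDGS constructor is documented to read;
# confirm that DDGS_TIMEOUT and DDGS_VERIFY are honored before relying on them.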
234
export DDGS_PROXY="socks5://127.0.0.1:9050"
235
export DDGS_TIMEOUT="15"
236
export DDGS_VERIFY="false"
237
```
238
239
```python
240
# Use environment variables
241
import os
242
243
# Proxy from environment
244
os.environ['DDGS_PROXY'] = 'http://proxy.example.com:8080'
245
246
# DDGS automatically uses environment proxy if proxy=None
247
with DDGS() as ddgs:
248
results = ddgs.text("query") # Uses environment proxy
249
250
# Override environment with explicit parameters
251
with DDGS(proxy="tb", timeout=20) as ddgs:
252
results = ddgs.text("query") # Uses "tb" proxy, ignores environment
253
```
254
255
### Performance Optimization
256
257
Configuration options for optimizing search performance.
258
259
```python
260
# High-performance configuration
261
DDGS.threads = 20 # More concurrent threads
262
263
with DDGS(
264
timeout=30, # Longer timeout for complex searches
265
verify=True # Keep SSL verification for security
266
) as ddgs:
267
# Use all available backends for maximum coverage
268
results = ddgs.text(
269
"comprehensive query",
270
backend="all",
271
max_results=200
272
)
273
274
# Memory-efficient configuration for limited resources
275
DDGS.threads = 3 # Fewer threads
276
277
with DDGS(timeout=10) as ddgs:
278
# Use specific backend to reduce resource usage
279
results = ddgs.text(
280
"focused query",
281
backend="wikipedia,google",
282
max_results=20
283
)
284
```
285
286
### SSL and Security Configuration
287
288
SSL certificate verification and security settings.
289
290
```python
291
# Disable SSL verification (not recommended for production)
292
with DDGS(verify=False) as ddgs:
293
results = ddgs.text("query")
294
295
# Enable SSL verification with custom timeout
296
with DDGS(verify=True, timeout=20) as ddgs:
297
results = ddgs.text("query")
298
299
# Secure configuration with Tor
300
with DDGS(proxy="tb", verify=True, timeout=30) as ddgs:
301
results = ddgs.text("sensitive query")
302
```
303
304
### Result Processing Utilities
305
306
Internal utilities for processing and normalizing search results.
307
308
```python { .api }
309
# Text normalization utilities (internal)
310
def _normalize_url(url: str) -> str:
311
"""Normalize URL format for consistent results."""
312
313
def _normalize_text(raw: str) -> str:
314
"""Clean and normalize text content from HTML."""
315
316
def _normalize_date(date: int | str) -> str:
317
"""Normalize date formats to consistent string representation."""
318
```
319
320
### Error Configuration
321
322
Configuration choices that affect error behavior: the timeout and SSL verification settings determine which errors are likely to be raised.
323
324
```python
325
# Timeout configuration affects exception behavior
326
short_timeout_ddgs = DDGS(timeout=5) # More likely to raise TimeoutException
327
long_timeout_ddgs = DDGS(timeout=30) # Less likely to timeout
328
329
# Verify configuration affects SSL errors
330
secure_ddgs = DDGS(verify=True) # Will raise SSL errors for invalid certs
331
insecure_ddgs = DDGS(verify=False) # Ignores SSL certificate issues
332
```
333
334
## Configuration Best Practices
335
336
### Production Configuration
337
338
```python
339
# Recommended production settings
340
with DDGS(
341
proxy=None, # None falls back to the DDGS_PROXY environment variable, if set
342
timeout=15, # Reasonable timeout
343
verify=True # Always verify SSL in production
344
) as ddgs:
345
results = ddgs.text("query", backend="auto")
346
```
347
348
### Development Configuration
349
350
```python
351
# Development/testing settings
352
with DDGS(
353
proxy="tb", # Use Tor for privacy during testing
354
timeout=30, # Longer timeout for debugging
355
verify=False # May disable for local testing
356
) as ddgs:
357
results = ddgs.text("test query", backend="all")
358
```
359
360
### High-Volume Configuration
361
362
```python
363
# Settings for high-volume search applications
# (assumes `import time` and a `query_list` of query strings defined beforehand)
364
DDGS.threads = 30
365
366
with DDGS(
367
timeout=20,
368
verify=True
369
) as ddgs:
370
# Batch processing with rate limiting
371
for query in query_list:
372
results = ddgs.text(query, max_results=50)
373
time.sleep(1) # Rate limiting
374
```
375
376
### Resource-Constrained Configuration
377
378
```python
379
# Minimal resource usage
380
DDGS.threads = 2
381
382
with DDGS(timeout=8) as ddgs:
383
results = ddgs.text(
384
"query",
385
backend="wikipedia", # Single, reliable backend
386
max_results=10
387
)
388
```