0
# Utilities
1
2
Utility functions that support the main HTTP client: URI parsing and normalization, IRI-to-URI conversion, CA certificate lookup, cache filename generation, and HTTP header processing. These utilities handle the low-level details of the HTTP protocol implementation.
3
4
## Capabilities
5
6
### URI Processing
7
8
Functions for parsing, normalizing, and converting URI formats.
9
10
```python { .api }
11
def parse_uri(uri):
12
"""
13
Parse URI into components.
14
15
Args:
16
uri (str): URI to parse
17
18
Returns:
19
tuple: (scheme, authority, path, query, fragment)
20
"""
21
22
def urlnorm(uri):
23
"""
24
Normalize URI format for consistent processing.
25
26
Args:
27
uri (str): URI to normalize
28
29
Returns:
30
tuple: (scheme, authority, request_uri, defrag_uri)
31
"""
32
33
def iri2uri(uri):
34
"""
35
Convert IRI (Internationalized Resource Identifier) to URI.
36
37
Encodes non-ASCII characters in IRIs to create valid URIs.
38
IRIs must be passed as unicode strings.
39
40
Args:
41
uri (str): IRI to convert (must be unicode string)
42
43
Returns:
44
str: Valid URI with non-ASCII characters encoded
45
"""
46
```
47
48
### Connection Utilities
49
50
Functions for managing HTTP connections and timeouts.
51
52
```python { .api }
53
def has_timeout(timeout):
54
"""
55
Check whether an explicit timeout was supplied (the value itself is not validated).
56
57
Args:
58
timeout: Timeout value to check
59
60
Returns:
61
bool: True if timeout is set and not default
62
"""
63
```
64
65
### Cache Utilities
66
67
Functions for generating safe cache filenames and keys.
68
69
```python { .api }
70
def safename(filename):
71
"""
72
Generate safe filename for caching.
73
74
Converts potentially unsafe characters in filenames to safe alternatives
75
for cross-platform filesystem compatibility.
76
77
Args:
78
filename (str): Original filename
79
80
Returns:
81
str: Safe filename for filesystem storage
82
"""
83
```
84
85
### Certificate Management
86
87
Functions and constants for locating the CA certificates used for SSL/TLS verification.
88
89
```python { .api }
90
# From httplib2.certs module
91
def where():
92
"""
93
Locate CA certificates file.
94
95
Searches for CA certificates in the following order:
96
1. HTTPLIB2_CA_CERTS environment variable
97
2. ca_certs_locater.get() if available
98
3. certifi.where() if available
99
4. Built-in CA certificates file
100
101
Returns:
102
str: Path to CA certificates file
103
104
Raises:
105
RuntimeError: If HTTPLIB2_CA_CERTS points to invalid file
106
"""
107
108
BUILTIN_CA_CERTS = "path/to/builtin/cacerts.txt"
109
"""Path to built-in CA certificates file."""
110
```
111
112
### HTTP Header Processing
113
114
Internal utilities for processing HTTP headers and content.
115
116
```python { .api }
117
def _normalize_headers(headers):
118
"""
119
Normalize header names and values.
120
121
Args:
122
headers (dict): HTTP headers dictionary
123
124
Returns:
125
dict: Normalized headers with lowercase names
126
"""
127
128
def _parse_cache_control(headers):
129
"""
130
Parse Cache-Control header directives.
131
132
Args:
133
headers (dict): HTTP headers
134
135
Returns:
136
dict: Parsed cache control directives
137
"""
138
139
def _get_end2end_headers(response):
140
"""
141
Get end-to-end headers from response.
142
143
Filters out hop-by-hop headers that shouldn't be cached.
144
145
Args:
146
response: HTTP response object
147
148
Returns:
149
list: End-to-end header names
150
"""
151
```
152
153
### Usage Examples
154
155
#### URI Processing
156
157
```python
158
import httplib2
159
from httplib2 import parse_uri, urlnorm, iri2uri
160
161
# Parse URI components
162
uri = "https://example.com:8080/path/to/resource?param=value#section"
163
(scheme, authority, path, query, fragment) = parse_uri(uri)
164
print(f"Scheme: {scheme}")
165
print(f"Authority: {authority}")
166
print(f"Path: {path}")
167
print(f"Query: {query}")
168
print(f"Fragment: {fragment}")
169
170
# Normalize URI
171
(scheme, authority, request_uri, defrag_uri) = urlnorm(uri)
172
print(f"Request URI: {request_uri}")
173
print(f"Defragmented URI: {defrag_uri}")
174
175
# Convert IRI to URI
176
iri = "http://例え.テスト/パス" # Japanese IRI
177
uri = iri2uri(iri)
178
print(f"Converted URI: {uri}")
179
```
180
181
#### Certificate Management
182
183
```python
184
import httplib2.certs
185
import os
186
187
# Get CA certificates location
188
ca_certs_path = httplib2.certs.where()
189
print(f"CA certificates: {ca_certs_path}")
190
191
# Use custom CA certificates (the path must be an existing file, otherwise where() raises RuntimeError)
192
os.environ['HTTPLIB2_CA_CERTS'] = '/path/to/custom/cacerts.pem'
193
custom_ca_path = httplib2.certs.where()
194
print(f"Custom CA certificates: {custom_ca_path}")
195
196
# Create HTTP client with specific CA certificates
197
h = httplib2.Http(ca_certs=custom_ca_path)
198
(resp, content) = h.request("https://secure.example.com/")
199
```
200
201
#### Safe Filename Generation
202
203
```python
204
import os

import httplib2
from httplib2 import safename
206
207
# Generate safe filenames for caching
208
unsafe_names = [
209
"http://example.com/path?param=value",
210
"file with spaces.txt",
211
"file/with/slashes.txt",
212
"file:with:colons.txt"
213
]
214
215
for name in unsafe_names:
216
safe = safename(name)
217
print(f"'{name}' -> '{safe}'")
218
219
# Use in custom cache implementation
220
class CustomCache:
    """Minimal file-backed cache whose file names are derived with ``safename``.

    Provides the ``get``/``set``/``delete`` methods that httplib2 invokes on a
    cache object passed as ``httplib2.Http(cache=...)``.
    """

    def __init__(self, cache_dir):
        """Remember where cache entries live.

        Args:
            cache_dir (str): Directory holding the cache files. It is not
                created here, so it must already exist before ``set`` is used.
        """
        self.cache_dir = cache_dir

    def _get_path(self, key):
        """Return the file path used to store the entry for ``key``."""
        # safename() turns an arbitrary key (typically a URI) into a name
        # that is safe to use as a single path component.
        return os.path.join(self.cache_dir, safename(key))

    def get(self, key):
        """Return the cached bytes for ``key``, or ``None`` if nothing is stored."""
        try:
            with open(self._get_path(key), 'rb') as f:
                return f.read()
        except FileNotFoundError:
            return None

    def set(self, key, value):
        """Store ``value`` (bytes) as the cache entry for ``key``."""
        with open(self._get_path(key), 'wb') as f:
            f.write(value)

    def delete(self, key):
        """Remove the cache entry for ``key``; a missing entry is not an error."""
        try:
            os.remove(self._get_path(key))
        except FileNotFoundError:
            # Deleting an entry that was never cached is a no-op.
            pass
240
```
241
242
#### Timeout Handling
243
244
```python
245
import httplib2
246
import socket
247
248
# Check timeout configuration
249
timeouts = [None, 0, 30, socket._GLOBAL_DEFAULT_TIMEOUT]
250
251
for timeout in timeouts:
252
has_timeout = httplib2.has_timeout(timeout)
253
print(f"Timeout {timeout}: {'has timeout' if has_timeout else 'no timeout'}")
254
255
# Use with HTTP client
256
h = httplib2.Http(timeout=30)
257
if httplib2.has_timeout(30):
258
print("Client configured with timeout")
259
```
260
261
#### Content Processing
262
263
```python
264
import httplib2
265
266
# Internal header processing (normally handled automatically)
267
headers = {
268
'Content-Type': 'application/json',
269
'Cache-Control': 'max-age=3600, public',
270
'Connection': 'keep-alive'
271
}
272
273
# Normalize headers (internal function)
274
normalized = httplib2._normalize_headers(headers)
275
print(f"Normalized headers: {normalized}")
276
277
# Parse cache control
278
cache_directives = httplib2._parse_cache_control(normalized)
279
print(f"Cache directives: {cache_directives}")
280
```
281
282
### IRI to URI Conversion Details
283
284
The `iri2uri` function handles internationalized resource identifiers:
285
286
```python
287
import httplib2
288
289
# Examples of IRI to URI conversion
290
iris = [
291
"http://例え.テスト/", # Japanese domain
292
"http://пример.испытание/", # Russian domain
293
"http://example.com/café", # Accented characters in path
294
"http://example.com/?q=naïve", # Accented characters in query
295
"http://example.com/#español" # Accented characters in fragment
296
]
297
298
for iri in iris:
299
uri = httplib2.iri2uri(iri)
300
print(f"IRI: {iri}")
301
print(f"URI: {uri}")
302
print()
303
```
304
305
### Cache Filename Safety
306
307
The `safename` function ensures cache filenames work across different filesystems:
308
309
```python
310
import httplib2
311
312
# Characters that need escaping in filenames
313
problematic_chars = [
314
"http://example.com/", # Slashes
315
"file?query=value", # Question marks
316
"file:with:colons", # Colons (Windows)
317
"file<with>brackets", # Angle brackets
318
"file|with|pipes", # Pipes
319
"file*with*asterisks", # Asterisks
320
"file\"with\"quotes", # Quotes
321
"very long filename " * 10 # Long names
322
]
323
324
for name in problematic_chars:
325
safe = httplib2.safename(name)
326
print(f"Original: {name[:50]}")
327
print(f"Safe: {safe[:50]}")
328
print()
329
```
330
331
### Authentication Utilities
332
333
Internal utilities for authentication processing:
334
335
```python
336
import httplib2.auth
337
338
# Parse WWW-Authenticate header
339
headers = {
340
'www-authenticate': 'Basic realm="Protected Area"'
341
}
342
343
auth_info = httplib2.auth._parse_www_authenticate(headers)
344
print(f"Authentication info: {auth_info}")
345
346
# Parse Authentication-Info header
347
auth_response_headers = {
348
'authentication-info': 'nextnonce="abcdef123456"'
349
}
350
351
auth_response = httplib2.auth._parse_authentication_info(auth_response_headers)
352
print(f"Authentication response: {auth_response}")
353
```
354
355
### Internal Utilities
356
357
These underscore-prefixed functions are private to httplib2 and may change between releases without notice; they are shown here only for advanced usage:
358
359
```python
360
import httplib2
361
362
# Content decompression (handled automatically)
363
response_headers = {'content-encoding': 'gzip'}
364
# compressed_content = gzip.compress(b"Hello, World!")
365
# decompressed = httplib2._decompressContent(response_headers, compressed_content)
366
367
# Entry disposition for caching
368
request_headers = {'cache-control': 'no-cache'}
369
response_headers = {'cache-control': 'max-age=3600'}
370
disposition = httplib2._entry_disposition(response_headers, request_headers)
371
print(f"Cache disposition: {disposition}")
372
373
# Cache key generation (internal)
374
# Cache keys are generated from normalized URIs and relevant headers
375
```
376
377
### Utility Constants
378
379
Important constants used throughout httplib2:
380
381
```python
382
import httplib2
383
384
# HTTP constants
385
print(f"Safe methods: {httplib2.SAFE_METHODS}")
386
print(f"Redirect codes: {httplib2.REDIRECT_CODES}")
387
print(f"Hop-by-hop headers: {httplib2.HOP_BY_HOP}")
388
print(f"Default max redirects: {httplib2.DEFAULT_MAX_REDIRECTS}")
389
print(f"Default retries: {httplib2.RETRIES}")
390
391
# SSL/TLS configuration
392
print(f"CA certificates: {httplib2.CA_CERTS}")
393
```
394
395
### Advanced URI Handling
396
397
```python
398
import httplib2
399
import urllib.parse
400
401
def advanced_uri_processing(uri):
    """Demonstrate advanced URI processing.

    Args:
        uri (str): Absolute URI or IRI to analyse. ``urlnorm`` rejects
            relative URIs, so a scheme and authority are required.

    Returns:
        dict: The untouched input under ``'original'``, plus the parsed
        components, the normalized request/defragmented URIs, and the
        host/port details extracted by ``urllib.parse``.
    """
    # Keep the caller's input so 'original' is not overwritten by the
    # IRI-to-URI conversion below.
    original = uri

    # Handle internationalization first, so every component is derived from
    # the same ASCII-only URI.
    if not uri.isascii():
        uri = httplib2.iri2uri(uri)

    # Parse with httplib2 (query and fragment are not reported)
    (scheme, authority, path, _query, _fragment) = httplib2.parse_uri(uri)

    # Normalize; the normalized scheme is lower-cased, which makes the
    # 'https' comparisons below case-insensitive.
    (norm_scheme, _norm_authority, request_uri, defrag_uri) = httplib2.urlnorm(uri)

    # Additional parsing with urllib
    parsed = urllib.parse.urlparse(uri)

    is_secure = norm_scheme == 'https'

    return {
        'original': original,
        'scheme': scheme,
        'authority': authority,
        'path': path,
        'normalized_request_uri': request_uri,
        'defragmented': defrag_uri,
        'is_secure': is_secure,
        'default_port': 443 if is_secure else 80,
        'hostname': parsed.hostname,
        'port': parsed.port
    }
429
430
# Example usage
431
uri_info = advanced_uri_processing("https://example.com:8080/path?q=test#top")
432
for key, value in uri_info.items():
433
print(f"{key}: {value}")
434
```