0
# URL and WSGI Utilities
1
2
URL processing functions and WSGI utilities for encoding, decoding, parsing, and manipulating URLs and WSGI environments. These utilities provide essential functionality for handling web requests and constructing proper URLs.
3
4
## Capabilities
5
6
### URL Encoding and Decoding
7
8
Functions for converting between different URL formats and handling international characters.
9
10
```python { .api }
11
def uri_to_iri(uri):
    """
    Turn a URI into an IRI (Internationalized Resource Identifier).

    Percent-encoded sequences that form valid UTF-8 are unquoted, while
    reserved and invalid characters are left quoted. Punycode domains are
    decoded to Unicode.

    Parameters:
    - uri: URI string to convert

    Returns:
    IRI string with Unicode characters unquoted

    Examples:
    - uri_to_iri("http://xn--n3h.net/p%C3%A5th") → "http://☃.net/påth"
    - uri_to_iri("/path%20with%20spaces") → "/path%20with%20spaces"
      (an encoded space is not safe to unquote, so it stays as %20)
    """
28
29
def iri_to_uri(iri):
    """
    Turn an IRI into a plain-ASCII URI.

    Non-ASCII characters are written as percent-encoded UTF-8 bytes and
    Unicode domains are converted to Punycode.

    Parameters:
    - iri: IRI string to convert

    Returns:
    ASCII-only URI string

    Examples:
    - iri_to_uri("http://☃.net/påth") → "http://xn--n3h.net/p%C3%A5th"
    - iri_to_uri("/path with spaces") → "/path%20with%20spaces"
    """
46
47
# Import URL conversion functions
48
from werkzeug.urls import uri_to_iri, iri_to_uri
49
50
# Note: Werkzeug uses urllib.parse functions directly for basic URL operations
51
from urllib.parse import quote, unquote, quote_plus, unquote_plus, urlencode, parse_qs
52
53
# Legacy Werkzeug helpers that wrap the urllib.parse functions above (deprecated in Werkzeug 2.3 and removed in 3.0; prefer urllib.parse directly):
54
def url_quote(string, charset="utf-8", errors="strict", safe="/:"):
    """Percent-encode a URL string (alias for urllib.parse.quote)."""
56
57
def url_unquote(string, charset="utf-8", errors="replace"):
    """Decode a percent-encoded URL string (alias for urllib.parse.unquote)."""
59
60
def url_quote_plus(string, charset="utf-8", errors="strict"):
    """Percent-encode a URL string, writing spaces as + (alias for urllib.parse.quote_plus)."""
62
63
def url_unquote_plus(string, charset="utf-8", errors="replace"):
    """Decode a URL string, reading + as a space (alias for urllib.parse.unquote_plus)."""
65
66
def url_encode(obj, charset="utf-8", encode_keys=False, sort=False, key=None, separator="&"):
    """Serialize an object into a URL query string (enhanced urllib.parse.urlencode)."""
68
69
def url_decode(s, charset="utf-8", decode_keys=False, separator="&", cls=None, errors="replace"):
    """Parse a URL query string into a MultiDict or dict."""
71
```
72
73
### WSGI Environment Utilities
74
75
Functions for extracting and manipulating data from WSGI environment dictionaries.
76
77
```python { .api }
78
def get_current_url(environ, root_only=False, strip_querystring=False, host_only=False, trusted_hosts=None):
    """
    Rebuild the URL of the current request from the WSGI environment.

    Parameters:
    - environ: WSGI environment dictionary
    - root_only: Only return scheme, host, and script root
    - strip_querystring: Exclude query string from result
    - host_only: Only return scheme and host
    - trusted_hosts: List of trusted hostnames for validation

    Returns:
    Complete URL as IRI (may contain Unicode characters)

    Examples:
    - get_current_url(environ) → "http://example.com/app/path?query=value"
    - get_current_url(environ, root_only=True) → "http://example.com/app/"
    - get_current_url(environ, host_only=True) → "http://example.com/"
      (root and host URLs end with a trailing slash)
    """
97
98
def get_host(environ, trusted_hosts=None):
    """
    Read the request's host from the WSGI environment.

    The Host header is preferred, with SERVER_NAME as the fallback. The port
    is included only when it is not the standard port for the protocol.

    Parameters:
    - environ: WSGI environment dictionary
    - trusted_hosts: List of trusted hostnames (raises SecurityError if not matched)

    Returns:
    Host string, optionally with port

    Examples:
    - get_host(environ) → "example.com"
    - get_host(environ) → "localhost:8080"
    """
116
117
def get_content_length(environ):
    """
    Read the request's content length from the WSGI environment.

    Parameters:
    - environ: WSGI environment dictionary

    Returns:
    Content length as integer or None if not specified/invalid
    """
127
128
def get_input_stream(environ, safe_fallback=True):
    """
    Return the request body stream from the WSGI environment, with safety checks.

    Parameters:
    - environ: WSGI environment dictionary
    - safe_fallback: Return an empty stream when the content length is not
      set, rather than exposing a stream with no known end

    Returns:
    Input stream for reading request body
    """
139
140
def get_path_info(environ):
    """
    Read PATH_INFO from the WSGI environment, properly decoded.

    Parameters:
    - environ: WSGI environment dictionary

    Returns:
    Decoded path info string
    """
150
```
151
152
### WSGI Response Utilities
153
154
Utilities for handling WSGI responses and file serving.
155
156
```python { .api }
157
def wrap_file(environ, file, buffer_size=8192):
    """
    Wrap a file so it can be returned as a WSGI response body.

    Uses the WSGI server's own file wrapper (wsgi.file_wrapper) when the
    environment provides one, otherwise the generic FileWrapper.

    Parameters:
    - environ: WSGI environment dictionary
    - file: File-like object to wrap
    - buffer_size: Buffer size for reading file chunks

    Returns:
    WSGI-compatible iterable for file content
    """
169
170
class FileWrapper:
    def __init__(self, file, buffer_size=8192):
        """
        Turn a file-like object into a WSGI response iterable.

        Parameters:
        - file: File-like object to serve
        - buffer_size: Size of chunks to read
        """

    def __iter__(self):
        """Yield the file's content chunk by chunk."""

    def __len__(self):
        """Return the file length, if it is available."""
185
186
class ClosingIterator:
    def __init__(self, iterable, callbacks=None):
        """
        Wrap an iterable so that cleanup callbacks run when it is closed.

        Parameters:
        - iterable: Iterable to wrap
        - callbacks: List of functions to call when iterator is closed
        """

    def __iter__(self):
        """Iterate over the wrapped iterable."""

    def close(self):
        """Run every cleanup callback."""
201
202
class LimitedStream:
    def __init__(self, stream, limit):
        """
        Cap how much data can be read from a stream.

        Parameters:
        - stream: Stream to wrap
        - limit: Maximum bytes to allow reading
        """

    def read(self, size=-1):
        """Read at most size bytes without going past the limit."""

    def readline(self, size=-1):
        """Read a single line without going past the limit."""
217
```
218
219
### WSGI Decorators and Helpers
220
221
Decorators and helper functions for WSGI application development.
222
223
```python { .api }
224
def responder(f):
    """
    Decorator that calls the WSGI application returned by a function.

    Lets a function return a Response object, which is then called
    automatically with (environ, start_response).

    Parameters:
    - f: Function that returns a WSGI application

    Returns:
    WSGI application function

    Example:
    @responder
    def app(environ, start_response):
        return Response('Hello World!')
    """
242
243
def host_is_trusted(hostname, trusted_list):
    """
    Tell whether a hostname appears in the trusted list.

    Parameters:
    - hostname: Hostname to check
    - trusted_list: List of trusted hostnames/patterns

    Returns:
    True if hostname is trusted
    """
254
```
255
256
## Usage Examples
257
258
### URL Construction and Parsing
259
260
```python
261
from werkzeug.urls import uri_to_iri, iri_to_uri
262
from werkzeug.wsgi import get_current_url
263
from urllib.parse import quote, unquote, urlencode, parse_qs
264
265
def url_handling_examples():
    """Print a short tour of URI/IRI conversion, quoting, and query strings."""
    # URI -> IRI -> URI round trip
    uri = "http://xn--n3h.net/caf%C3%A9"
    iri = uri_to_iri(uri)
    print(f"URI: {uri}")
    print(f"IRI: {iri}")  # "http://☃.net/café"
    print(f"Back to URI: {iri_to_uri(iri)}")

    # Percent-encoding round trip
    text = "Hello World & Co."
    encoded = quote(text)  # "Hello%20World%20%26%20Co."
    steps = (
        ("Original", text),
        ("Encoded", encoded),
        ("Decoded", unquote(encoded)),
    )
    for label, value in steps:
        print(f"{label}: {value}")

    # Query string: dict -> string -> dict of lists
    query_string = urlencode({'name': 'John Doe', 'age': '30', 'city': 'New York'})
    print(f"Query string: {query_string}")  # "name=John+Doe&age=30&city=New+York"
    print(f"Parsed: {parse_qs(query_string)}")
290
291
def wsgi_url_reconstruction(environ, start_response):
    """
    Example of reconstructing URLs from WSGI environ.

    Parameters:
    - environ: WSGI environment dictionary
    - start_response: WSGI start_response callable

    Returns:
    WSGI response iterable listing four variants of the current URL
    """
    # This example's import block does not bring in Response, so import it
    # here; without it the function fails with NameError.
    from werkzeug.wrappers import Response

    # Get complete current URL
    full_url = get_current_url(environ)

    # Get just the application root
    root_url = get_current_url(environ, root_only=True)

    # Get host only
    host_only = get_current_url(environ, host_only=True)

    # URL without query string
    path_url = get_current_url(environ, strip_querystring=True)

    response_text = f"""
    Full URL: {full_url}
    Root URL: {root_url}
    Host only: {host_only}
    Path URL: {path_url}
    """

    response = Response(response_text)
    return response(environ, start_response)
315
316
# Run the printing demo only when this file is executed as a script.
if __name__ == '__main__':
    url_handling_examples()
318
```
319
320
### WSGI Environment Processing
321
322
```python
323
from werkzeug.wsgi import get_host, get_content_length, get_path_info
324
from werkzeug.wrappers import Request, Response
325
326
def process_wsgi_environ(environ, start_response):
    """Report selected request details from the WSGI environment as plain text."""
    # Values that go through the Werkzeug helpers
    info = {
        'host': get_host(environ),
        'content_length': get_content_length(environ),
        'path_info': get_path_info(environ),
    }

    # Values read straight from the environ, keyed by their label in the output
    raw_keys = (
        ('method', 'REQUEST_METHOD'),
        ('query_string', 'QUERY_STRING'),
        ('content_type', 'CONTENT_TYPE'),
        ('remote_addr', 'REMOTE_ADDR'),
        ('user_agent', 'HTTP_USER_AGENT'),
    )
    for label, environ_key in raw_keys:
        info[label] = environ.get(environ_key)

    lines = [f'{k}: {v}' for k, v in info.items()]
    reply = Response('\n'.join(lines), mimetype='text/plain')
    return reply(environ, start_response)
353
354
# Security example with trusted hosts
355
def secure_host_check(environ, start_response):
    """
    Example of validating host against trusted list.

    Parameters:
    - environ: WSGI environment dictionary
    - start_response: WSGI start_response callable

    Returns:
    WSGI response iterable: a welcome message for a trusted host,
    or a 400 response when get_host raises SecurityError
    """
    # SecurityError is not imported by this example's import block; without
    # this import the except clause itself raises NameError.
    from werkzeug.exceptions import SecurityError

    trusted_hosts = ['example.com', 'www.example.com', 'localhost']

    # Keep the try body to the single call that can raise.
    try:
        host = get_host(environ, trusted_hosts)
    except SecurityError:
        response = Response('Untrusted host', status=400)
    else:
        response = Response(f'Welcome from trusted host: {host}')

    return response(environ, start_response)
367
```
368
369
### File Serving with WSGI
370
371
```python
372
from werkzeug.wsgi import wrap_file, FileWrapper
373
from werkzeug.wrappers import Response
374
import os
375
import mimetypes
376
377
def serve_static_file(environ, start_response):
    """
    Serve static files using WSGI utilities.

    Parameters:
    - environ: WSGI environment dictionary
    - start_response: WSGI start_response callable

    Returns:
    WSGI response iterable: the file content, or a 404 response when the
    path is unsafe or does not name a regular file
    """
    from werkzeug.security import safe_join

    # PATH_INFO is client-controlled. safe_join returns None when the joined
    # path would leave ./static (e.g. "../secret"), which blocks traversal.
    path_info = environ.get('PATH_INFO', '').lstrip('/')
    file_path = safe_join('./static', path_info)

    # isfile() is False for missing paths too, so one check covers both cases.
    if file_path is None or not os.path.isfile(file_path):
        response = Response('File not found', status=404)
        return response(environ, start_response)

    # Get file info
    file_size = os.path.getsize(file_path)
    mime_type, _ = mimetypes.guess_type(file_path)

    # Open file and wrap it for WSGI
    file_obj = open(file_path, 'rb')
    file_wrapper = wrap_file(environ, file_obj)

    # Create response with proper headers
    response = Response(
        file_wrapper,
        mimetype=mime_type or 'application/octet-stream',
        headers={
            'Content-Length': str(file_size),
            'Accept-Ranges': 'bytes',
        },
    )

    return response(environ, start_response)
407
408
def serve_large_file_with_wrapper():
    """Build a plain WSGI app that streams ./large_file.dat through FileWrapper."""

    def large_file_app(environ, start_response):
        file_path = './large_file.dat'

        # Guard clause: answer 404 before touching the file
        found = os.path.exists(file_path)
        if not found:
            start_response('404 Not Found', [('Content-Type', 'text/plain')])
            return [b'File not found']

        file_obj = open(file_path, 'rb')
        size_header = ('Content-Length', str(os.path.getsize(file_path)))
        start_response(
            '200 OK',
            [('Content-Type', 'application/octet-stream'), size_header],
        )

        # FileWrapper reads the file in 16 KiB chunks
        return FileWrapper(file_obj, buffer_size=16384)

    return large_file_app
432
```
433
434
### Responder Decorator Usage
435
436
```python
437
from werkzeug.wsgi import responder
438
from werkzeug.wrappers import Response
439
import json
440
441
@responder
def json_api(environ, start_response):
    """Small JSON API; the responder decorator calls the Response this returns."""
    path = environ.get('PATH_INFO', '')
    method = environ.get('REQUEST_METHOD', 'GET')

    # Unknown paths leave early with a 404
    if path not in ('/api/status', '/api/info'):
        return Response(
            '{"error": "Not found"}',
            status=404,
            mimetype='application/json'
        )

    if path == '/api/status':
        payload = {'status': 'ok', 'method': method}
    else:
        payload = {
            'host': environ.get('HTTP_HOST'),
            'user_agent': environ.get('HTTP_USER_AGENT'),
            'path': path
        }

    return Response(json.dumps(payload), mimetype='application/json')
470
471
# Without responder decorator (traditional WSGI)
472
def traditional_wsgi_app(environ, start_response):
    """Plain WSGI version of json_api: call the undecorated function, then call its result."""
    undecorated = json_api.__wrapped__
    return undecorated(environ, start_response)(environ, start_response)
476
```
477
478
### Stream Limiting and Safety
479
480
```python
481
from werkzeug.wsgi import LimitedStream, get_input_stream
482
from werkzeug.wrappers import Request, Response
483
484
def handle_upload_with_limits(environ, start_response):
    """
    Handle file uploads with size limits.

    Parameters:
    - environ: WSGI environment dictionary
    - start_response: WSGI start_response callable

    Returns:
    WSGI response iterable: the number of bytes received, or a 413
    response when the upload exceeds the 5MB limit
    """
    # Neither name is imported by this example's import block; without these
    # imports the function fails with NameError.
    from werkzeug.exceptions import RequestEntityTooLarge
    from werkzeug.wsgi import get_content_length

    max_upload_size = 1024 * 1024 * 5  # 5MB limit

    # Reject early when the client declares an oversized body
    content_length = get_content_length(environ)
    if content_length is not None and content_length > max_upload_size:
        response = Response('File too large', status=413)
        return response(environ, start_response)

    # Get input stream with limit
    input_stream = get_input_stream(environ)
    limited_stream = LimitedStream(input_stream, max_upload_size)

    # NOTE(review): whether LimitedStream raises RequestEntityTooLarge on
    # overrun appears to depend on the Werkzeug version and its options;
    # confirm against the installed release.
    try:
        data = limited_stream.read()
    except RequestEntityTooLarge:
        response = Response('Upload size exceeded limit', status=413)
    else:
        response = Response(f'Received {len(data)} bytes')

    return response(environ, start_response)
511
512
def safe_stream_reading():
    """Build a WSGI app that reads the request body in chunks under a 1MB cap."""

    def app(environ, start_response):
        input_stream = get_input_stream(environ, safe_fallback=True)

        chunk_size = 8192
        max_size = 1024 * 1024  # 1MB limit
        chunks = []
        total_size = 0

        # Read chunk by chunk so an oversized body is never fully buffered
        chunk = input_stream.read(chunk_size)
        while chunk:
            total_size += len(chunk)
            if total_size > max_size:
                too_large = Response('Request too large', status=413)
                return too_large(environ, start_response)

            chunks.append(chunk)
            chunk = input_stream.read(chunk_size)

        data = b''.join(chunks)
        ok = Response(f'Processed {len(data)} bytes safely')
        return ok(environ, start_response)

    return app
541
```
542
543
### URL Building and Routing Integration
544
545
```python
546
from werkzeug.wsgi import get_current_url
547
from werkzeug.urls import iri_to_uri
548
from werkzeug.routing import Map, Rule
549
from urllib.parse import urlencode
550
551
class URLBuilder:
    """Helper class for building URLs in WSGI applications."""

    def __init__(self, url_map):
        """
        Parameters:
        - url_map: Routing map used to build URLs
        """
        self.url_map = url_map

    def build_url(self, environ, endpoint, values=None, external=False):
        """
        Build URL for endpoint with current request context.

        Parameters:
        - environ: WSGI environment dictionary the map is bound to
        - endpoint: Endpoint name to build a URL for
        - values: Optional dict of URL variables
        - external: Build an absolute URL and convert it to an ASCII URI

        Returns:
        URL string, or None when building raises BuildError
        """
        # BuildError is not imported by this example's import block; without
        # this import the except clause itself raises NameError.
        from werkzeug.routing import BuildError

        adapter = self.url_map.bind_to_environ(environ)

        try:
            url = adapter.build(endpoint, values or {}, force_external=external)
        except BuildError:
            return None

        # Convert IRI to URI for external URLs
        return iri_to_uri(url) if external else url

    def build_external_url(self, environ, path, query_params=None):
        """
        Build external URL with query parameters.

        Parameters:
        - environ: WSGI environment dictionary
        - path: Path to append to the application root
        - query_params: Optional mapping encoded as the query string

        Returns:
        ASCII-only absolute URL string
        """
        base_url = get_current_url(environ, root_only=True)

        # The root URL ends with "/" and path usually starts with one; join
        # them with exactly one slash so the result has no "//" in the path.
        url = f"{base_url.rstrip('/')}/{path.lstrip('/')}"

        if query_params:
            url = f"{url}?{urlencode(query_params)}"

        return iri_to_uri(url)
586
587
# Example usage
# Routing table with three endpoints; '/user/<int:user_id>' takes an integer
# user_id variable.
url_map = Map([
    Rule('/', endpoint='index'),
    Rule('/user/<int:user_id>', endpoint='user_profile'),
    Rule('/api/data', endpoint='api_data'),
])

# Module-level builder used by link_generation_app below.
builder = URLBuilder(url_map)
595
596
def link_generation_app(environ, start_response):
    """
    App demonstrating URL generation.

    Parameters:
    - environ: WSGI environment dictionary
    - start_response: WSGI start_response callable

    Returns:
    WSGI response iterable containing an HTML page of generated links
    """
    from html import escape

    # This example's import block does not bring in Response; without this
    # import the function fails with NameError.
    from werkzeug.wrappers import Response

    def href(url):
        # URLs go into HTML attributes, so "&" and quotes must be escaped.
        # str() keeps a None result from build_url renderable.
        return escape(str(url), quote=True)

    # Build internal URLs
    home_url = href(builder.build_url(environ, 'index'))
    profile_url = href(builder.build_url(environ, 'user_profile', {'user_id': 123}))

    # Build external URLs
    api_url = href(builder.build_url(environ, 'api_data', external=True))
    custom_url = href(builder.build_external_url(
        environ,
        '/search',
        {'q': 'python', 'page': '2'}
    ))

    page = f"""
    <html>
    <body>
    <h1>URL Examples</h1>
    <ul>
    <li><a href="{home_url}">Home</a></li>
    <li><a href="{profile_url}">User Profile</a></li>
    <li><a href="{api_url}">API Data</a></li>
    <li><a href="{custom_url}">Search Results</a></li>
    </ul>
    </body>
    </html>
    """

    response = Response(page, mimetype='text/html')
    return response(environ, start_response)
627
```