0
# Network & URL Handling
1
2
Socket programming utilities with buffering and protocol support, plus comprehensive URL parsing and manipulation utilities. Includes a netstring protocol implementation, complete URL component handling, and robust socket wrapper classes for network programming.
3
4
## Capabilities
5
6
### Socket Programming Utilities
7
8
Enhanced socket wrappers with buffering and protocol support.
9
10
```python { .api }
11
class BufferedSocket:
12
"""Socket wrapper with buffering capabilities."""
13
def __init__(self, sock, timeout=DEFAULT_TIMEOUT, maxsize=DEFAULT_MAXSIZE): ...
14
def recv(self, size, flags=0, timeout=None): ...
15
def recv_until(self, delimiter, timeout=None, maxsize=None): ...
16
def recv_size(self, size, timeout=None): ...
17
def send(self, data, flags=0, timeout=None): ...
18
def sendall(self, data, timeout=None): ...
19
def close(self): ...
20
21
class NetstringSocket:
22
"""Socket using netstring protocol for message framing."""
23
def __init__(self, sock, timeout=DEFAULT_TIMEOUT, maxsize=DEFAULT_MAXSIZE): ...
24
def read_ns(self, timeout=None, maxsize=None): ...
25
def write_ns(self, data, timeout=None): ...
26
def close(self): ...
27
```
28
29
### URL Parsing and Manipulation
30
31
Comprehensive URL handling with immutable semantics.
32
33
```python { .api }
34
class URL:
35
"""Comprehensive URL manipulation class with immutable semantics."""
36
def __init__(self, url_text='', **kwargs): ...
37
38
# Properties
39
@property
40
def scheme(self): ...
41
@property
42
def username(self): ...
43
@property
44
def password(self): ...
45
@property
46
def host(self): ...
47
@property
48
def port(self): ...
49
@property
50
def path(self): ...
51
@property
52
def query_params(self): ...
53
@property
54
def fragment(self): ...
55
56
# Methods
57
def replace(self, **kwargs): ...
58
def normalize(self): ...
59
def __str__(self): ...
60
61
def parse_url(url_text):
62
"""
63
Parse URL string into components.
64
65
Parameters:
66
- url_text (str): URL string to parse
67
68
Returns:
69
URL: Parsed URL object
70
"""
71
```
72
73
### URL Component Processing
74
75
Utilities for processing individual URL components.
76
77
```python { .api }
78
def quote_path_part(text, full_quote=True):
79
"""
80
URL-encode path components.
81
82
Parameters:
83
- text (str): Text to encode
84
- full_quote (bool): Use full URL encoding
85
86
Returns:
87
str: URL-encoded path component
88
"""
89
90
def quote_query_part(text, full_quote=True):
91
"""
92
URL-encode query parameters.
93
94
Parameters:
95
- text (str): Text to encode
96
- full_quote (bool): Use full URL encoding
97
98
Returns:
99
str: URL-encoded query parameter
100
"""
101
102
def quote_fragment_part(text, full_quote=True):
103
"""
104
URL-encode fragment components.
105
106
Parameters:
107
- text (str): Text to encode
108
- full_quote (bool): Use full URL encoding
109
110
Returns:
111
str: URL-encoded fragment
112
"""
113
114
def quote_userinfo_part(text, full_quote=True):
115
"""
116
URL-encode userinfo components.
117
118
Parameters:
119
- text (str): Text to encode
120
- full_quote (bool): Use full URL encoding
121
122
Returns:
123
str: URL-encoded userinfo
124
"""
125
126
def unquote(string, encoding='utf-8', errors='replace'):
127
"""
128
URL-decode strings.
129
130
Parameters:
131
- string (str): URL-encoded string
132
- encoding (str): Character encoding
133
- errors (str): Error handling strategy
134
135
Returns:
136
str: Decoded string
137
"""
138
139
def unquote_to_bytes(string):
140
"""
141
URL-decode to bytes.
142
143
Parameters:
144
- string (str): URL-encoded string
145
146
Returns:
147
bytes: Decoded bytes
148
"""
149
```
150
151
### URL Discovery and Analysis
152
153
Extract and analyze URLs from text.
154
155
```python { .api }
156
def find_all_links(text, with_text=False, **kwargs):
157
"""
158
Extract URLs from text.
159
160
Parameters:
161
- text (str): Text containing URLs
162
- with_text (bool): Interleave the plain-text segments between links with the returned URL objects (default False)
163
164
Returns:
165
list: URL objects for the links found; when with_text is True, the plain-text strings between links are interleaved with them
166
"""
167
168
def parse_host(host):
169
"""
170
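Parse the host portion of a URL (low-level helper).
171
172
Parameters:
173
- host (str): Host string (hostname, IPv4 address, or bracketed IPv6 address such as '[::1]')
174
175
Returns:
176
tuple: (family, host) tuple, where family is a socket module constant (e.g. socket.AF_INET6) or None, and host is a string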
177
"""
178
```
179
180
### Query String Processing
181
182
Parse and manipulate URL query strings.
183
184
```python { .api }
185
def parse_qsl(qs, keep_blank_values=True, **kwargs):
186
"""
187
Parse query string to list.
188
189
Parameters:
190
- qs (str): Query string to parse
191
- keep_blank_values (bool): Keep parameters with empty values
192
193
Returns:
194
list: List of (key, value) tuples
195
"""
196
197
class QueryParamDict(OrderedMultiDict):
198
"""Specialized dict for query parameters."""
199
def add(self, key, value): ...
200
def get_list(self, key): ...
201
def to_url_query(self): ...
202
```
203
204
### URL Scheme Management
205
206
Register and manage custom URL schemes.
207
208
```python { .api }
209
def register_scheme(text, uses_netloc=None, **kwargs):
210
"""
211
Register custom URL schemes.
212
213
Parameters:
214
- text (str): Scheme name
215
- uses_netloc (bool): Whether scheme uses network location
216
217
Returns:
218
None
219
"""
220
221
def resolve_path_parts(path_parts):
222
"""
223
Resolve relative path components.
224
225
Parameters:
226
- path_parts (list): List of path components
227
228
Returns:
229
list: Resolved path components
230
"""
231
```
232
233
## Usage Examples
234
235
```python
236
from boltons.socketutils import BufferedSocket, NetstringSocket\nfrom boltons.urlutils import URL, find_all_links, parse_qsl\nimport socket\n\n# Enhanced socket operations\nsock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)\nbuf_sock = BufferedSocket(sock)\nbuf_sock.sendall(b'HTTP/1.1 request data')\nresponse = buf_sock.recv_until(b'\\r\\n\\r\\n') # Read until headers end\n\n# Netstring protocol\nns_sock = NetstringSocket(sock)\nns_sock.write_ns(b'structured message')\nmessage = ns_sock.read_ns()\n\n# URL manipulation\nurl = URL('https://example.com/path?param=value#section')\nprint(url.host) # 'example.com'\nprint(url.path) # '/path'\nprint(url.query_params) # QueryParamDict([('param', 'value')])\n\n# Immutable URL modifications\nnew_url = url.replace(path='/new-path', query_params={'new': 'param'})\nprint(new_url) # 'https://example.com/new-path?new=param#section'\n\n# Extract URLs from text\ntext = \"Visit https://example.com and http://test.org for more info\"\nurls = find_all_links(text)\nprint(urls) # ['https://example.com', 'http://test.org']\n\n# Query string processing\nquery = \"param1=value1&param2=value2&param1=another_value\"\nparams = parse_qsl(query)\nprint(params) # [('param1', 'value1'), ('param2', 'value2'), ('param1', 'another_value')]\n```
237
238
### Advanced URL Operations
239
240
```python
241
from boltons.urlutils import (\n URL, quote_path_part, unquote, register_scheme\n)\n\n# URL component encoding\npath = \"path with spaces/special chars!\"\nencoded = quote_path_part(path)\nprint(encoded) # \"path%20with%20spaces/special%20chars%21\"\n\n# URL decoding\nencoded_url = \"https%3A//example.com/path%20with%20spaces\"\ndecoded = unquote(encoded_url)\nprint(decoded) # \"https://example.com/path with spaces\"\n\n# Complex URL building\nbase_url = URL('https://api.example.com')\napi_url = base_url.replace(\n path='/v1/users',\n query_params={\n 'limit': 10,\n 'offset': 20,\n 'filter': ['active', 'verified']\n }\n)\nprint(api_url)\n# https://api.example.com/v1/users?limit=10&offset=20&filter=active&filter=verified\n\n# Custom scheme registration\nregister_scheme('myprotocol', uses_netloc=True)\ncustom_url = URL('myprotocol://server:8080/path')\nprint(custom_url.scheme) # 'myprotocol'\nprint(custom_url.host) # 'server'\nprint(custom_url.port) # 8080\n```
242
243
## Types
244
245
```python { .api }
246
# Socket Exceptions\nclass Error(socket.error):\n \"\"\"Base exception for socket utilities.\"\"\"\n pass\n\nclass ConnectionClosed(Error):\n \"\"\"Exception for closed connections.\"\"\"\n pass\n\nclass MessageTooLong(Error):\n \"\"\"Exception for oversized messages.\"\"\"\n pass\n\nclass Timeout(socket.timeout, Error):\n \"\"\"Timeout exception.\"\"\"\n pass\n\nclass NetstringProtocolError(Error):\n \"\"\"Base netstring protocol error.\"\"\"\n pass\n\nclass NetstringInvalidSize(NetstringProtocolError):\n \"\"\"Invalid netstring size error.\"\"\"\n pass\n\nclass NetstringMessageTooLong(NetstringProtocolError):\n \"\"\"Netstring message too long error.\"\"\"\n pass\n\n# URL Exceptions\nclass URLParseError(ValueError):\n \"\"\"Exception for URL parsing errors.\"\"\"\n pass\n\n# Constants\nDEFAULT_TIMEOUT = 10 # Default timeout in seconds\nDEFAULT_MAXSIZE = 32 * 1024 # Default maximum message size (32KB)\nDEFAULT_ENCODING = 'utf8' # Default character encoding\n\n# URL scheme mappings\nSCHEME_PORT_MAP: dict # Mapping of URL schemes to default ports\nNO_NETLOC_SCHEMES: set # Set of schemes that don't use netloc\nDEFAULT_PARSED_URL: dict # Default empty parsed URL structure\n```