# HTTP/HTTPS Support

HTTP and HTTPS resource access with custom authentication, directory listing parsers, and RESTful operations for web-based storage systems. This implementation provides pathlib-compatible access to HTTP/HTTPS resources with full control over HTTP operations.
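
A minimal sketch of the pathlib-style workflow (host and file names here are placeholders): paths are joined with `/` and read like local files, while the request methods described below remain available for finer control.

```python
from cloudpathlib import HttpsPath

# Build a path with the familiar pathlib-style `/` operator
base = HttpsPath("https://files.example.com/reports")
report = base / "2024" / "summary.txt"

# Read the resource like a local file
text = report.read_text()
print(text[:100])
```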

## Capabilities

### HttpPath and HttpsPath Classes

HTTP-specific path implementations with RESTful operation support.

```python { .api }
class HttpPath(CloudPath):
    """HTTP resource path implementation."""

    @property
    def parsed_url(self) -> "urllib.parse.ParseResult":
        """
        Parsed URL components.

        Returns:
            ParseResult object with URL components
        """

    @property
    def drive(self) -> str:
        """
        Network location (netloc).

        Returns:
            Network location from URL
        """

    @property
    def anchor(self) -> str:
        """
        Full scheme + netloc.

        Returns:
            Scheme and network location
        """

    def get(self, **kwargs) -> typing.Tuple["http.client.HTTPResponse", bytes]:
        """
        Issue GET request to the URL.

        Args:
            **kwargs: Arguments passed to urllib request

        Returns:
            Tuple of (HTTPResponse, response body)
        """

    def put(self, **kwargs) -> typing.Tuple["http.client.HTTPResponse", bytes]:
        """
        Issue PUT request to the URL.

        Args:
            **kwargs: Arguments passed to urllib request

        Returns:
            Tuple of (HTTPResponse, response body)
        """

    def post(self, **kwargs) -> typing.Tuple["http.client.HTTPResponse", bytes]:
        """
        Issue POST request to the URL.

        Args:
            **kwargs: Arguments passed to urllib request

        Returns:
            Tuple of (HTTPResponse, response body)
        """

    def delete(self, **kwargs) -> typing.Tuple["http.client.HTTPResponse", bytes]:
        """
        Issue DELETE request to the URL.

        Args:
            **kwargs: Arguments passed to urllib request

        Returns:
            Tuple of (HTTPResponse, response body)
        """

    def head(self, **kwargs) -> typing.Tuple["http.client.HTTPResponse", bytes]:
        """
        Issue HEAD request to the URL.

        Args:
            **kwargs: Arguments passed to urllib request

        Returns:
            Tuple of (HTTPResponse, response body)
        """

class HttpsPath(HttpPath):
    """HTTPS resource path implementation (same API as HttpPath)."""
```

### HttpClient and HttpsClient Classes

HTTP client with comprehensive authentication and configuration options.

```python { .api }
class HttpClient:
    """HTTP client for web resource access."""

    def __init__(
        self,
        file_cache_mode: FileCacheMode = None,
        local_cache_dir: str = None,
        content_type_method = None,
        auth = None,
        custom_list_page_parser = None,
        custom_dir_matcher = None,
        write_file_http_method: str = 'PUT'
    ):
        """
        Initialize HTTP client.

        Args:
            file_cache_mode: Cache management strategy
            local_cache_dir: Local directory for file cache
            content_type_method: Function to determine MIME types
            auth: Authentication handler installed on the underlying urllib
                opener (e.g., a urllib.request.BaseHandler subclass)
            custom_list_page_parser: Function to parse directory listings
            custom_dir_matcher: Function to identify directories
            write_file_http_method: HTTP method for file uploads
        """

    def request(
        self,
        url: str,
        method: str,
        **kwargs
    ) -> typing.Tuple["http.client.HTTPResponse", bytes]:
        """
        Make HTTP request.

        Args:
            url: Target URL
            method: HTTP method
            **kwargs: Additional request arguments

        Returns:
            Tuple of (HTTPResponse, response body)
        """

    @property
    def dir_matcher(self):
        """Function to identify directories from HTTP responses."""

    @property
    def write_file_http_method(self) -> str:
        """HTTP method used for file uploads."""

class HttpsClient(HttpClient):
    """HTTPS client (same API as HttpClient)."""
```
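
Clients can be configured once and reused across paths. A short sketch (the cache directory and host are placeholders) using cloudpathlib's `set_as_default_client()` to make a configured client the default for subsequently created paths:

```python
from cloudpathlib import HttpsClient, HttpsPath

# Configure a client with a dedicated local cache directory
client = HttpsClient(local_cache_dir="/tmp/http_cache")

# Option 1: pass the client explicitly
path = HttpsPath("https://files.example.com/data.json", client=client)

# Option 2: make it the default for HttpsPath instances created afterwards
client.set_as_default_client()
other_path = HttpsPath("https://files.example.com/other.json")  # uses `client`
```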

## Usage Examples

### Basic HTTP Operations

```python
from cloudpathlib import HttpPath, HttpsPath, HttpClient

# Create HTTP paths
http_path = HttpPath("http://example.com/api/data.json")
https_path = HttpsPath("https://api.example.com/data.json")

# Access URL properties
print(f"Netloc: {https_path.drive}")       # "api.example.com"
print(f"Anchor: {https_path.anchor}")      # "https://api.example.com"
print(f"Parsed: {https_path.parsed_url}")  # ParseResult object
```
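
Because `HttpPath` and `HttpsPath` are `CloudPath` subclasses, the usual pathlib-style name handling works on URLs as well (URLs below are placeholders):

```python
from cloudpathlib import HttpsPath

path = HttpsPath("https://files.example.com/data/report.csv")

print(path.name)    # "report.csv"
print(path.stem)    # "report"
print(path.suffix)  # ".csv"
print(path.parent)  # HttpsPath for "https://files.example.com/data"

# Derive sibling resources without string manipulation
json_version = path.with_suffix(".json")
```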

### RESTful HTTP Operations

```python
import json

from cloudpathlib import HttpsPath

# Create HTTPS path for API endpoint
api_path = HttpsPath("https://api.example.com/users/123")

# GET request -- each verb returns a (HTTPResponse, body bytes) tuple
response, body = api_path.get()
if response.status == 200:
    user_data = json.loads(body)
    print(f"User: {user_data}")

# POST request with a JSON body (keyword arguments are passed to the urllib request)
create_path = HttpsPath("https://api.example.com/users")
payload = json.dumps({"name": "John Doe", "email": "john@example.com"}).encode("utf-8")
response, body = create_path.post(
    data=payload,
    headers={"Content-Type": "application/json"},
)

# PUT request to update
update_payload = json.dumps({"name": "Jane Doe"}).encode("utf-8")
response, body = api_path.put(
    data=update_payload,
    headers={"Content-Type": "application/json"},
)

# DELETE request
response, body = api_path.delete()
print(f"Delete status: {response.status}")

# HEAD request for metadata
response, body = api_path.head()
print(f"Content-Length: {response.headers.get('Content-Length')}")
```
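
Path-style checks complement the raw verbs. The sketch below assumes the usual CloudPath behavior where `exists()` and `stat()` are backed by HTTP metadata (e.g., a HEAD request and response headers); treat the specific `st_size`/`st_mtime` values as server-dependent.

```python
from cloudpathlib import HttpsPath

resource = HttpsPath("https://api.example.com/users/123")

if resource.exists():
    info = resource.stat()
    # Size/modification time come from response headers when the server sends them
    print(f"Size: {info.st_size} bytes, modified: {info.st_mtime}")
else:
    print("Resource does not exist")
```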

### Authentication

```python
import urllib.request

from cloudpathlib import HttpClient, HttpsPath

# Basic authentication via a standard urllib handler
password_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
password_mgr.add_password(None, "https://protected.example.com", "username", "password")
basic_auth = urllib.request.HTTPBasicAuthHandler(password_mgr)

client = HttpClient(auth=basic_auth)

authenticated_path = HttpsPath(
    "https://protected.example.com/data.json",
    client=client
)

# API key / bearer token authentication: a custom urllib handler that adds
# an Authorization header to every outgoing request
class BearerTokenHandler(urllib.request.BaseHandler):
    def __init__(self, token):
        self.token = token

    def http_request(self, request):
        request.add_header("Authorization", f"Bearer {self.token}")
        return request

    https_request = http_request

client = HttpClient(auth=BearerTokenHandler("your-api-key"))

# OAuth access tokens follow the same pattern -- construct the handler with
# the current token and rebuild the client when the token is refreshed
oauth_token = "your-oauth-access-token"  # obtained from your OAuth flow
client = HttpClient(auth=BearerTokenHandler(oauth_token))
```

### File Upload and Download

```python
# Download file from HTTP
file_url = HttpsPath("https://example.com/files/document.pdf")

# Download to local file
local_path = file_url.download_to("downloaded_document.pdf")
print(f"Downloaded to: {local_path}")

# Read content directly
content = file_url.read_bytes()

# Upload file via PUT (default)
upload_url = HttpsPath("https://upload.example.com/files/new_document.pdf")
upload_url.upload_from("local_document.pdf")

# Upload via POST
client = HttpClient(write_file_http_method='POST')
upload_url = HttpsPath("https://upload.example.com/files/", client=client)
upload_url.upload_from("local_document.pdf")
```
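
For small payloads, the standard CloudPath read/write helpers avoid explicit upload/download steps. A short sketch with placeholder URLs (the server must accept the configured write method, PUT by default):

```python
from cloudpathlib import HttpsPath

notes = HttpsPath("https://upload.example.com/files/notes.txt")

# Write text straight to the server (issues the configured write method)
notes.write_text("Remember to rotate the API keys.")

# Read it back
print(notes.read_text())

# Binary variants are available as well
image = HttpsPath("https://upload.example.com/files/logo.png")
image.write_bytes(b"\x89PNG...")  # truncated placeholder bytes
```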

### Custom Directory Listing

```python
from bs4 import BeautifulSoup

def parse_apache_directory_listing(response_text):
    """Parse Apache-style directory listing."""
    soup = BeautifulSoup(response_text, 'html.parser')
    entries = []

    for link in soup.find_all('a'):
        href = link.get('href')
        # Skip parent/current directory links
        if href and href not in ('../', './'):
            entries.append(href.rstrip('/'))

    return entries

def is_directory(name):
    """Identify directories by trailing slash or missing file extension."""
    return name.endswith('/') or '.' not in name.split('/')[-1]

# Configure client with custom parsers
client = HttpClient(
    custom_list_page_parser=parse_apache_directory_listing,
    custom_dir_matcher=is_directory
)

# List directory contents
dir_path = HttpPath("http://files.example.com/data/", client=client)
for item in dir_path.iterdir():
    print(f"{'Dir' if item.is_dir() else 'File'}: {item.name}")
```
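
If you would rather not depend on BeautifulSoup, a standard-library parser can serve the same role. A minimal sketch using `html.parser` to collect link targets from a listing page; it assumes the same parser contract as above (page text in, iterable of entry names out).

```python
from html.parser import HTMLParser

from cloudpathlib import HttpClient

class LinkCollector(HTMLParser):
    """Collect href targets from anchor tags."""

    def __init__(self):
        super().__init__()
        self.hrefs = []

    def handle_starttag(self, tag, attrs):
        if tag == "a":
            href = dict(attrs).get("href")
            if href and href not in ("../", "./"):
                self.hrefs.append(href.rstrip("/"))

def stdlib_listing_parser(response_text):
    parser = LinkCollector()
    parser.feed(response_text)
    return parser.hrefs

client = HttpClient(custom_list_page_parser=stdlib_listing_parser)
```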

### Working with APIs

```python
import json

# REST API interaction
api_base = HttpsPath("https://jsonplaceholder.typicode.com")

# Get all posts (each verb returns a (HTTPResponse, body bytes) tuple)
posts_path = api_base / "posts"
response, body = posts_path.get()
posts = json.loads(body)
print(f"Found {len(posts)} posts")

# Get specific post
post_path = api_base / "posts" / "1"
response, body = post_path.get()
post = json.loads(body)
print(f"Post title: {post['title']}")

# Create new post
new_post = {
    "title": "New Post",
    "body": "This is a new post",
    "userId": 1
}
response, body = posts_path.post(
    data=json.dumps(new_post).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
created_post = json.loads(body)
print(f"Created post ID: {created_post['id']}")

# Update post
updated_data = {"title": "Updated Title"}
response, body = post_path.put(
    data=json.dumps(updated_data).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)

# Delete post
response, body = post_path.delete()
print(f"Delete status: {response.status}")
```
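
The unpack-and-decode step can be factored into a small helper. `get_json` below is a hypothetical convenience function, not part of the library API:

```python
import json

from cloudpathlib import HttpsPath

def get_json(path: HttpsPath):
    """Hypothetical helper: GET a resource and decode the body as JSON."""
    response, body = path.get()
    charset = response.headers.get_content_charset() or "utf-8"
    return json.loads(body.decode(charset))

post = get_json(HttpsPath("https://jsonplaceholder.typicode.com/posts/1"))
print(post["title"])
```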

### File Server Operations

```python
from pathlib import Path

# Work with file servers
file_server = HttpsPath("https://files.example.com")

# List, download, and process CSV files in a directory
data_dir = file_server / "data"
for file_path in data_dir.glob("*.csv"):
    print(f"CSV file: {file_path}")
    local_file = file_path.download_to(f"local_{file_path.name}")
    process_csv_file(local_file)  # process_csv_file: your own processing routine

# Upload files to server
local_files = Path("uploads/").glob("*.txt")
upload_dir = file_server / "uploads"

for local_file in local_files:
    remote_path = upload_dir / local_file.name
    remote_path.upload_from(local_file)
    print(f"Uploaded: {remote_path}")
```
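
Listing and per-file downloads combine naturally into a directory mirror. A sketch assuming the server's listing pages are parseable by the configured client (see the custom parser section above); the remote URL and destination directory are placeholders.

```python
from pathlib import Path

from cloudpathlib import HttpsPath

def mirror_directory(remote_dir: HttpsPath, local_dir: Path) -> None:
    """Recursively download every file under remote_dir into local_dir."""
    local_dir.mkdir(parents=True, exist_ok=True)
    for entry in remote_dir.iterdir():
        if entry.is_dir():
            mirror_directory(entry, local_dir / entry.name)
        else:
            entry.download_to(local_dir / entry.name)

mirror_directory(HttpsPath("https://files.example.com/data/"), Path("local_data"))
```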

### WebDAV Support

```python
import urllib.request

# Digest authentication for the WebDAV server via a urllib handler
password_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
password_mgr.add_password(None, "https://webdav.example.com", "username", "password")
webdav_auth = urllib.request.HTTPDigestAuthHandler(password_mgr)

client = HttpClient(
    auth=webdav_auth,
    write_file_http_method='PUT'
)

webdav_path = HttpsPath("https://webdav.example.com/files/", client=client)

# WebDAV operations
document = webdav_path / "document.txt"
document.write_text("WebDAV content")

# Create directory (MKCOL method via custom request)
new_dir = webdav_path / "new_folder"
response, body = client.request(str(new_dir), 'MKCOL')

# List directory contents
for item in webdav_path.iterdir():
    print(f"WebDAV item: {item}")
```
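
Other WebDAV verbs can be issued the same way through `client.request`. A PROPFIND sketch, assuming the extra keyword arguments (request body and headers) are forwarded to the underlying urllib request as described in the API section; the XML body is the minimal "all properties" query.

```python
propfind_body = b"""<?xml version="1.0" encoding="utf-8"?>
<d:propfind xmlns:d="DAV:"><d:allprop/></d:propfind>"""

response, body = client.request(
    str(webdav_path),
    "PROPFIND",
    data=propfind_body,
    headers={"Depth": "1", "Content-Type": "application/xml"},
)
print(body.decode("utf-8"))  # multi-status XML describing the collection
```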

### Streaming Operations

```python
import csv

# Read large files in chunks
large_file_url = HttpsPath("https://download.example.com/large-dataset.zip")

# Copy the remote file to disk a chunk at a time
with large_file_url.open('rb') as remote_file, open('local-dataset.zip', 'wb') as local_file:
    while chunk := remote_file.read(1024 * 1024):  # 1 MiB at a time
        local_file.write(chunk)
        print(f"Copied chunk: {len(chunk)} bytes")

# Row-by-row processing of a large CSV
csv_url = HttpsPath("https://data.example.com/big-data.csv")
with csv_url.open('r') as f:
    reader = csv.DictReader(f)
    for row_num, row in enumerate(reader):
        process_row(row)  # process_row: your own row handler
        if row_num % 1000 == 0:
            print(f"Processed {row_num} rows")
```
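
Because reads go through the client's local file cache, cache behavior matters for large files. A sketch using `FileCacheMode` to drop cached copies as soon as the file handle is closed; the import path and mode name follow cloudpathlib's cache-mode enum, so double-check them against your installed version.

```python
from cloudpathlib import HttpsClient, HttpsPath
from cloudpathlib.enums import FileCacheMode

# Remove the cached copy when the file object is closed
client = HttpsClient(
    file_cache_mode=FileCacheMode.close_file,
    local_cache_dir="/tmp/http_cache",
)

big_file = HttpsPath("https://download.example.com/large-dataset.zip", client=client)
with big_file.open("rb") as f:
    header = f.read(4)  # cached copy is deleted once this block exits
```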

### Custom Headers and Parameters

```python
import urllib.parse
import urllib.request

# Handler that adds custom headers to every outgoing request
class CustomHeadersHandler(urllib.request.BaseHandler):
    def __init__(self, api_key, user_agent):
        self.api_key = api_key
        self.user_agent = user_agent

    def http_request(self, request):
        request.add_header('X-API-Key', self.api_key)
        request.add_header('User-Agent', self.user_agent)
        request.add_header('Accept', 'application/json')
        return request

    https_request = http_request

client = HttpClient(auth=CustomHeadersHandler('key123', 'MyApp/1.0'))

# Query parameters are part of the URL itself
query = urllib.parse.urlencode({'format': 'json', 'limit': 100})
api_path = HttpsPath(f"https://api.example.com/data?{query}", client=client)
response, body = api_path.get()
```

### Session Management

```python
import requests
from requests.auth import HTTPBasicAuth

# Use a persistent session with shared headers and credentials
session = requests.Session()
session.headers.update({'User-Agent': 'CloudPathLib/1.0'})
session.auth = HTTPBasicAuth('user', 'pass')

# Client subclass that routes requests through the session. Note that this
# swaps the urllib transport for requests, so the first element of the
# returned tuple is a requests.Response rather than an http.client.HTTPResponse.
class SessionClient(HttpClient):
    def __init__(self, session, **kwargs):
        super().__init__(**kwargs)
        self.session = session

    def request(self, url, method, **kwargs):
        response = self.session.request(method, url, **kwargs)
        response.raise_for_status()
        return response, response.content

client = SessionClient(session)

# All paths created with this client share the same session
path1 = HttpsPath("https://api.example.com/resource1", client=client)
path2 = HttpsPath("https://api.example.com/resource2", client=client)

response1, body1 = path1.get()  # uses the session
response2, body2 = path2.get()  # reuses the pooled connection
```

### Error Handling

```python
import json
import urllib.error

from cloudpathlib.exceptions import CloudPathNotExistsError

try:
    http_path = HttpsPath("https://api.example.com/nonexistent")
    content = http_path.read_text()
except CloudPathNotExistsError:
    print("HTTP resource not found")
except urllib.error.HTTPError as e:
    print(f"HTTP error {e.code}: {e.reason}")
except urllib.error.URLError as e:
    print(f"Connection failed or timed out: {e.reason}")

# Check response status explicitly; note that urllib raises HTTPError for
# 4xx/5xx responses rather than returning them
http_path = HttpsPath("https://api.example.com/data")
try:
    response, body = http_path.get()
except urllib.error.HTTPError as e:
    if e.code == 404:
        print("Resource not found")
    elif e.code == 401:
        print("Authentication required")
    else:
        print(f"HTTP {e.code}: {e.reason}")
else:
    if response.status == 200:
        data = json.loads(body)
```
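
For transient failures (429s, 5xx responses, dropped connections), a small retry wrapper around the path verbs is often enough. This is a hypothetical helper built on the tuple-returning API above, not a library feature:

```python
import time
import urllib.error

from cloudpathlib import HttpsPath

def get_with_retries(path: HttpsPath, attempts: int = 3, backoff: float = 1.0):
    """Hypothetical helper: retry GET on transient HTTP/network errors."""
    for attempt in range(1, attempts + 1):
        try:
            return path.get()
        except urllib.error.HTTPError as e:
            if e.code not in (429, 500, 502, 503, 504) or attempt == attempts:
                raise
        except urllib.error.URLError:
            if attempt == attempts:
                raise
        time.sleep(backoff * attempt)

response, body = get_with_retries(HttpsPath("https://api.example.com/data"))
```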

### Performance Optimization

```python
import concurrent.futures
from pathlib import Path

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

# Configure a retry strategy on a requests session (used with the
# SessionClient subclass from the previous section)
session = requests.Session()

retry_strategy = Retry(
    total=3,
    backoff_factor=1,
    status_forcelist=[429, 500, 502, 503, 504]
)

adapter = HTTPAdapter(max_retries=retry_strategy)
session.mount("http://", adapter)
session.mount("https://", adapter)

client = SessionClient(session)

# requests applies timeouts per request, not per session; with SessionClient
# the extra keyword arguments reach session.request, so pass them per call:
# response, body = path.get(timeout=(10, 30))  # (connect, read) seconds

# Concurrent downloads
Path("downloads").mkdir(exist_ok=True)

def download_file(url_str):
    url = HttpsPath(url_str, client=client)
    return url.download_to(f"downloads/{url.name}")

urls = [
    "https://example.com/file1.txt",
    "https://example.com/file2.txt",
    "https://example.com/file3.txt"
]

with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
    futures = [executor.submit(download_file, url) for url in urls]

    for future in concurrent.futures.as_completed(futures):
        try:
            result = future.result()
            print(f"Downloaded: {result}")
        except Exception as e:
            print(f"Download failed: {e}")
```