# HTTP/HTTPS Support

HTTP and HTTPS resource access with custom authentication, directory listing parsers, and RESTful operations for web-based storage systems. This implementation provides pathlib-compatible access to HTTP/HTTPS resources with full control over HTTP operations.
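
A minimal sketch of the pathlib-style workflow (host and file names here are placeholders): paths are joined with `/` and read like local files, while the request methods described below remain available for finer control.

```python
from cloudpathlib import HttpsPath

# Build a path with the familiar pathlib-style `/` operator
base = HttpsPath("https://files.example.com/reports")
report = base / "2024" / "summary.txt"

# Read the resource like a local file
text = report.read_text()
print(text[:100])
```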

## Capabilities

### HttpPath and HttpsPath Classes

HTTP-specific path implementations with RESTful operation support.

```python { .api }
class HttpPath(CloudPath):
    """HTTP resource path implementation."""

    @property
    def parsed_url(self) -> "urllib.parse.ParseResult":
        """
        Parsed URL components.

        Returns:
            ParseResult object with URL components
        """

    @property
    def drive(self) -> str:
        """
        Network location (netloc).

        Returns:
            Network location from URL
        """

    @property
    def anchor(self) -> str:
        """
        Full scheme + netloc.

        Returns:
            Scheme and network location
        """

    def get(self, **kwargs) -> typing.Tuple["http.client.HTTPResponse", bytes]:
        """
        Issue GET request to the URL.

        Args:
            **kwargs: Arguments passed to urllib request

        Returns:
            Tuple of (HTTPResponse, response body)
        """

    def put(self, **kwargs) -> typing.Tuple["http.client.HTTPResponse", bytes]:
        """
        Issue PUT request to the URL.

        Args:
            **kwargs: Arguments passed to urllib request

        Returns:
            Tuple of (HTTPResponse, response body)
        """

    def post(self, **kwargs) -> typing.Tuple["http.client.HTTPResponse", bytes]:
        """
        Issue POST request to the URL.

        Args:
            **kwargs: Arguments passed to urllib request

        Returns:
            Tuple of (HTTPResponse, response body)
        """

    def delete(self, **kwargs) -> typing.Tuple["http.client.HTTPResponse", bytes]:
        """
        Issue DELETE request to the URL.

        Args:
            **kwargs: Arguments passed to urllib request

        Returns:
            Tuple of (HTTPResponse, response body)
        """

    def head(self, **kwargs) -> typing.Tuple["http.client.HTTPResponse", bytes]:
        """
        Issue HEAD request to the URL.

        Args:
            **kwargs: Arguments passed to urllib request

        Returns:
            Tuple of (HTTPResponse, response body)
        """

class HttpsPath(HttpPath):
    """HTTPS resource path implementation (same API as HttpPath)."""
```

### HttpClient and HttpsClient Classes

HTTP client with comprehensive authentication and configuration options.

```python { .api }
class HttpClient:
    """HTTP client for web resource access."""

    def __init__(
        self,
        file_cache_mode: FileCacheMode = None,
        local_cache_dir: str = None,
        content_type_method = None,
        auth = None,
        custom_list_page_parser = None,
        custom_dir_matcher = None,
        write_file_http_method: str = 'PUT'
    ):
        """
        Initialize HTTP client.

        Args:
            file_cache_mode: Cache management strategy
            local_cache_dir: Local directory for file cache
            content_type_method: Function to determine MIME types
            auth: Authentication handler installed on the underlying urllib
                opener (e.g., a urllib.request.BaseHandler subclass)
            custom_list_page_parser: Function to parse directory listings
            custom_dir_matcher: Function to identify directories
            write_file_http_method: HTTP method for file uploads
        """

    def request(
        self,
        url: str,
        method: str,
        **kwargs
    ) -> typing.Tuple["http.client.HTTPResponse", bytes]:
        """
        Make HTTP request.

        Args:
            url: Target URL
            method: HTTP method
            **kwargs: Additional request arguments

        Returns:
            Tuple of (HTTPResponse, response body)
        """

    @property
    def dir_matcher(self):
        """Function to identify directories from HTTP responses."""

    @property
    def write_file_http_method(self) -> str:
        """HTTP method used for file uploads."""

class HttpsClient(HttpClient):
    """HTTPS client (same API as HttpClient)."""
```
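
Clients can be configured once and reused across paths. A short sketch (the cache directory and host are placeholders) using cloudpathlib's `set_as_default_client()` to make a configured client the default for subsequently created paths:

```python
from cloudpathlib import HttpsClient, HttpsPath

# Configure a client with a dedicated local cache directory
client = HttpsClient(local_cache_dir="/tmp/http_cache")

# Option 1: pass the client explicitly
path = HttpsPath("https://files.example.com/data.json", client=client)

# Option 2: make it the default for HttpsPath instances created afterwards
client.set_as_default_client()
other_path = HttpsPath("https://files.example.com/other.json")  # uses `client`
```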

## Usage Examples

### Basic HTTP Operations

```python
from cloudpathlib import HttpPath, HttpsPath, HttpClient

# Create HTTP paths
http_path = HttpPath("http://example.com/api/data.json")
https_path = HttpsPath("https://api.example.com/data.json")

# Access URL properties
print(f"Netloc: {https_path.drive}")       # "api.example.com"
print(f"Anchor: {https_path.anchor}")      # "https://api.example.com"
print(f"Parsed: {https_path.parsed_url}")  # ParseResult object
```
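
Because `HttpPath` and `HttpsPath` are `CloudPath` subclasses, the usual pathlib-style name handling works on URLs as well (URLs below are placeholders):

```python
from cloudpathlib import HttpsPath

path = HttpsPath("https://files.example.com/data/report.csv")

print(path.name)    # "report.csv"
print(path.stem)    # "report"
print(path.suffix)  # ".csv"
print(path.parent)  # HttpsPath for "https://files.example.com/data"

# Derive sibling resources without string manipulation
json_version = path.with_suffix(".json")
```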

### RESTful HTTP Operations

```python
import json

from cloudpathlib import HttpsPath

# Create HTTPS path for API endpoint
api_path = HttpsPath("https://api.example.com/users/123")

# GET request -- each verb returns a (HTTPResponse, body bytes) tuple
response, body = api_path.get()
if response.status == 200:
    user_data = json.loads(body)
    print(f"User: {user_data}")

# POST request with a JSON body (keyword arguments are passed to the urllib request)
create_path = HttpsPath("https://api.example.com/users")
payload = json.dumps({"name": "John Doe", "email": "john@example.com"}).encode("utf-8")
response, body = create_path.post(
    data=payload,
    headers={"Content-Type": "application/json"},
)

# PUT request to update
update_payload = json.dumps({"name": "Jane Doe"}).encode("utf-8")
response, body = api_path.put(
    data=update_payload,
    headers={"Content-Type": "application/json"},
)

# DELETE request
response, body = api_path.delete()
print(f"Delete status: {response.status}")

# HEAD request for metadata
response, body = api_path.head()
print(f"Content-Length: {response.headers.get('Content-Length')}")
```
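
Path-style checks complement the raw verbs. The sketch below assumes the usual CloudPath behavior where `exists()` and `stat()` are backed by HTTP metadata (e.g., a HEAD request and response headers); treat the specific `st_size`/`st_mtime` values as server-dependent.

```python
from cloudpathlib import HttpsPath

resource = HttpsPath("https://api.example.com/users/123")

if resource.exists():
    info = resource.stat()
    # Size/modification time come from response headers when the server sends them
    print(f"Size: {info.st_size} bytes, modified: {info.st_mtime}")
else:
    print("Resource does not exist")
```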

### Authentication

```python
import urllib.request

from cloudpathlib import HttpClient, HttpsPath

# Basic authentication via a standard urllib handler
password_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
password_mgr.add_password(None, "https://protected.example.com", "username", "password")
basic_auth = urllib.request.HTTPBasicAuthHandler(password_mgr)

client = HttpClient(auth=basic_auth)

authenticated_path = HttpsPath(
    "https://protected.example.com/data.json",
    client=client
)

# API key / bearer token authentication: a custom urllib handler that adds
# an Authorization header to every outgoing request
class BearerTokenHandler(urllib.request.BaseHandler):
    def __init__(self, token):
        self.token = token

    def http_request(self, request):
        request.add_header("Authorization", f"Bearer {self.token}")
        return request

    https_request = http_request

client = HttpClient(auth=BearerTokenHandler("your-api-key"))

# OAuth access tokens follow the same pattern -- construct the handler with
# the current token and rebuild the client when the token is refreshed
oauth_token = "your-oauth-access-token"  # obtained from your OAuth flow
client = HttpClient(auth=BearerTokenHandler(oauth_token))
```

### File Upload and Download

```python
# Download file from HTTP
file_url = HttpsPath("https://example.com/files/document.pdf")

# Download to local file
local_path = file_url.download_to("downloaded_document.pdf")
print(f"Downloaded to: {local_path}")

# Read content directly
content = file_url.read_bytes()

# Upload file via PUT (default)
upload_url = HttpsPath("https://upload.example.com/files/new_document.pdf")
upload_url.upload_from("local_document.pdf")

# Upload via POST
client = HttpClient(write_file_http_method='POST')
upload_url = HttpsPath("https://upload.example.com/files/", client=client)
upload_url.upload_from("local_document.pdf")
```
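
For small payloads, the standard CloudPath read/write helpers avoid explicit upload/download steps. A short sketch with placeholder URLs (the server must accept the configured write method, PUT by default):

```python
from cloudpathlib import HttpsPath

notes = HttpsPath("https://upload.example.com/files/notes.txt")

# Write text straight to the server (issues the configured write method)
notes.write_text("Remember to rotate the API keys.")

# Read it back
print(notes.read_text())

# Binary variants are available as well
image = HttpsPath("https://upload.example.com/files/logo.png")
image.write_bytes(b"\x89PNG...")  # truncated placeholder bytes
```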

### Custom Directory Listing

```python
from bs4 import BeautifulSoup

def parse_apache_directory_listing(response_text):
    """Parse Apache-style directory listing."""
    soup = BeautifulSoup(response_text, 'html.parser')
    entries = []

    for link in soup.find_all('a'):
        href = link.get('href')
        # Skip parent/current directory links
        if href and href not in ('../', './'):
            entries.append(href.rstrip('/'))

    return entries

def is_directory(name):
    """Identify directories by trailing slash or missing file extension."""
    return name.endswith('/') or '.' not in name.split('/')[-1]

# Configure client with custom parsers
client = HttpClient(
    custom_list_page_parser=parse_apache_directory_listing,
    custom_dir_matcher=is_directory
)

# List directory contents
dir_path = HttpPath("http://files.example.com/data/", client=client)
for item in dir_path.iterdir():
    print(f"{'Dir' if item.is_dir() else 'File'}: {item.name}")
```
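
If you would rather not depend on BeautifulSoup, a standard-library parser can serve the same role. A minimal sketch using `html.parser` to collect link targets from a listing page; it assumes the same parser contract as above (page text in, iterable of entry names out).

```python
from html.parser import HTMLParser

from cloudpathlib import HttpClient

class LinkCollector(HTMLParser):
    """Collect href targets from anchor tags."""

    def __init__(self):
        super().__init__()
        self.hrefs = []

    def handle_starttag(self, tag, attrs):
        if tag == "a":
            href = dict(attrs).get("href")
            if href and href not in ("../", "./"):
                self.hrefs.append(href.rstrip("/"))

def stdlib_listing_parser(response_text):
    parser = LinkCollector()
    parser.feed(response_text)
    return parser.hrefs

client = HttpClient(custom_list_page_parser=stdlib_listing_parser)
```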

### Working with APIs

```python
import json

# REST API interaction
api_base = HttpsPath("https://jsonplaceholder.typicode.com")

# Get all posts (each verb returns a (HTTPResponse, body bytes) tuple)
posts_path = api_base / "posts"
response, body = posts_path.get()
posts = json.loads(body)
print(f"Found {len(posts)} posts")

# Get specific post
post_path = api_base / "posts" / "1"
response, body = post_path.get()
post = json.loads(body)
print(f"Post title: {post['title']}")

# Create new post
new_post = {
    "title": "New Post",
    "body": "This is a new post",
    "userId": 1
}
response, body = posts_path.post(
    data=json.dumps(new_post).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
created_post = json.loads(body)
print(f"Created post ID: {created_post['id']}")

# Update post
updated_data = {"title": "Updated Title"}
response, body = post_path.put(
    data=json.dumps(updated_data).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)

# Delete post
response, body = post_path.delete()
print(f"Delete status: {response.status}")
```
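
The unpack-and-decode step can be factored into a small helper. `get_json` below is a hypothetical convenience function, not part of the library API:

```python
import json

from cloudpathlib import HttpsPath

def get_json(path: HttpsPath):
    """Hypothetical helper: GET a resource and decode the body as JSON."""
    response, body = path.get()
    charset = response.headers.get_content_charset() or "utf-8"
    return json.loads(body.decode(charset))

post = get_json(HttpsPath("https://jsonplaceholder.typicode.com/posts/1"))
print(post["title"])
```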

### File Server Operations

```python
from pathlib import Path

# Work with file servers
file_server = HttpsPath("https://files.example.com")

# List, download, and process CSV files in a directory
data_dir = file_server / "data"
for file_path in data_dir.glob("*.csv"):
    print(f"CSV file: {file_path}")
    local_file = file_path.download_to(f"local_{file_path.name}")
    process_csv_file(local_file)  # process_csv_file: your own processing routine

# Upload files to server
local_files = Path("uploads/").glob("*.txt")
upload_dir = file_server / "uploads"

for local_file in local_files:
    remote_path = upload_dir / local_file.name
    remote_path.upload_from(local_file)
    print(f"Uploaded: {remote_path}")
```
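
Listing and per-file downloads combine naturally into a directory mirror. A sketch assuming the server's listing pages are parseable by the configured client (see the custom parser section above); the remote URL and destination directory are placeholders.

```python
from pathlib import Path

from cloudpathlib import HttpsPath

def mirror_directory(remote_dir: HttpsPath, local_dir: Path) -> None:
    """Recursively download every file under remote_dir into local_dir."""
    local_dir.mkdir(parents=True, exist_ok=True)
    for entry in remote_dir.iterdir():
        if entry.is_dir():
            mirror_directory(entry, local_dir / entry.name)
        else:
            entry.download_to(local_dir / entry.name)

mirror_directory(HttpsPath("https://files.example.com/data/"), Path("local_data"))
```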

### WebDAV Support

```python
import urllib.request

# Digest authentication for the WebDAV server via a urllib handler
password_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
password_mgr.add_password(None, "https://webdav.example.com", "username", "password")
webdav_auth = urllib.request.HTTPDigestAuthHandler(password_mgr)

client = HttpClient(
    auth=webdav_auth,
    write_file_http_method='PUT'
)

webdav_path = HttpsPath("https://webdav.example.com/files/", client=client)

# WebDAV operations
document = webdav_path / "document.txt"
document.write_text("WebDAV content")

# Create directory (MKCOL method via custom request)
new_dir = webdav_path / "new_folder"
response, body = client.request(str(new_dir), 'MKCOL')

# List directory contents
for item in webdav_path.iterdir():
    print(f"WebDAV item: {item}")
```
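
Other WebDAV verbs can be issued the same way through `client.request`. A PROPFIND sketch, assuming the extra keyword arguments (request body and headers) are forwarded to the underlying urllib request as described in the API section; the XML body is the minimal "all properties" query.

```python
propfind_body = b"""<?xml version="1.0" encoding="utf-8"?>
<d:propfind xmlns:d="DAV:"><d:allprop/></d:propfind>"""

response, body = client.request(
    str(webdav_path),
    "PROPFIND",
    data=propfind_body,
    headers={"Depth": "1", "Content-Type": "application/xml"},
)
print(body.decode("utf-8"))  # multi-status XML describing the collection
```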

### Streaming Operations

```python
import csv

# Read large files in chunks
large_file_url = HttpsPath("https://download.example.com/large-dataset.zip")

# Copy the remote file to disk a chunk at a time
with large_file_url.open('rb') as remote_file, open('local-dataset.zip', 'wb') as local_file:
    while chunk := remote_file.read(1024 * 1024):  # 1 MiB at a time
        local_file.write(chunk)
        print(f"Copied chunk: {len(chunk)} bytes")

# Row-by-row processing of a large CSV
csv_url = HttpsPath("https://data.example.com/big-data.csv")
with csv_url.open('r') as f:
    reader = csv.DictReader(f)
    for row_num, row in enumerate(reader):
        process_row(row)  # process_row: your own row handler
        if row_num % 1000 == 0:
            print(f"Processed {row_num} rows")
```
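
Because reads go through the client's local file cache, cache behavior matters for large files. A sketch using `FileCacheMode` to drop cached copies as soon as the file handle is closed; the import path and mode name follow cloudpathlib's cache-mode enum, so double-check them against your installed version.

```python
from cloudpathlib import HttpsClient, HttpsPath
from cloudpathlib.enums import FileCacheMode

# Remove the cached copy when the file object is closed
client = HttpsClient(
    file_cache_mode=FileCacheMode.close_file,
    local_cache_dir="/tmp/http_cache",
)

big_file = HttpsPath("https://download.example.com/large-dataset.zip", client=client)
with big_file.open("rb") as f:
    header = f.read(4)  # cached copy is deleted once this block exits
```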

### Custom Headers and Parameters

```python
import urllib.parse
import urllib.request

# Handler that adds custom headers to every outgoing request
class CustomHeadersHandler(urllib.request.BaseHandler):
    def __init__(self, api_key, user_agent):
        self.api_key = api_key
        self.user_agent = user_agent

    def http_request(self, request):
        request.add_header('X-API-Key', self.api_key)
        request.add_header('User-Agent', self.user_agent)
        request.add_header('Accept', 'application/json')
        return request

    https_request = http_request

client = HttpClient(auth=CustomHeadersHandler('key123', 'MyApp/1.0'))

# Query parameters are part of the URL itself
query = urllib.parse.urlencode({'format': 'json', 'limit': 100})
api_path = HttpsPath(f"https://api.example.com/data?{query}", client=client)
response, body = api_path.get()
```

### Session Management

```python
import requests
from requests.auth import HTTPBasicAuth

# Use a persistent session with shared headers and credentials
session = requests.Session()
session.headers.update({'User-Agent': 'CloudPathLib/1.0'})
session.auth = HTTPBasicAuth('user', 'pass')

# Client subclass that routes requests through the session. Note that this
# swaps the urllib transport for requests, so the first element of the
# returned tuple is a requests.Response rather than an http.client.HTTPResponse.
class SessionClient(HttpClient):
    def __init__(self, session, **kwargs):
        super().__init__(**kwargs)
        self.session = session

    def request(self, url, method, **kwargs):
        response = self.session.request(method, url, **kwargs)
        response.raise_for_status()
        return response, response.content

client = SessionClient(session)

# All paths created with this client share the same session
path1 = HttpsPath("https://api.example.com/resource1", client=client)
path2 = HttpsPath("https://api.example.com/resource2", client=client)

response1, body1 = path1.get()  # uses the session
response2, body2 = path2.get()  # reuses the pooled connection
```

### Error Handling

```python
import json
import urllib.error

from cloudpathlib.exceptions import CloudPathNotExistsError

try:
    http_path = HttpsPath("https://api.example.com/nonexistent")
    content = http_path.read_text()
except CloudPathNotExistsError:
    print("HTTP resource not found")
except urllib.error.HTTPError as e:
    print(f"HTTP error {e.code}: {e.reason}")
except urllib.error.URLError as e:
    print(f"Connection failed or timed out: {e.reason}")

# Check response status explicitly; note that urllib raises HTTPError for
# 4xx/5xx responses rather than returning them
http_path = HttpsPath("https://api.example.com/data")
try:
    response, body = http_path.get()
except urllib.error.HTTPError as e:
    if e.code == 404:
        print("Resource not found")
    elif e.code == 401:
        print("Authentication required")
    else:
        print(f"HTTP {e.code}: {e.reason}")
else:
    if response.status == 200:
        data = json.loads(body)
```
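
For transient failures (429s, 5xx responses, dropped connections), a small retry wrapper around the path verbs is often enough. This is a hypothetical helper built on the tuple-returning API above, not a library feature:

```python
import time
import urllib.error

from cloudpathlib import HttpsPath

def get_with_retries(path: HttpsPath, attempts: int = 3, backoff: float = 1.0):
    """Hypothetical helper: retry GET on transient HTTP/network errors."""
    for attempt in range(1, attempts + 1):
        try:
            return path.get()
        except urllib.error.HTTPError as e:
            if e.code not in (429, 500, 502, 503, 504) or attempt == attempts:
                raise
        except urllib.error.URLError:
            if attempt == attempts:
                raise
        time.sleep(backoff * attempt)

response, body = get_with_retries(HttpsPath("https://api.example.com/data"))
```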

### Performance Optimization

```python
import concurrent.futures
from pathlib import Path

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

# Configure a retry strategy on a requests session (used with the
# SessionClient subclass from the previous section)
session = requests.Session()

retry_strategy = Retry(
    total=3,
    backoff_factor=1,
    status_forcelist=[429, 500, 502, 503, 504]
)

adapter = HTTPAdapter(max_retries=retry_strategy)
session.mount("http://", adapter)
session.mount("https://", adapter)

client = SessionClient(session)

# requests applies timeouts per request, not per session; with SessionClient
# the extra keyword arguments reach session.request, so pass them per call:
# response, body = path.get(timeout=(10, 30))  # (connect, read) seconds

# Concurrent downloads
Path("downloads").mkdir(exist_ok=True)

def download_file(url_str):
    url = HttpsPath(url_str, client=client)
    return url.download_to(f"downloads/{url.name}")

urls = [
    "https://example.com/file1.txt",
    "https://example.com/file2.txt",
    "https://example.com/file3.txt"
]

with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
    futures = [executor.submit(download_file, url) for url in urls]

    for future in concurrent.futures.as_completed(futures):
        try:
            result = future.result()
            print(f"Downloaded: {result}")
        except Exception as e:
            print(f"Download failed: {e}")
```