# HTTP/HTTPS Support

HTTP and HTTPS resource access with custom authentication, directory-listing parsers, and RESTful operations for web-based storage systems. This implementation provides pathlib-compatible access to HTTP/HTTPS resources while retaining full control over the underlying HTTP operations.
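
As a quick orientation before the API details below, here is a minimal sketch of that pathlib-style usage. The URL is illustrative, and `exists()` / `read_text()` come from the shared `CloudPath` interface rather than anything defined in this section.

```python
from cloudpathlib import HttpsPath

# Treat a web resource like a path object (illustrative URL)
readme = HttpsPath("https://example.com/files/readme.txt")

if readme.exists():                  # checks the resource with an HTTP request
    print(readme.name)               # "readme.txt"
    print(readme.read_text()[:100])  # first 100 characters of the body
```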

## Capabilities

### HttpPath and HttpsPath Classes

HTTP-specific path implementations with RESTful operation support.

```python { .api }
class HttpPath(CloudPath):
    """HTTP resource path implementation."""

    @property
    def parsed_url(self) -> "urllib.parse.ParseResult":
        """
        Parsed URL components.

        Returns:
            ParseResult object with URL components
        """

    @property
    def drive(self) -> str:
        """
        Network location (netloc).

        Returns:
            Network location from URL
        """

    @property
    def anchor(self) -> str:
        """
        Full scheme + netloc.

        Returns:
            Scheme and network location
        """

    def get(self, **kwargs) -> typing.Tuple["http.client.HTTPResponse", bytes]:
        """
        Issue GET request to the URL.

        Args:
            **kwargs: Arguments passed to urllib request

        Returns:
            Tuple of (HTTPResponse, response body)
        """

    def put(self, **kwargs) -> typing.Tuple["http.client.HTTPResponse", bytes]:
        """
        Issue PUT request to the URL.

        Args:
            **kwargs: Arguments passed to urllib request

        Returns:
            Tuple of (HTTPResponse, response body)
        """

    def post(self, **kwargs) -> typing.Tuple["http.client.HTTPResponse", bytes]:
        """
        Issue POST request to the URL.

        Args:
            **kwargs: Arguments passed to urllib request

        Returns:
            Tuple of (HTTPResponse, response body)
        """

    def delete(self, **kwargs) -> typing.Tuple["http.client.HTTPResponse", bytes]:
        """
        Issue DELETE request to the URL.

        Args:
            **kwargs: Arguments passed to urllib request

        Returns:
            Tuple of (HTTPResponse, response body)
        """

    def head(self, **kwargs) -> typing.Tuple["http.client.HTTPResponse", bytes]:
        """
        Issue HEAD request to the URL.

        Args:
            **kwargs: Arguments passed to urllib request

        Returns:
            Tuple of (HTTPResponse, response body)
        """


class HttpsPath(HttpPath):
    """HTTPS resource path implementation (same API as HttpPath)."""
```

### HttpClient and HttpsClient Classes

HTTP client with comprehensive authentication and configuration options.

```python { .api }
class HttpClient:
    """HTTP client for web resource access."""

    def __init__(
        self,
        file_cache_mode: FileCacheMode = None,
        local_cache_dir: str = None,
        content_type_method = None,
        auth = None,
        custom_list_page_parser = None,
        custom_dir_matcher = None,
        write_file_http_method: str = 'PUT'
    ):
        """
        Initialize HTTP client.

        Args:
            file_cache_mode: Cache management strategy
            local_cache_dir: Local directory for file cache
            content_type_method: Function to determine MIME types
            auth: Authentication handler (requests auth object)
            custom_list_page_parser: Function to parse directory listings
            custom_dir_matcher: Function to identify directories
            write_file_http_method: HTTP method for file uploads
        """

    def request(
        self,
        url: str,
        method: str,
        **kwargs
    ) -> typing.Tuple["http.client.HTTPResponse", bytes]:
        """
        Make HTTP request.

        Args:
            url: Target URL
            method: HTTP method
            **kwargs: Additional request arguments

        Returns:
            Tuple of (HTTPResponse, response body)
        """

    @property
    def dir_matcher(self):
        """Function to identify directories from HTTP responses."""

    @property
    def write_file_http_method(self) -> str:
        """HTTP method used for file uploads."""


class HttpsClient(HttpClient):
    """HTTPS client (same API as HttpClient)."""
```
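
To tie the constructor options together, here is a sketch of configuring a single client and attaching it to paths. Passing `client=` per path is shown in the examples below; `set_as_default_client()` is assumed from the general cloudpathlib client API, and the cache directory and URLs are illustrative.

```python
from cloudpathlib import HttpsClient, HttpsPath

# One client carrying caching and upload settings (illustrative values)
client = HttpsClient(
    local_cache_dir="/tmp/http-cache",   # where fetched files are cached locally
    write_file_http_method="PUT",        # method used when writing/uploading files
)

# Option 1: pass the client explicitly for a single path
report = HttpsPath("https://files.example.com/reports/latest.csv", client=client)

# Option 2 (assumed from the general cloudpathlib client API): register it as
# the default so subsequent HttpsPath instances use it automatically
client.set_as_default_client()
archive = HttpsPath("https://files.example.com/reports/archive.csv")
```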

## Usage Examples

### Basic HTTP Operations

```python
from cloudpathlib import HttpPath, HttpsPath, HttpClient

# Create HTTP paths
http_path = HttpPath("http://example.com/api/data.json")
https_path = HttpsPath("https://api.example.com/data.json")

# Access URL properties
print(f"Netloc: {https_path.drive}")       # "api.example.com"
print(f"Anchor: {https_path.anchor}")      # "https://api.example.com"
print(f"Parsed: {https_path.parsed_url}")  # ParseResult object
```
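
Because these are pathlib-style objects, the familiar path components and joining also apply; a brief sketch with illustrative URLs (`name`, `suffix`, `parent`, and `/` come from the shared `CloudPath` interface):

```python
# Inspect and build URLs with pathlib-style operations
data_file = HttpsPath("https://api.example.com/v1/data/report.json")

print(data_file.name)    # "report.json"
print(data_file.suffix)  # ".json"
print(data_file.parent)  # https://api.example.com/v1/data

# Join path segments to form related URLs
summary = data_file.parent / "summary.json"
```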

### RESTful HTTP Operations

The request methods return a `(response, body)` tuple, and keyword arguments are forwarded to the underlying urllib request.

```python
import json

# Create HTTPS path for API endpoint
api_path = HttpsPath("https://api.example.com/users/123")

# GET request
response, body = api_path.get()
if response.status == 200:
    user_data = json.loads(body)
    print(f"User: {user_data}")

# POST request with a JSON body
create_path = HttpsPath("https://api.example.com/users")
payload = json.dumps({"name": "John Doe", "email": "john@example.com"}).encode()
response, body = create_path.post(
    data=payload, headers={"Content-Type": "application/json"}
)

# PUT request to update
update_payload = json.dumps({"name": "Jane Doe"}).encode()
response, body = api_path.put(
    data=update_payload, headers={"Content-Type": "application/json"}
)

# DELETE request
response, body = api_path.delete()
print(f"Delete status: {response.status}")

# HEAD request for metadata
response, body = api_path.head()
print(f"Content-Length: {response.headers.get('Content-Length')}")
```

### Authentication

```python
from requests.auth import HTTPBasicAuth

# Basic authentication
auth = HTTPBasicAuth('username', 'password')
client = HttpsClient(auth=auth)

authenticated_path = HttpsPath(
    "https://protected.example.com/data.json",
    client=client
)

# API key authentication
class APIKeyAuth:
    def __init__(self, api_key):
        self.api_key = api_key

    def __call__(self, request):
        request.headers['Authorization'] = f'Bearer {self.api_key}'
        return request

api_auth = APIKeyAuth('your-api-key')
client = HttpsClient(auth=api_auth)

# OAuth token authentication (oauth_token obtained elsewhere)
def oauth_auth(request):
    request.headers['Authorization'] = f'Bearer {oauth_token}'
    return request

client = HttpsClient(auth=oauth_auth)
```

### File Upload and Download

```python
# Download file from HTTP
file_url = HttpsPath("https://example.com/files/document.pdf")

# Download to local file
local_path = file_url.download_to("downloaded_document.pdf")
print(f"Downloaded to: {local_path}")

# Read content directly
content = file_url.read_bytes()

# Upload file via PUT (default)
upload_url = HttpsPath("https://upload.example.com/files/new_document.pdf")
upload_url.upload_from("local_document.pdf")

# Upload via POST
client = HttpsClient(write_file_http_method='POST')
upload_url = HttpsPath("https://upload.example.com/files/", client=client)
upload_url.upload_from("local_document.pdf")
```

### Custom Directory Listing

```python
from bs4 import BeautifulSoup

def parse_apache_directory_listing(response_text):
    """Parse Apache-style directory listing."""
    soup = BeautifulSoup(response_text, 'html.parser')
    entries = []

    for link in soup.find_all('a'):
        href = link.get('href')
        if href and href not in ('../', './'):
            entries.append(href.rstrip('/'))

    return entries

def is_directory(name):
    """Identify directories by trailing slash or no extension."""
    return name.endswith('/') or '.' not in name.split('/')[-1]

# Configure client with custom parsers
client = HttpClient(
    custom_list_page_parser=parse_apache_directory_listing,
    custom_dir_matcher=is_directory
)

# List directory contents
dir_path = HttpPath("http://files.example.com/data/", client=client)
for item in dir_path.iterdir():
    print(f"{'Dir' if item.is_dir() else 'File'}: {item.name}")
```

### Working with APIs

```python
import json

# REST API interaction
api_base = HttpsPath("https://jsonplaceholder.typicode.com")

# Get all posts
posts_path = api_base / "posts"
response, body = posts_path.get()
posts = json.loads(body)
print(f"Found {len(posts)} posts")

# Get specific post
post_path = api_base / "posts" / "1"
response, body = post_path.get()
post = json.loads(body)
print(f"Post title: {post['title']}")

# Create new post
new_post = {
    "title": "New Post",
    "body": "This is a new post",
    "userId": 1
}
response, body = posts_path.post(
    data=json.dumps(new_post).encode(),
    headers={"Content-Type": "application/json"}
)
created_post = json.loads(body)
print(f"Created post ID: {created_post['id']}")

# Update post
response, body = post_path.put(
    data=json.dumps({"title": "Updated Title"}).encode(),
    headers={"Content-Type": "application/json"}
)

# Delete post
response, body = post_path.delete()
print(f"Delete status: {response.status}")
```

### File Server Operations

```python
from pathlib import Path

# Work with file servers
file_server = HttpsPath("https://files.example.com")

# List files in directory, then download and process each one
data_dir = file_server / "data"
for file_path in data_dir.glob("*.csv"):
    print(f"CSV file: {file_path}")

    # Download and process (process_csv_file is your own function)
    local_file = file_path.download_to(f"local_{file_path.name}")
    process_csv_file(local_file)

# Upload files to server
local_files = Path("uploads/").glob("*.txt")
upload_dir = file_server / "uploads"

for local_file in local_files:
    remote_path = upload_dir / local_file.name
    remote_path.upload_from(local_file)
    print(f"Uploaded: {remote_path}")
```

### WebDAV Support

```python
from requests.auth import HTTPDigestAuth

# WebDAV server access
webdav_auth = HTTPDigestAuth('username', 'password')
client = HttpsClient(
    auth=webdav_auth,
    write_file_http_method='PUT'
)

webdav_path = HttpsPath("https://webdav.example.com/files/", client=client)

# WebDAV operations
document = webdav_path / "document.txt"
document.write_text("WebDAV content")

# Create directory (MKCOL method via custom request)
new_dir = webdav_path / "new_folder"
response, body = client.request(str(new_dir), 'MKCOL')

# List directory contents
for item in webdav_path.iterdir():
    print(f"WebDAV item: {item}")
```

### Streaming Operations

```python
import csv

# Stream large files
large_file_url = HttpsPath("https://download.example.com/large-dataset.zip")

# Download in fixed-size chunks
with large_file_url.open('rb') as remote_file:
    with open('local-dataset.zip', 'wb') as local_file:
        while chunk := remote_file.read(1024 * 1024):
            local_file.write(chunk)
            print(f"Downloaded chunk: {len(chunk)} bytes")

# Stream processing (process_row is your own function)
csv_url = HttpsPath("https://data.example.com/big-data.csv")
with csv_url.open('r') as f:
    reader = csv.DictReader(f)
    for row_num, row in enumerate(reader):
        process_row(row)
        if row_num % 1000 == 0:
            print(f"Processed {row_num} rows")
```

### Custom Headers and Parameters

```python
# Configure client with custom headers
class CustomHeadersAuth:
    def __init__(self, api_key, user_agent):
        self.api_key = api_key
        self.user_agent = user_agent

    def __call__(self, request):
        request.headers.update({
            'X-API-Key': self.api_key,
            'User-Agent': self.user_agent,
            'Accept': 'application/json'
        })
        return request

client = HttpsClient(auth=CustomHeadersAuth('key123', 'MyApp/1.0'))

# Make requests with custom headers; query parameters go in the URL itself
api_path = HttpsPath("https://api.example.com/data?format=json&limit=100", client=client)
response, body = api_path.get()
```

### Session Management

```python
import requests
from requests.auth import HTTPBasicAuth

# Use persistent session
session = requests.Session()
session.headers.update({'User-Agent': 'CloudPathLib/1.0'})
session.auth = HTTPBasicAuth('user', 'pass')

# Configure client to use the session
class SessionClient(HttpsClient):
    def __init__(self, session, **kwargs):
        super().__init__(**kwargs)
        self.session = session

    def request(self, url, method, **kwargs):
        # Mirror the (response, body) shape of the base client
        response = self.session.request(method, url, **kwargs)
        return response, response.content

client = SessionClient(session)

# All requests use the same session
path1 = HttpsPath("https://api.example.com/resource1", client=client)
path2 = HttpsPath("https://api.example.com/resource2", client=client)

response1, body1 = path1.get()  # Uses session
response2, body2 = path2.get()  # Reuses session connection
```

### Error Handling

```python
from cloudpathlib import CloudPathFileNotFoundError
import json
import requests

try:
    http_path = HttpsPath("https://api.example.com/nonexistent")
    content = http_path.read_text()
except CloudPathFileNotFoundError:
    print("HTTP resource not found")
except requests.exceptions.ConnectionError:
    print("Connection failed")
except requests.exceptions.Timeout:
    print("Request timed out")
except requests.exceptions.HTTPError as e:
    print(f"HTTP error: {e}")
except requests.exceptions.RequestException as e:
    print(f"Request error: {e}")

# Check response status
http_path = HttpsPath("https://api.example.com/data")
response, body = http_path.get()

if response.status == 200:
    data = json.loads(body)
elif response.status == 404:
    print("Resource not found")
elif response.status == 401:
    print("Authentication required")
else:
    print(f"HTTP {response.status}: {response.reason}")
```

### Performance Optimization

```python
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

# Configure retries on a shared session
session = requests.Session()

# Configure retry strategy
retry_strategy = Retry(
    total=3,
    backoff_factor=1,
    status_forcelist=[429, 500, 502, 503, 504]
)

adapter = HTTPAdapter(max_retries=retry_strategy)
session.mount("http://", adapter)
session.mount("https://", adapter)

# Note: requests does not honor a session-level timeout attribute;
# pass timeout=(connect, read) per request if you need one.

# Reuse the SessionClient from the Session Management example
client = SessionClient(session)

# Concurrent downloads
import concurrent.futures

def download_file(url_str):
    url = HttpsPath(url_str, client=client)
    return url.download_to(f"downloads/{url.name}")

urls = [
    "https://example.com/file1.txt",
    "https://example.com/file2.txt",
    "https://example.com/file3.txt"
]

with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
    futures = [executor.submit(download_file, url) for url in urls]

    for future in concurrent.futures.as_completed(futures):
        try:
            result = future.result()
            print(f"Downloaded: {result}")
        except Exception as e:
            print(f"Download failed: {e}")
```