# Standard Library Integration

Monkey patching capabilities to make Python's built-in functions work transparently with cloud paths. These patches enable existing code to work with cloud storage without modification by extending standard library functions to recognize and handle CloudPath objects.

## Capabilities

### Patching Functions

Functions to patch various parts of the Python standard library.

```python { .api }
def patch_open(original_open=None) -> None:
    """
    Patch builtin open() to work with CloudPaths.

    Args:
        original_open: Original open function to preserve (optional)
    """

def patch_os_functions() -> None:
    """
    Patch os and os.path functions to work with CloudPaths.

    Patches functions like os.listdir, os.stat, os.path.exists, etc.
    """

def patch_glob() -> None:
    """
    Patch glob.glob() and glob.iglob() to work with CloudPaths.
    """

def patch_all_builtins() -> None:
    """
    Apply all patches at once.

    Equivalent to calling patch_open(), patch_os_functions(), and patch_glob().
    """
```
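
The exact wrapper logic lives inside the library, but the idea behind each patch can be sketched in a few lines. The snippet below is illustrative only, not cloudpathlib's actual implementation, and the helper name is invented: it shows how a replacement `open()` could route CloudPath arguments to `CloudPath.open()` while delegating everything else to the preserved original, and how the original can be restored by hand if the patch should only be temporary.

```python
import builtins

from cloudpathlib import CloudPath

# Illustrative sketch only -- not cloudpathlib's internal implementation.
_original_open = builtins.open  # keep a reference to the unpatched builtin

def _open_dispatching_on_cloudpath(file, mode="r", **kwargs):
    """Route CloudPath arguments to CloudPath.open(); defer everything else."""
    if isinstance(file, CloudPath):
        return file.open(mode, **kwargs)  # cloud-backed file object
    return _original_open(file, mode, **kwargs)  # unchanged local behavior

builtins.open = _open_dispatching_on_cloudpath  # roughly what a patch does

# ... code that mixes CloudPath and local paths ...

builtins.open = _original_open  # restore when patching should be temporary
```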

### Patched Functions

The following functions are modified to work with CloudPath objects:

#### Built-in Functions

```python { .api }
# After patch_open()
def open(file, mode='r', **kwargs):
    """Enhanced open() that works with CloudPath objects."""
```

#### OS Module Functions

```python { .api }
# After patch_os_functions()
def os.fspath(path): ...
def os.listdir(path): ...
def os.lstat(path): ...
def os.mkdir(path, mode=0o777, *, dir_fd=None): ...
def os.makedirs(name, mode=0o777, exist_ok=False): ...
def os.remove(path, *, dir_fd=None): ...
def os.removedirs(name): ...
def os.rename(src, dst, *, src_dir_fd=None, dst_dir_fd=None): ...
def os.renames(old, new): ...
def os.replace(src, dst, *, src_dir_fd=None, dst_dir_fd=None): ...
def os.rmdir(path, *, dir_fd=None): ...
def os.scandir(path='.'): ...
def os.stat(path, *, dir_fd=None, follow_symlinks=True): ...
def os.unlink(path, *, dir_fd=None): ...
def os.walk(top, topdown=True, onerror=None, followlinks=False): ...
```

#### OS.Path Module Functions

```python { .api }
# After patch_os_functions()
def os.path.basename(path): ...
def os.path.commonpath(paths): ...
def os.path.commonprefix(list): ...
def os.path.dirname(path): ...
def os.path.exists(path): ...
def os.path.getatime(path): ...
def os.path.getmtime(path): ...
def os.path.getctime(path): ...
def os.path.getsize(path): ...
def os.path.isfile(path): ...
def os.path.isdir(path): ...
def os.path.join(path, *paths): ...
def os.path.split(path): ...
def os.path.splitext(path): ...
```

#### Glob Module Functions

```python { .api }
# After patch_glob()
def glob.glob(pathname, *, recursive=False): ...
def glob.iglob(pathname, *, recursive=False): ...
```

94

95

## Usage Examples

96

97

### Basic Patching

98

99

```python

100

from cloudpathlib import patch_all_builtins, CloudPath

101

102

# Apply all patches

103

patch_all_builtins()

104

105

# Now standard library functions work with CloudPath

106

cloud_file = CloudPath("s3://my-bucket/data.txt")

107

108

# Built-in open() now works with CloudPath

109

with open(cloud_file, 'r') as f:

110

content = f.read()

111

112

# os.path functions work with CloudPath

113

import os.path

114

print(os.path.exists(cloud_file)) # True/False

115

print(os.path.basename(cloud_file)) # "data.txt"

116

print(os.path.dirname(cloud_file)) # "s3://my-bucket"

117

print(os.path.getsize(cloud_file)) # File size in bytes

118

119

# glob works with CloudPath

120

import glob

121

csv_files = glob.glob("s3://my-bucket/*.csv")

122

all_files = glob.glob("s3://my-bucket/**/*", recursive=True)

123

```

### Selective Patching

```python
from cloudpathlib import patch_open, patch_os_functions, patch_glob

# Apply patches selectively
patch_open()          # Only patch open()
patch_os_functions()  # Only patch os and os.path functions
patch_glob()          # Only patch glob functions

# Or combine as needed
patch_open()
patch_glob()  # Skip os functions if not needed
```

### Legacy Code Integration

```python
# Existing code that works with local files
def process_files(directory):
    """Legacy function that processes files in a directory."""
    import os
    import glob

    # This code was written for local files
    for filename in os.listdir(directory):
        filepath = os.path.join(directory, filename)

        if os.path.isfile(filepath):
            size = os.path.getsize(filepath)
            print(f"Processing {filename} ({size} bytes)")

            with open(filepath, 'r') as f:
                content = f.read()
                # Process content...

# After patching, this works with cloud storage too!
from cloudpathlib import patch_all_builtins, CloudPath

patch_all_builtins()

# Same function now works with cloud paths
process_files("s3://my-bucket/data/")  # Works!
process_files("/local/directory/")     # Still works!
process_files("gs://bucket/files/")    # Works!
```

### Environment Variable Configuration

```python
import os
from cloudpathlib import patch_all_builtins

# CloudPathLib automatically applies patches based on environment variables.
# Set these before importing cloudpathlib:
#
# CLOUDPATHLIB_PATCH_OPEN=1 - patches open()
# CLOUDPATHLIB_PATCH_OS=1   - patches os functions
# CLOUDPATHLIB_PATCH_GLOB=1 - patches glob functions
# CLOUDPATHLIB_PATCH_ALL=1  - patches everything

# Or apply patches programmatically
if os.environ.get("ENABLE_CLOUD_PATCHING"):
    patch_all_builtins()

# Now existing code works with cloud paths
def backup_config():
    config_path = os.environ.get("CONFIG_PATH", "./config.json")
    backup_path = os.environ.get("BACKUP_PATH", "./config.backup.json")

    # Works whether paths are local or cloud URIs
    if os.path.exists(config_path):
        with open(config_path, 'r') as f:
            config_data = f.read()

        with open(backup_path, 'w') as f:
            f.write(config_data)

        print(f"Backed up {config_path} to {backup_path}")

# Usage:
# CONFIG_PATH=s3://config-bucket/prod-config.json
# BACKUP_PATH=s3://backup-bucket/config-backup.json
backup_config()  # Works with cloud paths!
```
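
If you rely on the environment-variable switches above, ordering matters: the variable has to be set before cloudpathlib is first imported. A minimal in-process sketch, assuming the `CLOUDPATHLIB_PATCH_ALL` switch behaves as described above:

```python
import os

# Assumes the CLOUDPATHLIB_PATCH_ALL switch described above: the variable must
# be set before cloudpathlib is first imported in this process.
os.environ["CLOUDPATHLIB_PATCH_ALL"] = "1"

import cloudpathlib  # patches are applied when the import runs
```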

### File Processing Pipelines

```python
from cloudpathlib import patch_all_builtins
import os
import glob

patch_all_builtins()

def data_pipeline(input_dir, output_dir, pattern="*.csv"):
    """Data processing pipeline that works with any storage."""

    # Create output directory
    os.makedirs(output_dir, exist_ok=True)

    # Find all matching files
    search_pattern = os.path.join(input_dir, pattern)
    input_files = glob.glob(search_pattern)

    print(f"Found {len(input_files)} files matching {pattern}")

    for input_file in input_files:
        # Get file info
        filename = os.path.basename(input_file)
        file_size = os.path.getsize(input_file)

        print(f"Processing {filename} ({file_size} bytes)")

        # Read and process
        with open(input_file, 'r') as f:
            data = f.read()

        processed_data = data.upper()  # Example processing

        # Write output
        output_file = os.path.join(output_dir, f"processed_{filename}")
        with open(output_file, 'w') as f:
            f.write(processed_data)

        print(f"Wrote {output_file}")

# Works with any combination of local and cloud storage
data_pipeline(
    input_dir="s3://raw-data-bucket/csv/",
    output_dir="s3://processed-data-bucket/csv/",
    pattern="*.csv"
)

data_pipeline(
    input_dir="/local/input/",
    output_dir="gs://output-bucket/processed/",
    pattern="*.txt"
)
```

### Directory Traversal

```python
from cloudpathlib import patch_all_builtins
import os

patch_all_builtins()

def find_files_by_extension(root_dir, extension):
    """Find all files with given extension."""
    found_files = []

    # os.walk now works with cloud paths
    for dirpath, dirnames, filenames in os.walk(root_dir):
        for filename in filenames:
            if filename.endswith(extension):
                filepath = os.path.join(dirpath, filename)
                file_size = os.path.getsize(filepath)
                found_files.append({
                    'path': filepath,
                    'size': file_size,
                    'dir': dirpath
                })

    return found_files

# Works with cloud storage
python_files = find_files_by_extension("s3://code-bucket/", ".py")
log_files = find_files_by_extension("gs://logs-bucket/", ".log")

for file_info in python_files:
    print(f"Python file: {file_info['path']} ({file_info['size']} bytes)")
```

### CSV Processing Example

```python
from cloudpathlib import patch_all_builtins
import csv
import os
import glob

patch_all_builtins()

def process_csv_files(input_pattern, output_dir):
    """Process CSV files with standard library functions."""

    # Find all CSV files
    csv_files = glob.glob(input_pattern)

    # Create output directory
    os.makedirs(output_dir, exist_ok=True)

    for csv_file in csv_files:
        filename = os.path.basename(csv_file)
        output_file = os.path.join(output_dir, f"summary_{filename}")

        print(f"Processing {filename}")

        # Read CSV
        with open(csv_file, 'r', newline='') as infile:
            reader = csv.DictReader(infile)
            rows = list(reader)

        # Generate summary
        summary = {
            'filename': filename,
            'row_count': len(rows),
            'columns': list(rows[0].keys()) if rows else [],
            'file_size': os.path.getsize(csv_file)
        }

        # Write summary
        with open(output_file, 'w', newline='') as outfile:
            writer = csv.DictWriter(outfile, fieldnames=summary.keys())
            writer.writeheader()
            writer.writerow(summary)

        print(f"Summary written to {output_file}")

# Works with cloud CSV files
process_csv_files(
    input_pattern="s3://data-bucket/exports/*.csv",
    output_dir="s3://reports-bucket/summaries/"
)
```

### JSON Configuration Processing

```python
from cloudpathlib import patch_all_builtins
import json
import os
import glob

patch_all_builtins()

def merge_config_files(config_pattern, output_file):
    """Merge multiple JSON config files."""

    config_files = glob.glob(config_pattern)
    merged_config = {}

    for config_file in config_files:
        filename = os.path.basename(config_file)
        print(f"Loading config from {filename}")

        with open(config_file, 'r') as f:
            config_data = json.load(f)

        # Merge configuration
        merged_config.update(config_data)

    # Write merged configuration
    os.makedirs(os.path.dirname(output_file), exist_ok=True)
    with open(output_file, 'w') as f:
        json.dump(merged_config, f, indent=2)

    print(f"Merged configuration written to {output_file}")
    return merged_config

# Merge cloud-based config files
merged = merge_config_files(
    config_pattern="s3://config-bucket/environments/*.json",
    output_file="s3://config-bucket/merged/production.json"
)
```

### Batch File Operations

```python
from cloudpathlib import patch_all_builtins
from datetime import datetime
import os
import glob

patch_all_builtins()

def organize_files_by_date(source_pattern, base_output_dir):
    """Organize files into date-based directories."""

    files_to_organize = glob.glob(source_pattern)

    for file_path in files_to_organize:
        # Get file modification time
        stat_info = os.stat(file_path)
        mod_time = stat_info.st_mtime

        # Create date-based directory structure
        date_str = datetime.fromtimestamp(mod_time).strftime("%Y/%m/%d")

        output_dir = os.path.join(base_output_dir, date_str)
        os.makedirs(output_dir, exist_ok=True)

        filename = os.path.basename(file_path)
        output_path = os.path.join(output_dir, filename)

        # Move file (copy for cross-cloud operations)
        print(f"Moving {filename} to {date_str}/")
        with open(file_path, 'rb') as src, open(output_path, 'wb') as dst:
            dst.write(src.read())

        # Remove original (be careful with this!)
        # os.remove(file_path)

# Organize cloud files by date
organize_files_by_date(
    source_pattern="s3://uploads-bucket/incoming/*",
    base_output_dir="s3://organized-bucket/by-date/"
)
```

### Error Handling with Patched Functions

```python
from cloudpathlib import patch_all_builtins
import os
import glob

patch_all_builtins()

def safe_file_operations(file_pattern):
    """Demonstrate error handling with patched functions."""

    try:
        files = glob.glob(file_pattern)
        print(f"Found {len(files)} files")

        for file_path in files:
            try:
                # Check if file exists
                if os.path.exists(file_path):
                    # Get file info
                    size = os.path.getsize(file_path)
                    print(f"File: {os.path.basename(file_path)} ({size} bytes)")

                    # Try to read file
                    with open(file_path, 'r') as f:
                        content = f.read(100)  # Read first 100 chars
                        print(f"Content preview: {content[:50]}...")

            except PermissionError:
                print(f"Permission denied: {file_path}")
            except UnicodeDecodeError:
                print(f"Binary file (skipping): {file_path}")
            except Exception as e:
                print(f"Error processing {file_path}: {e}")

    except Exception as e:
        print(f"Error with pattern {file_pattern}: {e}")

# Handle errors gracefully
safe_file_operations("s3://my-bucket/**/*.txt")
safe_file_operations("/nonexistent/path/*")
```