# Standard Library Integration

This module provides monkey patching capabilities that make Python's built-in functions work transparently with cloud paths. These patches enable existing code to work with cloud storage without modification by extending standard library functions to recognize and handle CloudPath objects.

## Capabilities

### Patching Functions

Functions to patch various parts of the Python standard library.

```python { .api }
def patch_open(original_open=None) -> None:
    """
    Patch the builtin open() to work with CloudPaths.

    Args:
        original_open: Original open function to preserve (optional)
    """

def patch_os_functions() -> None:
    """
    Patch os and os.path functions to work with CloudPaths.

    Patches functions like os.listdir, os.stat, os.path.exists, etc.
    """

def patch_glob() -> None:
    """
    Patch glob.glob() and glob.iglob() to work with CloudPaths.
    """

def patch_all_builtins() -> None:
    """
    Apply all patches at once.

    Equivalent to calling patch_open(), patch_os_functions(), and patch_glob().
    """
```
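
Conceptually, each patch wraps a standard function so that CloudPath arguments are routed to the equivalent CloudPath method, while everything else falls through to the original. The snippet below is an illustrative sketch of that dispatch pattern, not the library's actual implementation; the wrapper name and the module-level `_original_open` reference are hypothetical, and the only API it relies on is `CloudPath.open()` from the pathlib-style interface.

```python
import builtins

from cloudpathlib import CloudPath

# Keep a reference to the unpatched builtin so local paths keep working.
_original_open = builtins.open


def _open_with_cloud_support(file, mode="r", *args, **kwargs):
    # CloudPath arguments are dispatched to CloudPath.open(), which returns
    # a file-like object backed by the cloud provider.
    if isinstance(file, CloudPath):
        return file.open(mode, *args, **kwargs)
    # Everything else (str, os.PathLike, file descriptors) is handled by
    # the original builtin open().
    return _original_open(file, mode, *args, **kwargs)


builtins.open = _open_with_cloud_support
```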

### Patched Functions

The following functions are modified to work with CloudPath objects:

#### Built-in Functions

```python { .api }
# After patch_open()
def open(file, mode='r', **kwargs):
    """Enhanced open() that works with CloudPath objects."""
```

#### OS Module Functions

```python { .api }
# After patch_os_functions()
def os.fspath(path): ...
def os.listdir(path): ...
def os.lstat(path): ...
def os.mkdir(path, mode=0o777, *, dir_fd=None): ...
def os.makedirs(name, mode=0o777, exist_ok=False): ...
def os.remove(path, *, dir_fd=None): ...
def os.removedirs(name): ...
def os.rename(src, dst, *, src_dir_fd=None, dst_dir_fd=None): ...
def os.renames(old, new): ...
def os.replace(src, dst, *, src_dir_fd=None, dst_dir_fd=None): ...
def os.rmdir(path, *, dir_fd=None): ...
def os.scandir(path='.'): ...
def os.stat(path, *, dir_fd=None, follow_symlinks=True): ...
def os.unlink(path, *, dir_fd=None): ...
def os.walk(top, topdown=True, onerror=None, followlinks=False): ...
```

#### OS.Path Module Functions

```python { .api }
# After patch_os_functions()
def os.path.basename(path): ...
def os.path.commonpath(paths): ...
def os.path.commonprefix(list): ...
def os.path.dirname(path): ...
def os.path.exists(path): ...
def os.path.getatime(path): ...
def os.path.getmtime(path): ...
def os.path.getctime(path): ...
def os.path.getsize(path): ...
def os.path.isfile(path): ...
def os.path.isdir(path): ...
def os.path.join(path, *paths): ...
def os.path.split(path): ...
def os.path.splitext(path): ...
```

#### Glob Module Functions

```python { .api }
# After patch_glob()
def glob.glob(pathname, *, recursive=False): ...
def glob.iglob(pathname, *, recursive=False): ...
```
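
The os, os.path, and glob patches follow the same dispatch idea. A rough sketch for os.path.exists is shown below; it is illustrative only, and the cloud-URI string check is a simplifying assumption made to match the usage examples later on this page, where plain "s3://..." strings are passed to the patched functions.

```python
import os.path

from cloudpathlib import CloudPath

_original_exists = os.path.exists


def _is_cloud_uri(value) -> bool:
    # Simplified check covering the URI schemes used in the examples below.
    return isinstance(value, str) and value.startswith(("s3://", "gs://", "az://"))


def _exists_with_cloud_support(path) -> bool:
    if isinstance(path, CloudPath):
        return path.exists()             # CloudPath.exists() queries the provider
    if _is_cloud_uri(path):
        return CloudPath(path).exists()  # coerce URI strings, as in the examples
    return _original_exists(path)        # local paths use the original function


os.path.exists = _exists_with_cloud_support
```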

## Usage Examples

### Basic Patching

```python
from cloudpathlib import patch_all_builtins, CloudPath

# Apply all patches
patch_all_builtins()

# Now standard library functions work with CloudPath
cloud_file = CloudPath("s3://my-bucket/data.txt")

# Built-in open() now works with CloudPath
with open(cloud_file, 'r') as f:
    content = f.read()

# os.path functions work with CloudPath
import os.path
print(os.path.exists(cloud_file))    # True/False
print(os.path.basename(cloud_file))  # "data.txt"
print(os.path.dirname(cloud_file))   # "s3://my-bucket"
print(os.path.getsize(cloud_file))   # File size in bytes

# glob works with CloudPath
import glob
csv_files = glob.glob("s3://my-bucket/*.csv")
all_files = glob.glob("s3://my-bucket/**/*", recursive=True)
```

### Selective Patching

```python
from cloudpathlib import patch_open, patch_os_functions, patch_glob

# Apply patches selectively
patch_open()          # Only patch open()
patch_os_functions()  # Only patch os and os.path functions
patch_glob()          # Only patch glob functions

# Or combine as needed
patch_open()
patch_glob()  # Skip os functions if not needed
```
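
The original_open parameter documented above can also be used here to keep an explicit handle on the unpatched builtin. A small sketch, assuming the parameter behaves as documented:

```python
import builtins

from cloudpathlib import patch_open

# Capture the unpatched builtin before applying the patch so code that must
# stay local-only can still call it directly.
local_open = builtins.open
patch_open(original_open=local_open)

with local_open("local-only.txt", "w") as f:
    f.write("written with the original open()")
```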

### Legacy Code Integration

```python
# Existing code that works with local files
def process_files(directory):
    """Legacy function that processes files in a directory."""
    import os

    # This code was written for local files
    for filename in os.listdir(directory):
        filepath = os.path.join(directory, filename)

        if os.path.isfile(filepath):
            size = os.path.getsize(filepath)
            print(f"Processing {filename} ({size} bytes)")

            with open(filepath, 'r') as f:
                content = f.read()
                # Process content...

# After patching, this works with cloud storage too!
from cloudpathlib import patch_all_builtins

patch_all_builtins()

# Same function now works with cloud paths
process_files("s3://my-bucket/data/")  # Works!
process_files("/local/directory/")     # Still works!
process_files("gs://bucket/files/")    # Works!
```

### Environment Variable Configuration

```python
import os
from cloudpathlib import patch_all_builtins

# CloudPathLib automatically applies patches based on environment variables
# Set these before importing cloudpathlib:

# CLOUDPATHLIB_PATCH_OPEN=1 - patches open()
# CLOUDPATHLIB_PATCH_OS=1 - patches os functions
# CLOUDPATHLIB_PATCH_GLOB=1 - patches glob functions
# CLOUDPATHLIB_PATCH_ALL=1 - patches everything

# Or apply patches programmatically
if os.environ.get("ENABLE_CLOUD_PATCHING"):
    patch_all_builtins()

# Now existing code works with cloud paths
def backup_config():
    config_path = os.environ.get("CONFIG_PATH", "./config.json")
    backup_path = os.environ.get("BACKUP_PATH", "./config.backup.json")

    # Works whether paths are local or cloud URIs
    if os.path.exists(config_path):
        with open(config_path, 'r') as f:
            config_data = f.read()

        with open(backup_path, 'w') as f:
            f.write(config_data)

        print(f"Backed up {config_path} to {backup_path}")

# Usage
# CONFIG_PATH=s3://config-bucket/prod-config.json
# BACKUP_PATH=s3://backup-bucket/config-backup.json
backup_config()  # Works with cloud paths!
```

### File Processing Pipelines

```python
from cloudpathlib import patch_all_builtins
import os
import glob

patch_all_builtins()

def data_pipeline(input_dir, output_dir, pattern="*.csv"):
    """Data processing pipeline that works with any storage."""

    # Create output directory
    os.makedirs(output_dir, exist_ok=True)

    # Find all matching files
    search_pattern = os.path.join(input_dir, pattern)
    input_files = glob.glob(search_pattern)

    print(f"Found {len(input_files)} files matching {pattern}")

    for input_file in input_files:
        # Get file info
        filename = os.path.basename(input_file)
        file_size = os.path.getsize(input_file)

        print(f"Processing {filename} ({file_size} bytes)")

        # Read and process
        with open(input_file, 'r') as f:
            data = f.read()

        processed_data = data.upper()  # Example processing

        # Write output
        output_file = os.path.join(output_dir, f"processed_{filename}")
        with open(output_file, 'w') as f:
            f.write(processed_data)

        print(f"Wrote {output_file}")

# Works with any combination of local and cloud storage
data_pipeline(
    input_dir="s3://raw-data-bucket/csv/",
    output_dir="s3://processed-data-bucket/csv/",
    pattern="*.csv"
)

data_pipeline(
    input_dir="/local/input/",
    output_dir="gs://output-bucket/processed/",
    pattern="*.txt"
)
```

### Directory Traversal

```python
from cloudpathlib import patch_all_builtins
import os

patch_all_builtins()

def find_files_by_extension(root_dir, extension):
    """Find all files with given extension."""
    found_files = []

    # os.walk now works with cloud paths
    for dirpath, dirnames, filenames in os.walk(root_dir):
        for filename in filenames:
            if filename.endswith(extension):
                filepath = os.path.join(dirpath, filename)
                file_size = os.path.getsize(filepath)
                found_files.append({
                    'path': filepath,
                    'size': file_size,
                    'dir': dirpath
                })

    return found_files

# Works with cloud storage
python_files = find_files_by_extension("s3://code-bucket/", ".py")
log_files = find_files_by_extension("gs://logs-bucket/", ".log")

for file_info in python_files:
    print(f"Python file: {file_info['path']} ({file_info['size']} bytes)")
```

### CSV Processing Example

```python
from cloudpathlib import patch_all_builtins
import csv
import os
import glob

patch_all_builtins()

def process_csv_files(input_pattern, output_dir):
    """Process CSV files with standard library functions."""

    # Find all CSV files
    csv_files = glob.glob(input_pattern)

    # Create output directory
    os.makedirs(output_dir, exist_ok=True)

    for csv_file in csv_files:
        filename = os.path.basename(csv_file)
        output_file = os.path.join(output_dir, f"summary_{filename}")

        print(f"Processing {filename}")

        # Read CSV
        with open(csv_file, 'r', newline='') as infile:
            reader = csv.DictReader(infile)
            rows = list(reader)

        # Generate summary
        summary = {
            'filename': filename,
            'row_count': len(rows),
            'columns': list(rows[0].keys()) if rows else [],
            'file_size': os.path.getsize(csv_file)
        }

        # Write summary
        with open(output_file, 'w', newline='') as outfile:
            writer = csv.DictWriter(outfile, fieldnames=summary.keys())
            writer.writeheader()
            writer.writerow(summary)

        print(f"Summary written to {output_file}")

# Works with cloud CSV files
process_csv_files(
    input_pattern="s3://data-bucket/exports/*.csv",
    output_dir="s3://reports-bucket/summaries/"
)
```

### JSON Configuration Processing

```python
from cloudpathlib import patch_all_builtins
import json
import os
import glob

patch_all_builtins()

def merge_config_files(config_pattern, output_file):
    """Merge multiple JSON config files."""

    config_files = glob.glob(config_pattern)
    merged_config = {}

    for config_file in config_files:
        filename = os.path.basename(config_file)
        print(f"Loading config from {filename}")

        with open(config_file, 'r') as f:
            config_data = json.load(f)

        # Merge configuration
        merged_config.update(config_data)

    # Write merged configuration
    os.makedirs(os.path.dirname(output_file), exist_ok=True)
    with open(output_file, 'w') as f:
        json.dump(merged_config, f, indent=2)

    print(f"Merged configuration written to {output_file}")
    return merged_config

# Merge cloud-based config files
merged = merge_config_files(
    config_pattern="s3://config-bucket/environments/*.json",
    output_file="s3://config-bucket/merged/production.json"
)
```

### Batch File Operations

```python
from cloudpathlib import patch_all_builtins
from datetime import datetime
import os
import glob

patch_all_builtins()

def organize_files_by_date(source_pattern, base_output_dir):
    """Organize files into date-based directories."""

    files_to_organize = glob.glob(source_pattern)

    for file_path in files_to_organize:
        # Get file modification time
        stat_info = os.stat(file_path)
        mod_time = stat_info.st_mtime

        # Create date-based directory structure
        date_str = datetime.fromtimestamp(mod_time).strftime("%Y/%m/%d")

        output_dir = os.path.join(base_output_dir, date_str)
        os.makedirs(output_dir, exist_ok=True)

        filename = os.path.basename(file_path)
        output_path = os.path.join(output_dir, filename)

        # Copy the file to its new location (the read/write below works
        # across local and cloud storage)
        print(f"Moving {filename} to {date_str}/")
        with open(file_path, 'rb') as src, open(output_path, 'wb') as dst:
            dst.write(src.read())

        # Remove original (be careful with this!)
        # os.remove(file_path)

# Organize cloud files by date
organize_files_by_date(
    source_pattern="s3://uploads-bucket/incoming/*",
    base_output_dir="s3://organized-bucket/by-date/"
)
```

### Error Handling with Patched Functions

```python
from cloudpathlib import patch_all_builtins
import os
import glob

patch_all_builtins()

def safe_file_operations(file_pattern):
    """Demonstrate error handling with patched functions."""

    try:
        # recursive=True so ** patterns match nested files
        files = glob.glob(file_pattern, recursive=True)
        print(f"Found {len(files)} files")

        for file_path in files:
            try:
                # Check if file exists
                if os.path.exists(file_path):
                    # Get file info
                    size = os.path.getsize(file_path)
                    print(f"File: {os.path.basename(file_path)} ({size} bytes)")

                    # Try to read file
                    with open(file_path, 'r') as f:
                        content = f.read(100)  # Read first 100 chars
                        print(f"Content preview: {content[:50]}...")

            except PermissionError:
                print(f"Permission denied: {file_path}")
            except UnicodeDecodeError:
                print(f"Binary file (skipping): {file_path}")
            except Exception as e:
                print(f"Error processing {file_path}: {e}")

    except Exception as e:
        print(f"Error with pattern {file_pattern}: {e}")

# Handle errors gracefully
safe_file_operations("s3://my-bucket/**/*.txt")
safe_file_operations("/nonexistent/path/*")
```