# Standard Library Integration

This module provides monkey patching capabilities that make Python's built-in functions work transparently with cloud paths. These patches enable existing code to work with cloud storage without modification by extending standard library functions to recognize and handle CloudPath objects.

## Capabilities

### Patching Functions

Functions to patch various parts of the Python standard library.

```python { .api }
def patch_open(original_open=None) -> None:
    """
    Patch the builtin open() to work with CloudPaths.

    Args:
        original_open: Original open function to preserve (optional)
    """

def patch_os_functions() -> None:
    """
    Patch os and os.path functions to work with CloudPaths.

    Patches functions like os.listdir, os.stat, os.path.exists, etc.
    """

def patch_glob() -> None:
    """
    Patch glob.glob() and glob.iglob() to work with CloudPaths.
    """

def patch_all_builtins() -> None:
    """
    Apply all patches at once.

    Equivalent to calling patch_open(), patch_os_functions(), and patch_glob().
    """
```
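
Conceptually, each patch wraps a standard function so that CloudPath arguments are routed to the equivalent CloudPath method, while everything else falls through to the original. The snippet below is an illustrative sketch of that dispatch pattern, not the library's actual implementation; the wrapper name and the module-level `_original_open` reference are hypothetical, and the only API it relies on is `CloudPath.open()` from the pathlib-style interface.

```python
import builtins

from cloudpathlib import CloudPath

# Keep a reference to the unpatched builtin so local paths keep working.
_original_open = builtins.open


def _open_with_cloud_support(file, mode="r", *args, **kwargs):
    # CloudPath arguments are dispatched to CloudPath.open(), which returns
    # a file-like object backed by the cloud provider.
    if isinstance(file, CloudPath):
        return file.open(mode, *args, **kwargs)
    # Everything else (str, os.PathLike, file descriptors) is handled by
    # the original builtin open().
    return _original_open(file, mode, *args, **kwargs)


builtins.open = _open_with_cloud_support
```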

### Patched Functions

The following functions are modified to work with CloudPath objects:

#### Built-in Functions

```python { .api }
# After patch_open()
def open(file, mode='r', **kwargs):
    """Enhanced open() that works with CloudPath objects."""
```

#### OS Module Functions

```python { .api }
# After patch_os_functions()
def os.fspath(path): ...
def os.listdir(path): ...
def os.lstat(path): ...
def os.mkdir(path, mode=0o777, *, dir_fd=None): ...
def os.makedirs(name, mode=0o777, exist_ok=False): ...
def os.remove(path, *, dir_fd=None): ...
def os.removedirs(name): ...
def os.rename(src, dst, *, src_dir_fd=None, dst_dir_fd=None): ...
def os.renames(old, new): ...
def os.replace(src, dst, *, src_dir_fd=None, dst_dir_fd=None): ...
def os.rmdir(path, *, dir_fd=None): ...
def os.scandir(path='.'): ...
def os.stat(path, *, dir_fd=None, follow_symlinks=True): ...
def os.unlink(path, *, dir_fd=None): ...
def os.walk(top, topdown=True, onerror=None, followlinks=False): ...
```

#### OS.Path Module Functions

```python { .api }
# After patch_os_functions()
def os.path.basename(path): ...
def os.path.commonpath(paths): ...
def os.path.commonprefix(list): ...
def os.path.dirname(path): ...
def os.path.exists(path): ...
def os.path.getatime(path): ...
def os.path.getmtime(path): ...
def os.path.getctime(path): ...
def os.path.getsize(path): ...
def os.path.isfile(path): ...
def os.path.isdir(path): ...
def os.path.join(path, *paths): ...
def os.path.split(path): ...
def os.path.splitext(path): ...
```

#### Glob Module Functions

```python { .api }
# After patch_glob()
def glob.glob(pathname, *, recursive=False): ...
def glob.iglob(pathname, *, recursive=False): ...
```
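
The os, os.path, and glob patches follow the same dispatch idea. A rough sketch for os.path.exists is shown below; it is illustrative only, and the cloud-URI string check is a simplifying assumption made to match the usage examples later on this page, where plain "s3://..." strings are passed to the patched functions.

```python
import os.path

from cloudpathlib import CloudPath

_original_exists = os.path.exists


def _is_cloud_uri(value) -> bool:
    # Simplified check covering the URI schemes used in the examples below.
    return isinstance(value, str) and value.startswith(("s3://", "gs://", "az://"))


def _exists_with_cloud_support(path) -> bool:
    if isinstance(path, CloudPath):
        return path.exists()             # CloudPath.exists() queries the provider
    if _is_cloud_uri(path):
        return CloudPath(path).exists()  # coerce URI strings, as in the examples
    return _original_exists(path)        # local paths use the original function


os.path.exists = _exists_with_cloud_support
```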

## Usage Examples

### Basic Patching

```python
from cloudpathlib import patch_all_builtins, CloudPath

# Apply all patches
patch_all_builtins()

# Now standard library functions work with CloudPath
cloud_file = CloudPath("s3://my-bucket/data.txt")

# Built-in open() now works with CloudPath
with open(cloud_file, 'r') as f:
    content = f.read()

# os.path functions work with CloudPath
import os.path
print(os.path.exists(cloud_file))    # True/False
print(os.path.basename(cloud_file))  # "data.txt"
print(os.path.dirname(cloud_file))   # "s3://my-bucket"
print(os.path.getsize(cloud_file))   # File size in bytes

# glob works with CloudPath
import glob
csv_files = glob.glob("s3://my-bucket/*.csv")
all_files = glob.glob("s3://my-bucket/**/*", recursive=True)
```

### Selective Patching

```python
from cloudpathlib import patch_open, patch_os_functions, patch_glob

# Apply patches selectively
patch_open()          # Only patch open()
patch_os_functions()  # Only patch os and os.path functions
patch_glob()          # Only patch glob functions

# Or combine as needed
patch_open()
patch_glob()  # Skip os functions if not needed
```
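
The original_open parameter documented above can also be used here to keep an explicit handle on the unpatched builtin. A small sketch, assuming the parameter behaves as documented:

```python
import builtins

from cloudpathlib import patch_open

# Capture the unpatched builtin before applying the patch so code that must
# stay local-only can still call it directly.
local_open = builtins.open
patch_open(original_open=local_open)

with local_open("local-only.txt", "w") as f:
    f.write("written with the original open()")
```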

### Legacy Code Integration

```python
# Existing code that works with local files
def process_files(directory):
    """Legacy function that processes files in a directory."""
    import os

    # This code was written for local files
    for filename in os.listdir(directory):
        filepath = os.path.join(directory, filename)

        if os.path.isfile(filepath):
            size = os.path.getsize(filepath)
            print(f"Processing {filename} ({size} bytes)")

            with open(filepath, 'r') as f:
                content = f.read()
                # Process content...

# After patching, this works with cloud storage too!
from cloudpathlib import patch_all_builtins

patch_all_builtins()

# Same function now works with cloud paths
process_files("s3://my-bucket/data/")  # Works!
process_files("/local/directory/")     # Still works!
process_files("gs://bucket/files/")    # Works!
```

### Environment Variable Configuration

```python
import os
from cloudpathlib import patch_all_builtins

# CloudPathLib automatically applies patches based on environment variables
# Set these before importing cloudpathlib:

# CLOUDPATHLIB_PATCH_OPEN=1 - patches open()
# CLOUDPATHLIB_PATCH_OS=1 - patches os functions
# CLOUDPATHLIB_PATCH_GLOB=1 - patches glob functions
# CLOUDPATHLIB_PATCH_ALL=1 - patches everything

# Or apply patches programmatically
if os.environ.get("ENABLE_CLOUD_PATCHING"):
    patch_all_builtins()

# Now existing code works with cloud paths
def backup_config():
    config_path = os.environ.get("CONFIG_PATH", "./config.json")
    backup_path = os.environ.get("BACKUP_PATH", "./config.backup.json")

    # Works whether paths are local or cloud URIs
    if os.path.exists(config_path):
        with open(config_path, 'r') as f:
            config_data = f.read()

        with open(backup_path, 'w') as f:
            f.write(config_data)

        print(f"Backed up {config_path} to {backup_path}")

# Usage
# CONFIG_PATH=s3://config-bucket/prod-config.json
# BACKUP_PATH=s3://backup-bucket/config-backup.json
backup_config()  # Works with cloud paths!
```

### File Processing Pipelines

```python
from cloudpathlib import patch_all_builtins
import os
import glob

patch_all_builtins()

def data_pipeline(input_dir, output_dir, pattern="*.csv"):
    """Data processing pipeline that works with any storage."""

    # Create output directory
    os.makedirs(output_dir, exist_ok=True)

    # Find all matching files
    search_pattern = os.path.join(input_dir, pattern)
    input_files = glob.glob(search_pattern)

    print(f"Found {len(input_files)} files matching {pattern}")

    for input_file in input_files:
        # Get file info
        filename = os.path.basename(input_file)
        file_size = os.path.getsize(input_file)

        print(f"Processing {filename} ({file_size} bytes)")

        # Read and process
        with open(input_file, 'r') as f:
            data = f.read()

        processed_data = data.upper()  # Example processing

        # Write output
        output_file = os.path.join(output_dir, f"processed_{filename}")
        with open(output_file, 'w') as f:
            f.write(processed_data)

        print(f"Wrote {output_file}")

# Works with any combination of local and cloud storage
data_pipeline(
    input_dir="s3://raw-data-bucket/csv/",
    output_dir="s3://processed-data-bucket/csv/",
    pattern="*.csv"
)

data_pipeline(
    input_dir="/local/input/",
    output_dir="gs://output-bucket/processed/",
    pattern="*.txt"
)
```

### Directory Traversal

```python
from cloudpathlib import patch_all_builtins
import os

patch_all_builtins()

def find_files_by_extension(root_dir, extension):
    """Find all files with given extension."""
    found_files = []

    # os.walk now works with cloud paths
    for dirpath, dirnames, filenames in os.walk(root_dir):
        for filename in filenames:
            if filename.endswith(extension):
                filepath = os.path.join(dirpath, filename)
                file_size = os.path.getsize(filepath)
                found_files.append({
                    'path': filepath,
                    'size': file_size,
                    'dir': dirpath
                })

    return found_files

# Works with cloud storage
python_files = find_files_by_extension("s3://code-bucket/", ".py")
log_files = find_files_by_extension("gs://logs-bucket/", ".log")

for file_info in python_files:
    print(f"Python file: {file_info['path']} ({file_info['size']} bytes)")
```

### CSV Processing Example

```python
from cloudpathlib import patch_all_builtins
import csv
import os
import glob

patch_all_builtins()

def process_csv_files(input_pattern, output_dir):
    """Process CSV files with standard library functions."""

    # Find all CSV files
    csv_files = glob.glob(input_pattern)

    # Create output directory
    os.makedirs(output_dir, exist_ok=True)

    for csv_file in csv_files:
        filename = os.path.basename(csv_file)
        output_file = os.path.join(output_dir, f"summary_{filename}")

        print(f"Processing {filename}")

        # Read CSV
        with open(csv_file, 'r', newline='') as infile:
            reader = csv.DictReader(infile)
            rows = list(reader)

        # Generate summary
        summary = {
            'filename': filename,
            'row_count': len(rows),
            'columns': list(rows[0].keys()) if rows else [],
            'file_size': os.path.getsize(csv_file)
        }

        # Write summary
        with open(output_file, 'w', newline='') as outfile:
            writer = csv.DictWriter(outfile, fieldnames=summary.keys())
            writer.writeheader()
            writer.writerow(summary)

        print(f"Summary written to {output_file}")

# Works with cloud CSV files
process_csv_files(
    input_pattern="s3://data-bucket/exports/*.csv",
    output_dir="s3://reports-bucket/summaries/"
)
```

### JSON Configuration Processing

```python
from cloudpathlib import patch_all_builtins
import json
import os
import glob

patch_all_builtins()

def merge_config_files(config_pattern, output_file):
    """Merge multiple JSON config files."""

    config_files = glob.glob(config_pattern)
    merged_config = {}

    for config_file in config_files:
        filename = os.path.basename(config_file)
        print(f"Loading config from {filename}")

        with open(config_file, 'r') as f:
            config_data = json.load(f)

        # Merge configuration
        merged_config.update(config_data)

    # Write merged configuration
    os.makedirs(os.path.dirname(output_file), exist_ok=True)
    with open(output_file, 'w') as f:
        json.dump(merged_config, f, indent=2)

    print(f"Merged configuration written to {output_file}")
    return merged_config

# Merge cloud-based config files
merged = merge_config_files(
    config_pattern="s3://config-bucket/environments/*.json",
    output_file="s3://config-bucket/merged/production.json"
)
```

### Batch File Operations

```python
from cloudpathlib import patch_all_builtins
from datetime import datetime
import os
import glob

patch_all_builtins()

def organize_files_by_date(source_pattern, base_output_dir):
    """Organize files into date-based directories."""

    files_to_organize = glob.glob(source_pattern)

    for file_path in files_to_organize:
        # Get file modification time
        stat_info = os.stat(file_path)
        mod_time = stat_info.st_mtime

        # Create date-based directory structure
        date_str = datetime.fromtimestamp(mod_time).strftime("%Y/%m/%d")

        output_dir = os.path.join(base_output_dir, date_str)
        os.makedirs(output_dir, exist_ok=True)

        filename = os.path.basename(file_path)
        output_path = os.path.join(output_dir, filename)

        # Copy the file to its new location (the read/write below works
        # across local and cloud storage)
        print(f"Moving {filename} to {date_str}/")
        with open(file_path, 'rb') as src, open(output_path, 'wb') as dst:
            dst.write(src.read())

        # Remove original (be careful with this!)
        # os.remove(file_path)

# Organize cloud files by date
organize_files_by_date(
    source_pattern="s3://uploads-bucket/incoming/*",
    base_output_dir="s3://organized-bucket/by-date/"
)
```

### Error Handling with Patched Functions

```python
from cloudpathlib import patch_all_builtins
import os
import glob

patch_all_builtins()

def safe_file_operations(file_pattern):
    """Demonstrate error handling with patched functions."""

    try:
        # recursive=True so ** patterns match nested files
        files = glob.glob(file_pattern, recursive=True)
        print(f"Found {len(files)} files")

        for file_path in files:
            try:
                # Check if file exists
                if os.path.exists(file_path):
                    # Get file info
                    size = os.path.getsize(file_path)
                    print(f"File: {os.path.basename(file_path)} ({size} bytes)")

                    # Try to read file
                    with open(file_path, 'r') as f:
                        content = f.read(100)  # Read first 100 chars
                        print(f"Content preview: {content[:50]}...")

            except PermissionError:
                print(f"Permission denied: {file_path}")
            except UnicodeDecodeError:
                print(f"Binary file (skipping): {file_path}")
            except Exception as e:
                print(f"Error processing {file_path}: {e}")

    except Exception as e:
        print(f"Error with pattern {file_pattern}: {e}")

# Handle errors gracefully
safe_file_operations("s3://my-bucket/**/*.txt")
safe_file_operations("/nonexistent/path/*")
```