# Universal Path Handling

AnyPath provides intelligent dispatching between cloud paths and local filesystem paths: the constructor inspects its input and returns a `CloudPath` for cloud URIs or a `pathlib.Path` for everything else. This universal interface lets you write path-agnostic code that works the same way regardless of where the data lives.
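
Because the returned object is an ordinary `pathlib.Path` or `CloudPath`, you can still branch on the concrete type when one backend needs special handling. A minimal sketch (the URI and file names are illustrative):

```python
from cloudpathlib import AnyPath, CloudPath

path = AnyPath("s3://my-bucket/data/report.csv")  # illustrative URI

if isinstance(path, CloudPath):
    # Cloud object: the same pathlib-style API still applies
    print(f"Cloud path: {path.name} ({type(path).__name__})")
else:
    # Plain pathlib.Path for local files
    print(f"Local path: {path.resolve()}")
```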

## Capabilities

### AnyPath Class

Polymorphic constructor that automatically dispatches to the appropriate path type; a usage sketch follows the API block below.

```python { .api }
class AnyPath:
    """Universal path constructor."""

    def __new__(
        cls,
        *args,
        **kwargs
    ) -> typing.Union[CloudPath, "pathlib.Path"]:
        """
        Create appropriate path type based on input.

        Args:
            *args: Path arguments
            **kwargs: Additional arguments

        Returns:
            CloudPath instance for cloud URIs, pathlib.Path for local paths
        """

    @classmethod
    def validate(cls, v):
        """
        Pydantic validator for AnyPath instances.

        Args:
            v: Value to validate

        Returns:
            Validated path object
        """
```
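
A minimal sketch of the dispatch behavior. The bucket name is illustrative, and the `AnyPathTypeError` import assumes a cloudpathlib version that exposes it in `cloudpathlib.exceptions`:

```python
from cloudpathlib import AnyPath, S3Path
from cloudpathlib.exceptions import AnyPathTypeError  # assumed exception location

p = AnyPath("s3://example-bucket/key.txt")
assert isinstance(p, S3Path)       # cloud URI -> CloudPath subclass

q = AnyPath("data/local.txt")
assert not isinstance(q, S3Path)   # everything else -> pathlib.Path

try:
    AnyPath(42)                    # neither a cloud URI nor a valid local path
except AnyPathTypeError as err:
    print(f"Not a usable path: {err}")
```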

### Helper Functions

Utility functions for path conversion and handling.

```python { .api }
def to_anypath(
    s: typing.Union[str, "os.PathLike"]
) -> typing.Union[CloudPath, "pathlib.Path"]:
    """
    Convert string or PathLike to appropriate path type.

    Args:
        s: String or path-like object

    Returns:
        CloudPath for cloud URIs, pathlib.Path for local paths
    """
```
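
A quick illustration that the helper accepts both strings and existing `os.PathLike` objects (a fuller walkthrough appears under "Helper Function Usage" below; the bucket name is illustrative):

```python
from pathlib import Path

from cloudpathlib import to_anypath

print(type(to_anypath("s3://example-bucket/file.txt")).__name__)  # S3Path
print(type(to_anypath(Path("/tmp/file.txt"))).__name__)           # PosixPath on POSIX systems
```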

## Usage Examples

### Basic AnyPath Usage

```python
from cloudpathlib import AnyPath

# Automatically dispatches to appropriate path type
cloud_path = AnyPath("s3://my-bucket/file.txt")
print(type(cloud_path))  # <class 'cloudpathlib.s3.s3path.S3Path'>

local_path = AnyPath("/home/user/file.txt")
print(type(local_path))  # <class 'pathlib.PosixPath'> on POSIX systems

windows_path = AnyPath("C:\\Users\\user\\file.txt")
print(type(windows_path))  # <class 'pathlib.WindowsPath'> on Windows

# Works with different cloud providers
gcs_path = AnyPath("gs://my-bucket/file.txt")
azure_path = AnyPath("az://my-container/file.txt")
http_path = AnyPath("https://example.com/file.txt")  # requires a cloudpathlib version with HTTP(S) support
```

### Path-Agnostic Functions

```python
from cloudpathlib import AnyPath

def process_file(path_str):
    """Process file regardless of storage location."""
    path = AnyPath(path_str)

    # Same API works for both local and cloud paths
    if path.exists():
        content = path.read_text()

        # Process content
        processed = content.upper()

        # Write back to same location (with_stem requires Python 3.9+)
        output_path = path.with_stem(path.stem + "_processed")
        output_path.write_text(processed)

        return output_path
    else:
        raise FileNotFoundError(f"File not found: {path}")

# Works with any path type
local_result = process_file("/tmp/local_file.txt")
s3_result = process_file("s3://bucket/cloud_file.txt")
gcs_result = process_file("gs://bucket/gcs_file.txt")
```

### Configuration-Driven Path Handling

```python
import os

from cloudpathlib import AnyPath

def get_data_path(filename):
    """Get data path based on environment configuration."""
    storage_type = os.getenv("STORAGE_TYPE", "local")

    if storage_type == "local":
        base_path = os.getenv("LOCAL_DATA_DIR", "./data")
        return AnyPath(base_path) / filename
    elif storage_type == "s3":
        bucket = os.getenv("S3_BUCKET", "default-bucket")
        return AnyPath(f"s3://{bucket}/data") / filename
    elif storage_type == "gcs":
        bucket = os.getenv("GCS_BUCKET", "default-bucket")
        return AnyPath(f"gs://{bucket}/data") / filename
    else:
        raise ValueError(f"Unknown storage type: {storage_type}")

# Usage - works with any configured storage
data_file = get_data_path("dataset.csv")
print(f"Using: {data_file}")

# Read/write operations work the same regardless of backend
if data_file.exists():
    data = data_file.read_text()
else:
    data_file.write_text("id,name,value\n1,test,100")
```

### Batch Processing with Mixed Paths

```python
from cloudpathlib import AnyPath

def process_file_list(file_paths):
    """Process list of files from different storage locations."""
    results = []

    for path_str in file_paths:
        path = AnyPath(path_str)

        print(f"Processing {path} (type: {type(path).__name__})")

        if path.exists():
            # Same operations work for all path types
            stats = path.stat()

            results.append({
                'path': str(path),
                'type': type(path).__name__,
                'size': stats.st_size,
                'modified': stats.st_mtime
            })
        else:
            print(f"Skipping non-existent file: {path}")

    return results

# Mix of local and cloud paths
mixed_paths = [
    "/home/user/local_file.txt",
    "s3://my-bucket/s3_file.txt",
    "gs://my-bucket/gcs_file.txt",
    "az://my-container/azure_file.txt",
    "C:\\Users\\user\\windows_file.txt"
]

results = process_file_list(mixed_paths)
for result in results:
    print(f"{result['type']}: {result['path']} ({result['size']} bytes)")
```

### Data Pipeline with Flexible Storage

```python
from cloudpathlib import AnyPath

class DataPipeline:
    """Data pipeline that works with any storage backend."""

    def __init__(self, input_path, output_path, temp_dir=None):
        self.input_path = AnyPath(input_path)
        self.output_path = AnyPath(output_path)
        self.temp_dir = AnyPath(temp_dir) if temp_dir else None

    def process(self):
        """Run the pipeline."""
        print(f"Input: {self.input_path} ({type(self.input_path).__name__})")
        print(f"Output: {self.output_path} ({type(self.output_path).__name__})")

        # Read input data
        raw_data = self.input_path.read_text()

        # Process data
        processed_data = self.transform_data(raw_data)

        # Write temporary result if temp directory specified
        if self.temp_dir:
            temp_file = self.temp_dir / f"temp_{self.input_path.name}"
            temp_file.parent.mkdir(parents=True, exist_ok=True)
            temp_file.write_text(processed_data)
            print(f"Temp file: {temp_file}")

        # Write final output
        self.output_path.parent.mkdir(parents=True, exist_ok=True)
        self.output_path.write_text(processed_data)

        return self.output_path

    def transform_data(self, data):
        """Transform the data (example transformation)."""
        lines = data.strip().split('\n')
        processed_lines = [f"PROCESSED: {line}" for line in lines]
        return '\n'.join(processed_lines)

# Works with any combination of storage types
pipeline1 = DataPipeline(
    input_path="s3://source-bucket/raw_data.txt",
    output_path="/tmp/processed_data.txt",
    temp_dir="gs://temp-bucket/pipeline-temp/"
)

pipeline2 = DataPipeline(
    input_path="/home/user/input.txt",
    output_path="az://output-container/result.txt"
)

# Same interface, different storage backends
result1 = pipeline1.process()
result2 = pipeline2.process()
```

### Dynamic Path Resolution

```python
from cloudpathlib import AnyPath

def resolve_path(path_spec):
    """Resolve path specification to actual path."""
    if isinstance(path_spec, dict):
        # Dynamic path specification
        storage_type = path_spec.get('type', 'local')

        if storage_type == 'local':
            base_dir = path_spec.get('base_dir', '.')
            filename = path_spec['filename']
            return AnyPath(base_dir) / filename

        elif storage_type == 's3':
            bucket = path_spec['bucket']
            key = path_spec['key']
            return AnyPath(f"s3://{bucket}/{key}")

        elif storage_type == 'gcs':
            bucket = path_spec['bucket']
            blob = path_spec['blob']
            return AnyPath(f"gs://{bucket}/{blob}")

        elif storage_type == 'azure':
            container = path_spec['container']
            blob = path_spec['blob']
            return AnyPath(f"az://{container}/{blob}")

        else:
            raise ValueError(f"Unknown storage type: {storage_type}")

    else:
        # Direct path specification
        return AnyPath(path_spec)

# Example path specifications
path_specs = [
    "/direct/local/path.txt",
    "s3://direct-bucket/file.txt",
    {
        'type': 'local',
        'base_dir': '/home/user/data',
        'filename': 'config.json'
    },
    {
        'type': 's3',
        'bucket': 'my-data-bucket',
        'key': 'processed/results.csv'
    },
    {
        'type': 'gcs',
        'bucket': 'analytics-bucket',
        'blob': 'reports/monthly.pdf'
    }
]

# Resolve all specifications
resolved_paths = [resolve_path(spec) for spec in path_specs]
for original, resolved in zip(path_specs, resolved_paths):
    print(f"{original} -> {resolved} ({type(resolved).__name__})")
```

### Testing with Path Abstraction

```python
import json
import tempfile

import pytest

from cloudpathlib import AnyPath

class TestDataProcessor:
    """Test data processor with different storage backends."""

    def setup_test_data(self, storage_type="local"):
        """Setup test data for different storage types."""
        if storage_type == "local":
            temp_dir = tempfile.mkdtemp()
            test_file = AnyPath(temp_dir) / "test_data.txt"
        else:
            # Use environment variables for cloud testing
            if storage_type == "s3":
                test_file = AnyPath("s3://test-bucket/test_data.txt")
            elif storage_type == "gcs":
                test_file = AnyPath("gs://test-bucket/test_data.txt")
            else:
                pytest.skip(f"Storage type {storage_type} not configured for testing")

        # Same setup code works for all storage types
        test_file.write_text("line1\nline2\nline3")
        return test_file

    @pytest.mark.parametrize("storage_type", ["local", "s3", "gcs"])
    def test_file_processing(self, storage_type):
        """Test file processing with different storage backends."""
        test_file = self.setup_test_data(storage_type)

        # Process file (process_file is defined in "Path-Agnostic Functions" above)
        result = process_file(str(test_file))

        # Verify results work the same way
        assert result.exists()
        content = result.read_text()
        assert "LINE1" in content  # process_file converts content to uppercase

        # Cleanup
        if storage_type == "local":
            result.unlink()
            test_file.unlink()

# Usage in configuration management
def load_config(config_path_spec):
    """Load configuration from various sources."""
    config_path = AnyPath(config_path_spec)

    if config_path.exists():
        return json.loads(config_path.read_text())
    else:
        # Return default config
        return {"default": True}

# Works with any path type
local_config = load_config("./config.json")
s3_config = load_config("s3://config-bucket/prod-config.json")
gcs_config = load_config("gs://config-bucket/staging-config.json")
```
367
368
### Helper Function Usage
369
370
```python
371
from cloudpathlib import to_anypath
372
373
# Convert various inputs to appropriate path types
374
paths = [
375
"/local/file.txt",
376
"s3://bucket/file.txt",
377
Path("/another/local/file.txt"),
378
"gs://bucket/data.json",
379
"https://example.com/api/data"
380
]
381
382
converted_paths = [to_anypath(p) for p in paths]
383
384
for original, converted in zip(paths, converted_paths):
385
print(f"{original} -> {type(converted).__name__}")
386
387
# Use in functions that accept string or path objects
388
def safe_read_file(path_input):
389
"""Safely read file from string or path object."""
390
path = to_anypath(path_input)
391
392
try:
393
return path.read_text()
394
except Exception as e:
395
print(f"Error reading {path}: {e}")
396
return None
397
398
# Works with any input type
399
content1 = safe_read_file("/tmp/file.txt")
400
content2 = safe_read_file("s3://bucket/file.txt")
401
content3 = safe_read_file(Path("/home/user/file.txt"))
402
```

### Pydantic Integration

```python
from typing import Optional

from pydantic import BaseModel

from cloudpathlib import AnyPath

class DataConfig(BaseModel):
    """Configuration model with path validation."""

    input_path: AnyPath
    output_path: AnyPath
    temp_dir: Optional[AnyPath] = None

    class Config:
        # Allow AnyPath annotations on pydantic versions that don't pick up
        # AnyPath's built-in validator (see the validate() hook above)
        arbitrary_types_allowed = True

# Validation works with any path type
config_data = {
    "input_path": "s3://source-bucket/data.csv",
    "output_path": "/tmp/processed.csv",
    "temp_dir": "gs://temp-bucket/workspace/"
}

config = DataConfig(**config_data)
print(f"Input: {config.input_path} ({type(config.input_path).__name__})")
print(f"Output: {config.output_path} ({type(config.output_path).__name__})")
print(f"Temp: {config.temp_dir} ({type(config.temp_dir).__name__})")

# Use validated paths
if config.input_path.exists():
    data = config.input_path.read_text()
    config.output_path.write_text(data.upper())
```