# Configuration and Enums

Configuration options for cache management, file handling modes, and other library settings that control behavior across all cloud providers. These settings allow fine-tuned control over caching, performance, and integration with existing systems.

## Capabilities

### FileCacheMode Enum

Configuration enum for controlling how CloudPathLib manages local file caching.

```python { .api }
class FileCacheMode(str, Enum):
    """File cache management strategies."""

    persistent = "persistent"
    """
    Cache persists until manually cleared.
    Files remain cached across Python sessions.
    """

    tmp_dir = "tmp_dir"
    """
    Cache in temporary directory (default).
    Files cached in system temp directory, may be cleaned by OS.
    """

    cloudpath_object = "cloudpath_object"
    """
    Cache cleared when CloudPath object is deleted.
    Automatic cleanup when path objects go out of scope.
    """

    close_file = "close_file"
    """
    Cache cleared when file is closed.
    Immediate cleanup after file operations complete.
    """

    @classmethod
    def from_environment(cls) -> typing.Optional["FileCacheMode"]:
        """
        Parse cache mode from environment variable.

        Returns:
            FileCacheMode from the CLOUDPATHLIB_FILE_CACHE_MODE env var,
            or None when the variable is not set
        """
```

### Implementation Registry

Global registry that tracks all available cloud provider implementations and their associated path and client classes.

```python { .api }
implementation_registry: typing.Dict[str, "CloudImplementation"]
"""
Global registry mapping cloud provider keys to their implementation metadata.
Keys: "s3", "gs", "azure", "http", "https"
"""


class CloudImplementation:
    """
    Metadata container for cloud provider implementations.

    Attributes:
        name (str): Provider identifier ("s3", "gs", "azure", etc.)
        dependencies_loaded (bool): Whether required dependencies are available
        _client_class (Type[Client]): Client class for this provider
        _path_class (Type[CloudPath]): Path class for this provider
    """

    name: str
    dependencies_loaded: bool = True
    _client_class: typing.Type["Client"]
    _path_class: typing.Type["CloudPath"]

    def validate_completeness(self) -> None:
        """Validate that implementation has all required components."""
```

## Usage Examples

### Basic Cache Mode Configuration

```python
from cloudpathlib import FileCacheMode, S3Client, CloudPath

# Configure client with specific cache mode
client = S3Client(
    file_cache_mode=FileCacheMode.persistent,
    local_cache_dir="/var/cache/cloudpathlib"
)

# Create paths with configured caching
path = CloudPath("s3://my-bucket/large-file.dat", client=client)

# File is cached persistently
content = path.read_bytes()  # Downloads and caches
content = path.read_bytes()  # Uses cached version (no download)

# Cache persists across Python sessions
```

### Environment-Based Configuration

```python
import os
from cloudpathlib import FileCacheMode, S3Client

# Set environment variable (the name FileCacheMode.from_environment() reads)
os.environ["CLOUDPATHLIB_FILE_CACHE_MODE"] = "persistent"

# Parse from environment
cache_mode = FileCacheMode.from_environment()
print(f"Cache mode: {cache_mode}")  # FileCacheMode.persistent

# Use in client configuration; persistent mode needs an explicit cache
# directory, so one is supplied alongside the mode
client = S3Client(
    file_cache_mode=cache_mode,
    local_cache_dir="/var/cache/cloudpathlib"
)
```

### Different Cache Strategies

```python
from cloudpathlib import CloudPath, FileCacheMode, S3Client

# Persistent caching - files stay cached until manually cleared
persistent_client = S3Client(
    file_cache_mode=FileCacheMode.persistent,
    local_cache_dir="/persistent/cache"
)

# Temporary caching - system handles cleanup
temp_client = S3Client(
    file_cache_mode=FileCacheMode.tmp_dir
)

# Object-scoped caching - cleared when path object is deleted
object_client = S3Client(
    file_cache_mode=FileCacheMode.cloudpath_object
)

# File-scoped caching - cleared when file is closed
file_client = S3Client(
    file_cache_mode=FileCacheMode.close_file
)

# Demonstrate different behaviors
path1 = CloudPath("s3://bucket/file.txt", client=persistent_client)
path2 = CloudPath("s3://bucket/file.txt", client=temp_client)
path3 = CloudPath("s3://bucket/file.txt", client=object_client)
path4 = CloudPath("s3://bucket/file.txt", client=file_client)

# Read files with different caching behaviors
content1 = path1.read_text()  # Cached persistently
content2 = path2.read_text()  # Cached in temp directory
content3 = path3.read_text()  # Cached until path3 is deleted
content4 = path4.read_text()  # Cache cleared immediately after read
```

### Performance-Oriented Configuration

```python
def configure_high_performance_client():
    """Configure client for high-performance scenarios.

    Returns:
        An S3Client using a persistent cache on fast storage and a
        multi-threaded multipart transfer configuration.
    """
    # Imported locally so the snippet is self-contained; the original
    # referenced boto3 without importing it.
    from boto3.s3.transfer import TransferConfig

    # TransferConfig sizes are expressed in bytes: 25 MB is 25 * 1024 * 1024
    # (the previous 1024 * 25 was only 25 KB).
    part_size_bytes = 25 * 1024 * 1024

    return S3Client(
        file_cache_mode=FileCacheMode.persistent,
        local_cache_dir="/fast/ssd/cache",  # Use fast storage for cache
        boto3_transfer_config=TransferConfig(
            multipart_threshold=part_size_bytes,  # 25MB
            max_concurrency=10,
            multipart_chunksize=part_size_bytes,
            use_threads=True
        )
    )

def configure_memory_constrained_client():
    """Configure client for memory-constrained environments.

    Returns:
        An S3Client whose cached copy of a file is removed as soon as the
        file is closed.
    """
    return S3Client(
        file_cache_mode=FileCacheMode.close_file,  # Immediate cleanup
        local_cache_dir="/tmp/cloudpath_cache"  # Use temp directory
    )

# Use appropriate configuration
high_perf_client = configure_high_performance_client()
memory_client = configure_memory_constrained_client()
```

### Development vs Production Configuration

```python
import os


def get_cache_config():
    """Get cache configuration based on environment.

    Reads the ENVIRONMENT variable (defaulting to "development") and returns
    the keyword arguments to pass to a client constructor.

    Returns:
        dict with "file_cache_mode" and "local_cache_dir" keys.
    """
    environment = os.getenv("ENVIRONMENT", "development")

    if environment == "production":
        return {
            "file_cache_mode": FileCacheMode.persistent,
            "local_cache_dir": "/var/cache/app/cloudpathlib"
        }
    elif environment == "staging":
        return {
            "file_cache_mode": FileCacheMode.tmp_dir,
            "local_cache_dir": "/tmp/staging_cache"
        }
    else:  # development (also any unrecognized value)
        return {
            "file_cache_mode": FileCacheMode.cloudpath_object,
            "local_cache_dir": "./dev_cache"
        }

# Apply environment-specific configuration
cache_config = get_cache_config()
client = S3Client(**cache_config)
```

### Cache Directory Management

```python
import tempfile
import shutil
from pathlib import Path


class ManagedCacheDirectory:
    """Context manager for temporary cache directories.

    Entering the context creates a fresh directory with ``tempfile.mkdtemp``
    and yields its path as a string; leaving the context removes the
    directory and everything in it.
    """

    def __init__(self, prefix="cloudpath_"):
        # Prefix handed to tempfile.mkdtemp for the directory name.
        self.prefix = prefix
        # Set on __enter__; stays None until the context is entered.
        self.temp_dir = None

    def __enter__(self):
        self.temp_dir = Path(tempfile.mkdtemp(prefix=self.prefix))
        return str(self.temp_dir)

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Returns None, so exceptions raised inside the block still propagate.
        if self.temp_dir and self.temp_dir.exists():
            shutil.rmtree(self.temp_dir)

# Use managed cache directory
with ManagedCacheDirectory() as cache_dir:
    client = S3Client(
        file_cache_mode=FileCacheMode.persistent,
        local_cache_dir=cache_dir
    )

    path = CloudPath("s3://bucket/file.txt", client=client)
    content = path.read_text()  # Cached in managed directory

# Directory automatically cleaned up when exiting context
```

### Cache Monitoring

```python
import os
from pathlib import Path


def get_cache_stats(cache_dir):
    """Get statistics about cache directory.

    Args:
        cache_dir: Path (str or Path) of the cache directory to inspect.

    Returns:
        ``{"exists": False}`` when the path does not exist; otherwise a dict
        with file and directory counts, total size in bytes and MB, and the
        size of the largest file (0 when there are no files).
    """
    cache_path = Path(cache_dir)

    if not cache_path.exists():
        return {"exists": False}

    # Walk the tree once and reuse the listing for every statistic.
    entries = list(cache_path.rglob("*"))
    file_sizes = [entry.stat().st_size for entry in entries if entry.is_file()]
    total_size = sum(file_sizes)

    return {
        "exists": True,
        "total_files": len(file_sizes),
        "total_directories": sum(1 for entry in entries if entry.is_dir()),
        "total_size_bytes": total_size,
        "total_size_mb": total_size / (1024 * 1024),
        "largest_file_bytes": max(file_sizes, default=0)
    }

# Monitor cache usage
cache_dir = "/tmp/cloudpath_cache"
client = S3Client(
    file_cache_mode=FileCacheMode.persistent,
    local_cache_dir=cache_dir
)

# Perform operations
path1 = CloudPath("s3://bucket/file1.txt", client=client)
path2 = CloudPath("s3://bucket/file2.txt", client=client)

content1 = path1.read_text()
content2 = path2.read_text()

# Check cache statistics
stats = get_cache_stats(cache_dir)
print(f"Cache stats: {stats}")
```

### Configuration Validation

```python
import os
from pathlib import Path


def validate_cache_configuration(file_cache_mode, local_cache_dir):
    """Validate cache configuration settings.

    Args:
        file_cache_mode: Value expected to be a FileCacheMode member.
        local_cache_dir: Cache directory path, or a falsy value to skip the
            directory checks.

    Returns:
        List of human-readable issue strings; empty when nothing was found.

    Note:
        When the parent directory exists, the cache directory itself is
        created if it is missing (that is how creatability is checked).
    """
    issues = []

    # Validate cache mode
    if not isinstance(file_cache_mode, FileCacheMode):
        issues.append(f"Invalid cache mode: {file_cache_mode}")

    # Validate cache directory
    if local_cache_dir:
        cache_path = Path(local_cache_dir)

        if not cache_path.parent.exists():
            # Report the missing parent instead of silently creating the
            # whole tree, which would contradict the reported issue.
            issues.append(f"Cache directory parent does not exist: {cache_path.parent}")
        else:
            # Check if we can create the directory. OSError also covers the
            # path already existing as a regular file, not just permissions.
            try:
                cache_path.mkdir(exist_ok=True)
            except OSError:
                issues.append(f"Cannot create cache directory: {cache_path}")

        # Check write permissions
        if cache_path.exists() and not os.access(cache_path, os.W_OK):
            issues.append(f"No write permission to cache directory: {cache_path}")

    return issues

# Validate configuration before using
cache_mode = FileCacheMode.persistent
cache_dir = "/tmp/my_cache"

issues = validate_cache_configuration(cache_mode, cache_dir)
if issues:
    print("Configuration issues:")
    for issue in issues:
        print(f" - {issue}")
else:
    print("Configuration is valid")
    client = S3Client(
        file_cache_mode=cache_mode,
        local_cache_dir=cache_dir
    )
```

### Cache Cleanup Utilities

```python
import time
from datetime import datetime, timedelta
from pathlib import Path


def cleanup_old_cache_files(cache_dir, max_age_days=7):
    """Remove cache files older than specified days.

    Args:
        cache_dir: Cache directory to scan recursively.
        max_age_days: Files whose modification time is older than this many
            days are deleted.

    Returns:
        Number of files removed (0 when the directory does not exist).
    """
    cache_path = Path(cache_dir)

    if not cache_path.exists():
        return 0

    cutoff_time = time.time() - (max_age_days * 24 * 60 * 60)
    removed_count = 0

    # Snapshot the listing first so deletions do not race the directory walk.
    for file_path in list(cache_path.rglob("*")):
        if file_path.is_file() and file_path.stat().st_mtime < cutoff_time:
            file_path.unlink()
            removed_count += 1

    return removed_count


def cleanup_large_cache_files(cache_dir, max_size_mb=100):
    """Remove cache files larger than specified size.

    Args:
        cache_dir: Cache directory to scan recursively.
        max_size_mb: Files strictly larger than this many megabytes are
            deleted.

    Returns:
        Number of files removed (0 when the directory does not exist).
    """
    cache_path = Path(cache_dir)

    if not cache_path.exists():
        return 0

    max_size_bytes = max_size_mb * 1024 * 1024
    removed_count = 0

    # Snapshot the listing first so deletions do not race the directory walk.
    for file_path in list(cache_path.rglob("*")):
        if file_path.is_file() and file_path.stat().st_size > max_size_bytes:
            file_path.unlink()
            removed_count += 1

    return removed_count

# Usage
cache_dir = "/tmp/cloudpath_cache"

# Clean up old files
old_files_removed = cleanup_old_cache_files(cache_dir, max_age_days=3)
print(f"Removed {old_files_removed} old cache files")

# Clean up large files
large_files_removed = cleanup_large_cache_files(cache_dir, max_size_mb=50)
print(f"Removed {large_files_removed} large cache files")
```

### Advanced Configuration Patterns

```python
class CacheConfiguration:
    """Advanced cache configuration management.

    Keeps named sets of client keyword arguments and builds clients from
    them on demand.
    """

    def __init__(self):
        # Maps configuration name -> dict of client keyword arguments.
        self.configurations = {}

    def register_config(self, name, **kwargs):
        """Register a named configuration."""
        self.configurations[name] = kwargs

    def get_client(self, config_name, client_class, **additional_args):
        """Create client with named configuration.

        Args:
            config_name: Name used with ``register_config``; an unknown name
                falls back to an empty configuration.
            client_class: Callable invoked with the merged keyword arguments.
            **additional_args: Per-call arguments; they override registered
                values with the same key.

        Returns:
            Whatever ``client_class(**merged_config)`` returns.
        """
        # Merge into a new dict: updating the registered dict in place would
        # leak this call's extra arguments into every later client.
        config = {**self.configurations.get(config_name, {}), **additional_args}
        return client_class(**config)

# Set up configuration registry
cache_config = CacheConfiguration()

# Register different configurations
cache_config.register_config(
    "high_performance",
    file_cache_mode=FileCacheMode.persistent,
    local_cache_dir="/fast/cache"
)

cache_config.register_config(
    "low_memory",
    file_cache_mode=FileCacheMode.close_file,
    local_cache_dir="/tmp/cache"
)

cache_config.register_config(
    "development",
    file_cache_mode=FileCacheMode.cloudpath_object,
    local_cache_dir="./dev_cache"
)

# Create clients with named configurations
high_perf_s3 = cache_config.get_client(
    "high_performance",
    S3Client,
    aws_profile="production"
)

low_mem_gs = cache_config.get_client(
    "low_memory",
    GSClient,
    project="my-project"
)
```

### Environment Variable Integration

```python
import os


# NOTE(review): the variable names below are the ones this helper reads
# itself. The library's own variables appear to be
# CLOUDPATHLIB_FILE_CACHE_MODE and CLOUDPATHLIB_LOCAL_CACHE_DIR - confirm
# before presenting these names as library settings.
class EnvironmentConfiguration:
    """Configuration management using environment variables."""

    @staticmethod
    def get_cache_mode():
        """Get cache mode from environment.

        Reads CLOUDPATHLIB_CACHE_MODE (default "tmp_dir"); an unrecognized
        value prints a message and falls back to FileCacheMode.tmp_dir.
        """
        mode_str = os.getenv("CLOUDPATHLIB_CACHE_MODE", "tmp_dir")
        try:
            return FileCacheMode(mode_str)
        except ValueError:
            print(f"Invalid cache mode '{mode_str}', using default")
            return FileCacheMode.tmp_dir

    @staticmethod
    def get_cache_dir():
        """Get cache directory from environment (None when unset)."""
        return os.getenv("CLOUDPATHLIB_CACHE_DIR")

    @staticmethod
    def is_caching_enabled():
        """Check if caching is enabled.

        Caching counts as disabled only when CLOUDPATHLIB_DISABLE_CACHE is
        "true" (case-insensitive).
        """
        return os.getenv("CLOUDPATHLIB_DISABLE_CACHE", "").lower() != "true"

    @classmethod
    def create_s3_client(cls):
        """Create S3 client from environment configuration."""
        if not cls.is_caching_enabled():
            # Disable caching
            return S3Client(file_cache_mode=FileCacheMode.close_file)

        return S3Client(
            file_cache_mode=cls.get_cache_mode(),
            local_cache_dir=cls.get_cache_dir()
        )

# Usage with environment variables
"""
Environment setup:
export CLOUDPATHLIB_CACHE_MODE=persistent
export CLOUDPATHLIB_CACHE_DIR=/var/cache/myapp
export CLOUDPATHLIB_DISABLE_CACHE=false
"""

env_client = EnvironmentConfiguration.create_s3_client()
path = CloudPath("s3://bucket/file.txt", client=env_client)
```

### Configuration Documentation

```python
def print_configuration_help():
    """Print help for CloudPathLib configuration options."""
    # Local import keeps the snippet self-contained.
    import textwrap

    # NOTE(review): CLOUDPATHLIB_CACHE_MODE, CLOUDPATHLIB_CACHE_DIR and
    # CLOUDPATHLIB_DISABLE_CACHE match the example helper on this page. The
    # library's own names appear to be CLOUDPATHLIB_FILE_CACHE_MODE and
    # CLOUDPATHLIB_LOCAL_CACHE_DIR - confirm before publishing.
    help_text = textwrap.dedent(
        """
        CloudPathLib Configuration Options
        =================================

        Environment Variables:
        CLOUDPATHLIB_CACHE_MODE - Cache management mode
            Values: persistent, tmp_dir, cloudpath_object, close_file
            Default: tmp_dir

        CLOUDPATHLIB_CACHE_DIR - Custom cache directory path
            Default: System temp directory

        CLOUDPATHLIB_DISABLE_CACHE - Disable all caching
            Values: true, false
            Default: false

        CLOUDPATHLIB_PATCH_OPEN - Auto-patch open() function
            Values: true, false
            Default: false

        CLOUDPATHLIB_PATCH_OS - Auto-patch os functions
            Values: true, false
            Default: false

        CLOUDPATHLIB_PATCH_GLOB - Auto-patch glob functions
            Values: true, false
            Default: false

        CLOUDPATHLIB_PATCH_ALL - Auto-patch all functions
            Values: true, false
            Default: false

        Cache Modes:
        persistent - Files cached until manually cleared
        tmp_dir - Files cached in temp directory (default)
        cloudpath_object - Cache cleared when CloudPath deleted
        close_file - Cache cleared when file closed

        Example Configuration:
        export CLOUDPATHLIB_CACHE_MODE=persistent
        export CLOUDPATHLIB_CACHE_DIR=/var/cache/myapp
        export CLOUDPATHLIB_PATCH_ALL=true
        """
    )

    print(help_text)

# Show configuration help
print_configuration_help()
```