or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

anypath.md azure-integration.md client-management.md cloud-operations.md configuration.md core-operations.md directory-operations.md exceptions.md file-io.md gcs-integration.md http-support.md index.md patching.md s3-integration.md

docs/configuration.md

0

# Configuration and Enums

1

2

Configuration options for cache management, file handling modes, and other library settings that control behavior across all cloud providers. These settings allow fine-tuned control over caching, performance, and integration with existing systems.

3

4

## Capabilities

5

6

### FileCacheMode Enum

7

8

Configuration enum for controlling how CloudPathLib manages local file caching.

9

10

```python { .api }

11

class FileCacheMode(str, Enum):

12

"""File cache management strategies."""

13

14

persistent = "persistent"

15

"""

16

Cache persists until manually cleared.

17

Files remain cached across Python sessions.

18

"""

19

20

tmp_dir = "tmp_dir"

21

"""

22

Cache in temporary directory (default).

23

Files cached in system temp directory, may be cleaned by OS.

24

"""

25

26

cloudpath_object = "cloudpath_object"

27

"""

28

Cache cleared when CloudPath object is deleted.

29

Automatic cleanup when path objects go out of scope.

30

"""

31

32

close_file = "close_file"

33

"""

34

Cache cleared when file is closed.

35

Immediate cleanup after file operations complete.

36

"""

37

38

@classmethod

39

def from_environment(cls) -> "FileCacheMode":

40

"""

41

Parse cache mode from environment variable.

42

43

Returns:

44

FileCacheMode from CLOUDPATHLIB_CACHE_MODE env var

45

"""

46

```

47

48

### Implementation Registry

49

50

Global registry that tracks all available cloud provider implementations and their associated path and client classes.

51

52

```python { .api }

53

implementation_registry: typing.Dict[str, "CloudImplementation"]

54

"""

55

Global registry mapping cloud provider keys to their implementation metadata.

56

Keys: "s3", "gs", "azure", "http", "https"

57

"""

58

59

class CloudImplementation:

60

"""

61

Metadata container for cloud provider implementations.

62

63

Attributes:

64

name (str): Provider identifier ("s3", "gs", "azure", etc.)

65

dependencies_loaded (bool): Whether required dependencies are available

66

_client_class (Type[Client]): Client class for this provider

67

_path_class (Type[CloudPath]): Path class for this provider

68

"""

69

name: str

70

dependencies_loaded: bool = True

71

_client_class: typing.Type["Client"]

72

_path_class: typing.Type["CloudPath"]

73

74

def validate_completeness(self) -> None:

75

"""Validate that implementation has all required components."""

76

```

77

78

## Usage Examples

79

80

### Basic Cache Mode Configuration

81

82

```python

83

from cloudpathlib import FileCacheMode, S3Client, CloudPath

84

85

# Configure client with specific cache mode

86

client = S3Client(

87

file_cache_mode=FileCacheMode.persistent,

88

local_cache_dir="/var/cache/cloudpathlib"

89

)

90

91

# Create paths with configured caching

92

path = CloudPath("s3://my-bucket/large-file.dat", client=client)

93

94

# File is cached persistently

95

content = path.read_bytes() # Downloads and caches

96

content = path.read_bytes() # Uses cached version (no download)

97

98

# Cache persists across Python sessions

99

```

100

101

### Environment-Based Configuration

102

103

```python

104

import os

105

from cloudpathlib import FileCacheMode

106

107

# Set environment variable

108

os.environ["CLOUDPATHLIB_CACHE_MODE"] = "persistent"

109

110

# Parse from environment

111

cache_mode = FileCacheMode.from_environment()

112

print(f"Cache mode: {cache_mode}") # FileCacheMode.persistent

113

114

# Use in client configuration

115

client = S3Client(file_cache_mode=cache_mode)

116

```

117

118

### Different Cache Strategies

119

120

```python

121

# Persistent caching - files stay cached until manually cleared

122

persistent_client = S3Client(

123

file_cache_mode=FileCacheMode.persistent,

124

local_cache_dir="/persistent/cache"

125

)

126

127

# Temporary caching - system handles cleanup

128

temp_client = S3Client(

129

file_cache_mode=FileCacheMode.tmp_dir

130

)

131

132

# Object-scoped caching - cleared when path object is deleted

133

object_client = S3Client(

134

file_cache_mode=FileCacheMode.cloudpath_object

135

)

136

137

# File-scoped caching - cleared when file is closed

138

file_client = S3Client(

139

file_cache_mode=FileCacheMode.close_file

140

)

141

142

# Demonstrate different behaviors

143

path1 = CloudPath("s3://bucket/file.txt", client=persistent_client)

144

path2 = CloudPath("s3://bucket/file.txt", client=temp_client)

145

path3 = CloudPath("s3://bucket/file.txt", client=object_client)

146

path4 = CloudPath("s3://bucket/file.txt", client=file_client)

147

148

# Read files with different caching behaviors

149

content1 = path1.read_text() # Cached persistently

150

content2 = path2.read_text() # Cached in temp directory

151

content3 = path3.read_text() # Cached until path3 is deleted

152

content4 = path4.read_text() # Cache cleared immediately after read

153

```

154

155

### Performance-Oriented Configuration

156

157

```python

158

def configure_high_performance_client():

159

"""Configure client for high-performance scenarios."""

160

return S3Client(

161

file_cache_mode=FileCacheMode.persistent,

162

local_cache_dir="/fast/ssd/cache", # Use fast storage for cache

163

boto3_transfer_config=boto3.s3.transfer.TransferConfig(

164

multipart_threshold=1024 * 1024 * 25, # 25MB

165

max_concurrency=10,

166

multipart_chunksize=1024 * 1024 * 25, # 25MB

167

use_threads=True

168

)

169

)

170

171

def configure_memory_constrained_client():

172

"""Configure client for memory-constrained environments."""

173

return S3Client(

174

file_cache_mode=FileCacheMode.close_file, # Immediate cleanup

175

local_cache_dir="/tmp/cloudpath_cache" # Use temp directory

176

)

177

178

# Use appropriate configuration

179

high_perf_client = configure_high_performance_client()

180

memory_client = configure_memory_constrained_client()

181

```

182

183

### Development vs Production Configuration

184

185

```python

186

import os

187

188

def get_cache_config():

189

"""Get cache configuration based on environment."""

190

environment = os.getenv("ENVIRONMENT", "development")

191

192

if environment == "production":

193

return {

194

"file_cache_mode": FileCacheMode.persistent,

195

"local_cache_dir": "/var/cache/app/cloudpathlib"

196

}

197

elif environment == "staging":

198

return {

199

"file_cache_mode": FileCacheMode.tmp_dir,

200

"local_cache_dir": "/tmp/staging_cache"

201

}

202

else: # development

203

return {

204

"file_cache_mode": FileCacheMode.cloudpath_object,

205

"local_cache_dir": "./dev_cache"

206

}

207

208

# Apply environment-specific configuration

209

cache_config = get_cache_config()

210

client = S3Client(**cache_config)

211

```

212

213

### Cache Directory Management

214

215

```python

216

import tempfile

217

import shutil

218

from pathlib import Path

219

220

class ManagedCacheDirectory:

221

"""Context manager for temporary cache directories."""

222

223

def __init__(self, prefix="cloudpath_"):

224

self.prefix = prefix

225

self.temp_dir = None

226

227

def __enter__(self):

228

self.temp_dir = Path(tempfile.mkdtemp(prefix=self.prefix))

229

return str(self.temp_dir)

230

231

def __exit__(self, exc_type, exc_val, exc_tb):

232

if self.temp_dir and self.temp_dir.exists():

233

shutil.rmtree(self.temp_dir)

234

235

# Use managed cache directory

236

with ManagedCacheDirectory() as cache_dir:

237

client = S3Client(

238

file_cache_mode=FileCacheMode.persistent,

239

local_cache_dir=cache_dir

240

)

241

242

path = CloudPath("s3://bucket/file.txt", client=client)

243

content = path.read_text() # Cached in managed directory

244

245

# Directory automatically cleaned up when exiting context

246

```

247

248

### Cache Monitoring

249

250

```python

251

import os

252

from pathlib import Path

253

254

def get_cache_stats(cache_dir):

255

"""Get statistics about cache directory."""

256

cache_path = Path(cache_dir)

257

258

if not cache_path.exists():

259

return {"exists": False}

260

261

files = list(cache_path.rglob("*"))

262

file_sizes = [f.stat().st_size for f in files if f.is_file()]

263

264

return {

265

"exists": True,

266

"total_files": len([f for f in files if f.is_file()]),

267

"total_directories": len([f for f in files if f.is_dir()]),

268

"total_size_bytes": sum(file_sizes),

269

"total_size_mb": sum(file_sizes) / (1024 * 1024),

270

"largest_file_bytes": max(file_sizes) if file_sizes else 0

271

}

272

273

# Monitor cache usage

274

cache_dir = "/tmp/cloudpath_cache"

275

client = S3Client(

276

file_cache_mode=FileCacheMode.persistent,

277

local_cache_dir=cache_dir

278

)

279

280

# Perform operations

281

path1 = CloudPath("s3://bucket/file1.txt", client=client)

282

path2 = CloudPath("s3://bucket/file2.txt", client=client)

283

284

content1 = path1.read_text()

285

content2 = path2.read_text()

286

287

# Check cache statistics

288

stats = get_cache_stats(cache_dir)

289

print(f"Cache stats: {stats}")

290

```

291

292

### Configuration Validation

293

294

```python

295

from pathlib import Path

296

297

def validate_cache_configuration(file_cache_mode, local_cache_dir):

298

"""Validate cache configuration settings."""

299

issues = []

300

301

# Validate cache mode

302

if not isinstance(file_cache_mode, FileCacheMode):

303

issues.append(f"Invalid cache mode: {file_cache_mode}")

304

305

# Validate cache directory

306

if local_cache_dir:

307

cache_path = Path(local_cache_dir)

308

309

# Check if parent directory exists

310

if not cache_path.parent.exists():

311

issues.append(f"Cache directory parent does not exist: {cache_path.parent}")

312

313

# Check if we can create the directory

314

try:

315

cache_path.mkdir(parents=True, exist_ok=True)

316

except PermissionError:

317

issues.append(f"Cannot create cache directory: {cache_path}")

318

319

# Check write permissions

320

if cache_path.exists() and not os.access(cache_path, os.W_OK):

321

issues.append(f"No write permission to cache directory: {cache_path}")

322

323

return issues

324

325

# Validate configuration before using

326

cache_mode = FileCacheMode.persistent

327

cache_dir = "/tmp/my_cache"

328

329

issues = validate_cache_configuration(cache_mode, cache_dir)

330

if issues:

331

print("Configuration issues:")

332

for issue in issues:

333

print(f" - {issue}")

334

else:

335

print("Configuration is valid")

336

client = S3Client(

337

file_cache_mode=cache_mode,

338

local_cache_dir=cache_dir

339

)

340

```

341

342

### Cache Cleanup Utilities

343

344

```python

345

import time

346

from datetime import datetime, timedelta

347

348

def cleanup_old_cache_files(cache_dir, max_age_days=7):

349

"""Remove cache files older than specified days."""

350

cache_path = Path(cache_dir)

351

352

if not cache_path.exists():

353

return 0

354

355

cutoff_time = time.time() - (max_age_days * 24 * 60 * 60)

356

removed_count = 0

357

358

for file_path in cache_path.rglob("*"):

359

if file_path.is_file():

360

if file_path.stat().st_mtime < cutoff_time:

361

file_path.unlink()

362

removed_count += 1

363

364

return removed_count

365

366

def cleanup_large_cache_files(cache_dir, max_size_mb=100):

367

"""Remove cache files larger than specified size."""

368

cache_path = Path(cache_dir)

369

370

if not cache_path.exists():

371

return 0

372

373

max_size_bytes = max_size_mb * 1024 * 1024

374

removed_count = 0

375

376

for file_path in cache_path.rglob("*"):

377

if file_path.is_file():

378

if file_path.stat().st_size > max_size_bytes:

379

file_path.unlink()

380

removed_count += 1

381

382

return removed_count

383

384

# Usage

385

cache_dir = "/tmp/cloudpath_cache"

386

387

# Clean up old files

388

old_files_removed = cleanup_old_cache_files(cache_dir, max_age_days=3)

389

print(f"Removed {old_files_removed} old cache files")

390

391

# Clean up large files

392

large_files_removed = cleanup_large_cache_files(cache_dir, max_size_mb=50)

393

print(f"Removed {large_files_removed} large cache files")

394

```

395

396

### Advanced Configuration Patterns

397

398

```python

399

class CacheConfiguration:

400

"""Advanced cache configuration management."""

401

402

def __init__(self):

403

self.configurations = {}

404

405

def register_config(self, name, **kwargs):

406

"""Register a named configuration."""

407

self.configurations[name] = kwargs

408

409

def get_client(self, config_name, client_class, **additional_args):

410

"""Create client with named configuration."""

411

config = self.configurations.get(config_name, {})

412

config.update(additional_args)

413

return client_class(**config)

414

415

# Set up configuration registry

416

cache_config = CacheConfiguration()

417

418

# Register different configurations

419

cache_config.register_config(

420

"high_performance",

421

file_cache_mode=FileCacheMode.persistent,

422

local_cache_dir="/fast/cache"

423

)

424

425

cache_config.register_config(

426

"low_memory",

427

file_cache_mode=FileCacheMode.close_file,

428

local_cache_dir="/tmp/cache"

429

)

430

431

cache_config.register_config(

432

"development",

433

file_cache_mode=FileCacheMode.cloudpath_object,

434

local_cache_dir="./dev_cache"

435

)

436

437

# Create clients with named configurations

438

high_perf_s3 = cache_config.get_client(

439

"high_performance",

440

S3Client,

441

aws_profile="production"

442

)

443

444

low_mem_gs = cache_config.get_client(

445

"low_memory",

446

GSClient,

447

project="my-project"

448

)

449

```

450

451

### Environment Variable Integration

452

453

```python

454

import os

455

456

class EnvironmentConfiguration:

457

"""Configuration management using environment variables."""

458

459

@staticmethod

460

def get_cache_mode():

461

"""Get cache mode from environment."""

462

mode_str = os.getenv("CLOUDPATHLIB_CACHE_MODE", "tmp_dir")

463

try:

464

return FileCacheMode(mode_str)

465

except ValueError:

466

print(f"Invalid cache mode '{mode_str}', using default")

467

return FileCacheMode.tmp_dir

468

469

@staticmethod

470

def get_cache_dir():

471

"""Get cache directory from environment."""

472

return os.getenv("CLOUDPATHLIB_CACHE_DIR")

473

474

@staticmethod

475

def is_caching_enabled():

476

"""Check if caching is enabled."""

477

return os.getenv("CLOUDPATHLIB_DISABLE_CACHE", "").lower() != "true"

478

479

@classmethod

480

def create_s3_client(cls):

481

"""Create S3 client from environment configuration."""

482

if not cls.is_caching_enabled():

483

# No mode disables caching outright; close_file is the closest, clearing the cache when the file is closed

484

return S3Client(file_cache_mode=FileCacheMode.close_file)

485

486

return S3Client(

487

file_cache_mode=cls.get_cache_mode(),

488

local_cache_dir=cls.get_cache_dir()

489

)

490

491

# Usage with environment variables

492

"""

493

Environment setup:

494

export CLOUDPATHLIB_CACHE_MODE=persistent

495

export CLOUDPATHLIB_CACHE_DIR=/var/cache/myapp

496

export CLOUDPATHLIB_DISABLE_CACHE=false

497

"""

498

499

env_client = EnvironmentConfiguration.create_s3_client()

500

path = CloudPath("s3://bucket/file.txt", client=env_client)

501

```

502

503

### Configuration Documentation

504

505

```python

506

def print_configuration_help():

507

"""Print help for CloudPathLib configuration options."""

508

509

help_text = """

510

CloudPathLib Configuration Options

511

=================================

512

513

Environment Variables:

514

CLOUDPATHLIB_CACHE_MODE - Cache management mode

515

Values: persistent, tmp_dir, cloudpath_object, close_file

516

Default: tmp_dir

517

518

CLOUDPATHLIB_CACHE_DIR - Custom cache directory path

519

Default: System temp directory

520

521

CLOUDPATHLIB_DISABLE_CACHE - Disable all caching

522

Values: true, false

523

Default: false

524

525

CLOUDPATHLIB_PATCH_OPEN - Auto-patch open() function

526

Values: true, false

527

Default: false

528

529

CLOUDPATHLIB_PATCH_OS - Auto-patch os functions

530

Values: true, false

531

Default: false

532

533

CLOUDPATHLIB_PATCH_GLOB - Auto-patch glob functions

534

Values: true, false

535

Default: false

536

537

CLOUDPATHLIB_PATCH_ALL - Auto-patch all functions

538

Values: true, false

539

Default: false

540

541

Cache Modes:

542

persistent - Files cached until manually cleared

543

tmp_dir - Files cached in temp directory (default)

544

cloudpath_object - Cache cleared when CloudPath deleted

545

close_file - Cache cleared when file closed

546

547

Example Configuration:

548

export CLOUDPATHLIB_CACHE_MODE=persistent

549

export CLOUDPATHLIB_CACHE_DIR=/var/cache/myapp

550

export CLOUDPATHLIB_PATCH_ALL=true

551

"""

552

553

print(help_text)

554

555

# Show configuration help

556

print_configuration_help()

557

```