# Universal Path Handling

AnyPath provides intelligent dispatching between cloud paths and local filesystem paths, enabling code that works seamlessly with both local and cloud storage. This universal interface allows you to write path-agnostic code that automatically handles different storage backends.
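
For example (bucket and file names here are placeholders), the same constructor call yields a cloud-aware path or a standard `pathlib.Path` depending on the input:

```python
from cloudpathlib import AnyPath

report = AnyPath("s3://my-bucket/report.txt")   # an S3Path (CloudPath subclass)
notes = AnyPath("/home/user/notes.txt")         # a pathlib.Path
```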

## Capabilities

### AnyPath Class

Polymorphic constructor that automatically dispatches to the appropriate path type.

```python { .api }
class AnyPath:
    """Universal path constructor."""

    def __new__(
        cls,
        *args,
        **kwargs
    ) -> typing.Union[CloudPath, "pathlib.Path"]:
        """
        Create appropriate path type based on input.

        Args:
            *args: Path arguments
            **kwargs: Additional arguments

        Returns:
            CloudPath instance for cloud URIs, pathlib.Path for local paths
        """

    @classmethod
    def validate(cls, v):
        """
        Pydantic validator for AnyPath instances.

        Args:
            v: Value to validate

        Returns:
            Validated path object
        """
```
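
Because the constructor returns either a `CloudPath` subclass or a plain `pathlib.Path`, code that needs backend-specific behavior can branch on the concrete type. A minimal sketch (the bucket name is a placeholder):

```python
from cloudpathlib import AnyPath, CloudPath

path = AnyPath("s3://my-bucket/data/file.txt")

if isinstance(path, CloudPath):
    # Cloud-backed path (S3Path, GSPath, AzureBlobPath, ...)
    print("cloud path:", type(path).__name__)
else:
    # Regular pathlib.Path on the local filesystem
    print("local path:", path)
```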

### Helper Functions

Utility functions for path conversion and handling.

```python { .api }
def to_anypath(
    s: typing.Union[str, "os.PathLike"]
) -> typing.Union[CloudPath, "pathlib.Path"]:
    """
    Convert string or PathLike to appropriate path type.

    Args:
        s: String or path-like object

    Returns:
        CloudPath for cloud URIs, pathlib.Path for local paths
    """
```
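
A quick illustration (paths are placeholders): the helper accepts plain strings as well as existing `os.PathLike` objects such as `pathlib.Path`:

```python
from pathlib import Path
from cloudpathlib import to_anypath

p1 = to_anypath("gs://my-bucket/data.json")    # -> GSPath
p2 = to_anypath(Path("/tmp/data.json"))        # -> PosixPath (or WindowsPath)
print(type(p1).__name__, type(p2).__name__)
```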

## Usage Examples

### Basic AnyPath Usage

```python
from cloudpathlib import AnyPath

# Automatically dispatches to appropriate path type
cloud_path = AnyPath("s3://my-bucket/file.txt")
print(type(cloud_path))    # <class 'cloudpathlib.s3.s3path.S3Path'>

local_path = AnyPath("/home/user/file.txt")
print(type(local_path))    # <class 'pathlib.PosixPath'>

windows_path = AnyPath("C:\\Users\\user\\file.txt")
print(type(windows_path))  # <class 'pathlib.WindowsPath'> (on Windows)

# Works with different cloud providers
gcs_path = AnyPath("gs://my-bucket/file.txt")
azure_path = AnyPath("az://my-container/file.txt")
http_path = AnyPath("https://example.com/file.txt")
```

### Path-Agnostic Functions

```python
def process_file(path_str):
    """Process file regardless of storage location."""
    path = AnyPath(path_str)

    # Same API works for both local and cloud paths
    if path.exists():
        content = path.read_text()

        # Process content
        processed = content.upper()

        # Write back to same location
        output_path = path.with_stem(path.stem + "_processed")
        output_path.write_text(processed)

        return output_path
    else:
        raise FileNotFoundError(f"File not found: {path}")


# Works with any path type
local_result = process_file("/tmp/local_file.txt")
s3_result = process_file("s3://bucket/cloud_file.txt")
gcs_result = process_file("gs://bucket/gcs_file.txt")
```

### Configuration-Driven Path Handling

```python
import os

from cloudpathlib import AnyPath


def get_data_path(filename):
    """Get data path based on environment configuration."""
    storage_type = os.getenv("STORAGE_TYPE", "local")

    if storage_type == "local":
        base_path = os.getenv("LOCAL_DATA_DIR", "./data")
        return AnyPath(base_path) / filename
    elif storage_type == "s3":
        bucket = os.getenv("S3_BUCKET", "default-bucket")
        return AnyPath(f"s3://{bucket}/data") / filename
    elif storage_type == "gcs":
        bucket = os.getenv("GCS_BUCKET", "default-bucket")
        return AnyPath(f"gs://{bucket}/data") / filename
    else:
        raise ValueError(f"Unknown storage type: {storage_type}")


# Usage - works with any configured storage
data_file = get_data_path("dataset.csv")
print(f"Using: {data_file}")

# Read/write operations work the same regardless of backend
if data_file.exists():
    data = data_file.read_text()
else:
    data_file.write_text("id,name,value\n1,test,100")
```

### Batch Processing with Mixed Paths

```python
def process_file_list(file_paths):
    """Process list of files from different storage locations."""
    results = []

    for path_str in file_paths:
        path = AnyPath(path_str)

        print(f"Processing {path} (type: {type(path).__name__})")

        if path.exists():
            # Same operations work for all path types
            size = path.stat().st_size
            modified = path.stat().st_mtime

            results.append({
                'path': str(path),
                'type': type(path).__name__,
                'size': size,
                'modified': modified
            })
        else:
            print(f"Skipping non-existent file: {path}")

    return results


# Mix of local and cloud paths
mixed_paths = [
    "/home/user/local_file.txt",
    "s3://my-bucket/s3_file.txt",
    "gs://my-bucket/gcs_file.txt",
    "az://my-container/azure_file.txt",
    "C:\\Users\\user\\windows_file.txt"
]

results = process_file_list(mixed_paths)
for result in results:
    print(f"{result['type']}: {result['path']} ({result['size']} bytes)")
```

### Data Pipeline with Flexible Storage

```python
class DataPipeline:
    """Data pipeline that works with any storage backend."""

    def __init__(self, input_path, output_path, temp_dir=None):
        self.input_path = AnyPath(input_path)
        self.output_path = AnyPath(output_path)
        self.temp_dir = AnyPath(temp_dir) if temp_dir else None

    def process(self):
        """Run the pipeline."""
        print(f"Input: {self.input_path} ({type(self.input_path).__name__})")
        print(f"Output: {self.output_path} ({type(self.output_path).__name__})")

        # Read input data
        raw_data = self.input_path.read_text()

        # Process data
        processed_data = self.transform_data(raw_data)

        # Write temporary result if temp directory specified
        if self.temp_dir:
            temp_file = self.temp_dir / f"temp_{self.input_path.name}"
            temp_file.parent.mkdir(parents=True, exist_ok=True)
            temp_file.write_text(processed_data)
            print(f"Temp file: {temp_file}")

        # Write final output
        self.output_path.parent.mkdir(parents=True, exist_ok=True)
        self.output_path.write_text(processed_data)

        return self.output_path

    def transform_data(self, data):
        """Transform the data (example transformation)."""
        lines = data.strip().split('\n')
        processed_lines = [f"PROCESSED: {line}" for line in lines]
        return '\n'.join(processed_lines)


# Works with any combination of storage types
pipeline1 = DataPipeline(
    input_path="s3://source-bucket/raw_data.txt",
    output_path="/tmp/processed_data.txt",
    temp_dir="gs://temp-bucket/pipeline-temp/"
)

pipeline2 = DataPipeline(
    input_path="/home/user/input.txt",
    output_path="az://output-container/result.txt"
)

# Same interface, different storage backends
result1 = pipeline1.process()
result2 = pipeline2.process()
```

### Dynamic Path Resolution

```python
def resolve_path(path_spec):
    """Resolve path specification to actual path."""
    if isinstance(path_spec, dict):
        # Dynamic path specification
        storage_type = path_spec.get('type', 'local')

        if storage_type == 'local':
            base_dir = path_spec.get('base_dir', '.')
            filename = path_spec['filename']
            return AnyPath(base_dir) / filename

        elif storage_type == 's3':
            bucket = path_spec['bucket']
            key = path_spec['key']
            return AnyPath(f"s3://{bucket}/{key}")

        elif storage_type == 'gcs':
            bucket = path_spec['bucket']
            blob = path_spec['blob']
            return AnyPath(f"gs://{bucket}/{blob}")

        elif storage_type == 'azure':
            container = path_spec['container']
            blob = path_spec['blob']
            return AnyPath(f"az://{container}/{blob}")

    else:
        # Direct path specification
        return AnyPath(path_spec)


# Example path specifications
path_specs = [
    "/direct/local/path.txt",
    "s3://direct-bucket/file.txt",
    {
        'type': 'local',
        'base_dir': '/home/user/data',
        'filename': 'config.json'
    },
    {
        'type': 's3',
        'bucket': 'my-data-bucket',
        'key': 'processed/results.csv'
    },
    {
        'type': 'gcs',
        'bucket': 'analytics-bucket',
        'blob': 'reports/monthly.pdf'
    }
]

# Resolve all specifications
resolved_paths = [resolve_path(spec) for spec in path_specs]
for original, resolved in zip(path_specs, resolved_paths):
    print(f"{original} -> {resolved} ({type(resolved).__name__})")
```

### Testing with Path Abstraction

```python
import json
import tempfile

import pytest


class TestDataProcessor:
    """Test data processor with different storage backends."""

    def setup_test_data(self, storage_type="local"):
        """Setup test data for different storage types."""
        if storage_type == "local":
            temp_dir = tempfile.mkdtemp()
            test_file = AnyPath(temp_dir) / "test_data.txt"
        else:
            # Use environment variables for cloud testing
            if storage_type == "s3":
                test_file = AnyPath("s3://test-bucket/test_data.txt")
            elif storage_type == "gcs":
                test_file = AnyPath("gs://test-bucket/test_data.txt")
            else:
                pytest.skip(f"Storage type {storage_type} not configured for testing")

        # Same setup code works for all storage types
        test_file.write_text("line1\nline2\nline3")
        return test_file

    @pytest.mark.parametrize("storage_type", ["local", "s3", "gcs"])
    def test_file_processing(self, storage_type):
        """Test file processing with different storage backends."""
        test_file = self.setup_test_data(storage_type)

        # Process file
        result = process_file(str(test_file))

        # Verify results work the same way
        assert result.exists()
        content = result.read_text()
        assert "LINE1" in content  # Assuming process_file converts to uppercase

        # Cleanup
        if storage_type == "local":
            result.unlink()
            test_file.unlink()


# Usage in configuration management
def load_config(config_path_spec):
    """Load configuration from various sources."""
    config_path = AnyPath(config_path_spec)

    if config_path.exists():
        return json.loads(config_path.read_text())
    else:
        # Return default config
        return {"default": True}


# Works with any path type
local_config = load_config("./config.json")
s3_config = load_config("s3://config-bucket/prod-config.json")
gcs_config = load_config("gs://config-bucket/staging-config.json")
```

### Helper Function Usage

```python
from pathlib import Path

from cloudpathlib import to_anypath

# Convert various inputs to appropriate path types
paths = [
    "/local/file.txt",
    "s3://bucket/file.txt",
    Path("/another/local/file.txt"),
    "gs://bucket/data.json",
    "https://example.com/api/data"
]

converted_paths = [to_anypath(p) for p in paths]

for original, converted in zip(paths, converted_paths):
    print(f"{original} -> {type(converted).__name__}")


# Use in functions that accept string or path objects
def safe_read_file(path_input):
    """Safely read file from string or path object."""
    path = to_anypath(path_input)

    try:
        return path.read_text()
    except Exception as e:
        print(f"Error reading {path}: {e}")
        return None


# Works with any input type
content1 = safe_read_file("/tmp/file.txt")
content2 = safe_read_file("s3://bucket/file.txt")
content3 = safe_read_file(Path("/home/user/file.txt"))
```

### Pydantic Integration

```python
from typing import Optional

from pydantic import BaseModel
from cloudpathlib import AnyPath


class DataConfig(BaseModel):
    """Configuration model with path validation."""

    input_path: AnyPath
    output_path: AnyPath
    temp_dir: Optional[AnyPath] = None

    class Config:
        # Allow AnyPath types
        arbitrary_types_allowed = True


# Validation works with any path type
config_data = {
    "input_path": "s3://source-bucket/data.csv",
    "output_path": "/tmp/processed.csv",
    "temp_dir": "gs://temp-bucket/workspace/"
}

config = DataConfig(**config_data)
print(f"Input: {config.input_path} ({type(config.input_path).__name__})")
print(f"Output: {config.output_path} ({type(config.output_path).__name__})")
print(f"Temp: {config.temp_dir} ({type(config.temp_dir).__name__})")

# Use validated paths
if config.input_path.exists():
    data = config.input_path.read_text()
    config.output_path.write_text(data.upper())
```