# Azure Blob Storage Integration

Azure Blob Storage support with Azure Active Directory authentication, hierarchical namespace support for ADLS Gen2, and Azure-specific blob operations. This implementation provides comprehensive access to Azure Blob Storage and Azure Data Lake Storage Gen2 capabilities.

## Capabilities

### AzureBlobPath Class

Azure Blob Storage-specific path implementation with access to Azure metadata and ADLS Gen2 support.

```python { .api }
class AzureBlobPath(CloudPath):
    """Azure Blob Storage path implementation."""

    @property
    def container(self) -> str:
        """
        Azure container name.

        Returns:
            Container name from the Azure URI
        """

    @property
    def blob(self) -> str:
        """
        Blob name (path within container).

        Returns:
            Blob name string
        """

    @property
    def etag(self) -> str:
        """
        Azure blob ETag identifier.

        Returns:
            ETag string for the blob
        """

    @property
    def md5(self) -> str:
        """
        MD5 hash of the blob content.

        Returns:
            MD5 hash string
        """
```

### AzureBlobClient Class

Azure Blob Storage client with comprehensive authentication and configuration options.

```python { .api }
class AzureBlobClient:
    """Azure Blob Storage client."""

    def __init__(
        self,
        account_url: str = None,
        credential=None,
        connection_string: str = None,
        blob_service_client=None,
        data_lake_client=None,
        file_cache_mode: FileCacheMode = None,
        local_cache_dir: str = None,
        content_type_method=None
    ):
        """
        Initialize Azure Blob client.

        Args:
            account_url: Azure storage account URL
            credential: Azure credential object (various types supported)
            connection_string: Azure storage connection string
            blob_service_client: Custom BlobServiceClient instance
            data_lake_client: Custom DataLakeServiceClient for ADLS Gen2
            file_cache_mode: Cache management strategy
            local_cache_dir: Local directory for file cache
            content_type_method: Function to determine MIME types
        """
```
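
If no credentials are passed, cloudpathlib can typically build the default client from the `AZURE_STORAGE_CONNECTION_STRING` environment variable instead; a minimal sketch of that pattern:

```python
import os

from cloudpathlib import AzureBlobPath

# Assumption: AZURE_STORAGE_CONNECTION_STRING is set in the environment
# (e.g. exported by the deployment platform), so no explicit client is needed.
assert "AZURE_STORAGE_CONNECTION_STRING" in os.environ

az_path = AzureBlobPath("az://my-container/data/file.txt")
content = az_path.read_text()
```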

## Usage Examples

### Basic Azure Blob Operations

```python
from cloudpathlib import AzureBlobPath, AzureBlobClient

# Create Azure path (uses default client)
az_path = AzureBlobPath("az://my-container/data/file.txt")

# Access Azure-specific properties
print(f"Container: {az_path.container}")  # "my-container"
print(f"Blob: {az_path.blob}")            # "data/file.txt"

# Check if blob exists and get metadata
if az_path.exists():
    print(f"ETag: {az_path.etag}")
    print(f"MD5: {az_path.md5}")
```

### Connection String Authentication

```python
# Use connection string from Azure portal
connection_string = (
    "DefaultEndpointsProtocol=https;"
    "AccountName=mystorageaccount;"
    "AccountKey=myaccountkey;"
    "EndpointSuffix=core.windows.net"
)

client = AzureBlobClient(connection_string=connection_string)
client.set_as_default_client()

# Create paths using connection string
az_path = AzureBlobPath("az://my-container/data.json")
content = az_path.read_text()
```

### Account Key Authentication

```python
# Create client with account key
account_url = "https://mystorageaccount.blob.core.windows.net"
account_key = "your-account-key"

client = AzureBlobClient(
    account_url=account_url,
    credential=account_key
)

az_path = AzureBlobPath("az://my-container/file.txt", client=client)
```

### Azure Active Directory Authentication

```python
from azure.identity import DefaultAzureCredential, ClientSecretCredential

# Use default Azure credential (recommended for production)
credential = DefaultAzureCredential()
client = AzureBlobClient(
    account_url="https://mystorageaccount.blob.core.windows.net",
    credential=credential
)

# Or use a service principal
credential = ClientSecretCredential(
    tenant_id="your-tenant-id",
    client_id="your-client-id",
    client_secret="your-client-secret"
)

client = AzureBlobClient(
    account_url="https://mystorageaccount.blob.core.windows.net",
    credential=credential
)
```

### Managed Identity Authentication

```python
from azure.identity import ManagedIdentityCredential

# Use managed identity (for Azure VMs, App Service, etc.)
credential = ManagedIdentityCredential()
client = AzureBlobClient(
    account_url="https://mystorageaccount.blob.core.windows.net",
    credential=credential
)

# Use with specific client ID
credential = ManagedIdentityCredential(client_id="your-managed-identity-client-id")
```

### SAS Token Authentication

```python
# Use Shared Access Signature token
sas_token = "your-sas-token"
account_url = f"https://mystorageaccount.blob.core.windows.net?{sas_token}"

client = AzureBlobClient(account_url=account_url)

az_path = AzureBlobPath("az://my-container/file.txt", client=client)
```
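
If a SAS token needs to be minted in code rather than copied from the portal, the azure-storage-blob helpers `generate_blob_sas` and `BlobSasPermissions` can be used; the account name and key below are placeholders:

```python
from datetime import datetime, timedelta, timezone
from azure.storage.blob import generate_blob_sas, BlobSasPermissions

# Generate a read-only SAS for a single blob, valid for one hour
sas_token = generate_blob_sas(
    account_name="mystorageaccount",
    container_name="my-container",
    blob_name="file.txt",
    account_key="your-account-key",
    permission=BlobSasPermissions(read=True),
    expiry=datetime.now(timezone.utc) + timedelta(hours=1)
)

client = AzureBlobClient(
    account_url=f"https://mystorageaccount.blob.core.windows.net?{sas_token}"
)
```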

### Azure Data Lake Storage Gen2 (ADLS Gen2)

```python
from azure.identity import DefaultAzureCredential
from azure.storage.filedatalake import DataLakeServiceClient

# ADLS Gen2 with hierarchical namespace support
dfs_client = DataLakeServiceClient(
    account_url="https://mystorageaccount.dfs.core.windows.net",
    credential=DefaultAzureCredential()
)

client = AzureBlobClient(data_lake_client=dfs_client)

# ADLS Gen2 supports true directory operations
adls_path = AzureBlobPath("az://filesystem/directory/", client=client)
adls_path.mkdir(parents=True, exist_ok=True)

# Create files in directory structure
file_path = adls_path / "data.txt"
file_path.write_text("ADLS Gen2 content")
```

### Blob Tiers and Storage Classes

```python
# Upload with specific access tier
def upload_with_tier(local_path, az_path, tier):
    """Upload blob with specific access tier."""
    with open(local_path, 'rb') as data:
        blob_client = az_path.client.blob_service_client.get_blob_client(
            container=az_path.container,
            blob=az_path.blob
        )
        blob_client.upload_blob(data, standard_blob_tier=tier, overwrite=True)

# Usage examples
az_path = AzureBlobPath("az://my-container/archive.zip")
upload_with_tier("data.zip", az_path, "Archive")  # Cold storage

# Different access tiers: Hot, Cool, Archive
tiers = ["Hot", "Cool", "Archive"]
```
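
Tiers can also be changed on blobs that already exist (for example, demoting older data to cooler storage) through the underlying `BlobClient`; a minimal sketch using its `set_standard_blob_tier` method:

```python
def change_blob_tier(az_path, tier):
    """Move an existing blob to a different access tier."""
    blob_client = az_path.client.blob_service_client.get_blob_client(
        container=az_path.container,
        blob=az_path.blob
    )
    blob_client.set_standard_blob_tier(tier)

# Demote a rarely accessed blob to Cool storage
change_blob_tier(AzureBlobPath("az://my-container/old-report.pdf"), "Cool")
```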

### Blob Metadata and Properties

```python
# Set custom metadata
def set_blob_metadata(az_path, metadata_dict):
    """Set custom metadata on Azure blob."""
    blob_client = az_path.client.blob_service_client.get_blob_client(
        container=az_path.container,
        blob=az_path.blob
    )
    blob_client.set_blob_metadata(metadata=metadata_dict)

def get_blob_metadata(az_path):
    """Get blob metadata and properties."""
    blob_client = az_path.client.blob_service_client.get_blob_client(
        container=az_path.container,
        blob=az_path.blob
    )
    properties = blob_client.get_blob_properties()
    return properties.metadata, properties

# Usage
az_path = AzureBlobPath("az://my-container/document.pdf")

# Set metadata
set_blob_metadata(az_path, {
    "author": "Data Team",
    "project": "Analytics",
    "version": "1.0"
})

# Read metadata and properties
metadata, properties = get_blob_metadata(az_path)
print(f"Metadata: {metadata}")
print(f"Content Type: {properties.content_settings.content_type}")
print(f"Last Modified: {properties.last_modified}")
```

### Lease Operations

```python
from azure.storage.blob import BlobLeaseClient

def acquire_blob_lease(az_path, lease_duration=60):
    """Acquire exclusive lease on blob."""
    blob_client = az_path.client.blob_service_client.get_blob_client(
        container=az_path.container,
        blob=az_path.blob
    )

    lease_client = BlobLeaseClient(blob_client)
    lease_client.acquire(lease_duration=lease_duration)  # lease ID is exposed as lease_client.id
    return lease_client, lease_client.id

# Usage
az_path = AzureBlobPath("az://my-container/critical-file.txt")
lease_client, lease_id = acquire_blob_lease(az_path)

try:
    # Reads are still allowed while the lease is held
    content = az_path.read_text()
    modified_content = content + "\nModified with lease"

    # Writes to a leased blob must present the lease, so write through the SDK client
    blob_client = az_path.client.blob_service_client.get_blob_client(
        container=az_path.container,
        blob=az_path.blob
    )
    blob_client.upload_blob(modified_content, overwrite=True, lease=lease_client)
finally:
    # Always release lease
    lease_client.release()
```
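
If an operation may outlive the lease window, the same `BlobLeaseClient` can renew or break the lease; a short sketch using its lease methods:

```python
# Renew the lease before it expires (fixed-duration leases last 15-60 seconds)
lease_client.renew()

# Break the lease immediately, e.g. if the holder crashed or the lease ID was lost
lease_client.break_lease(lease_break_period=0)
```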

### Batch Operations

```python
import concurrent.futures
from pathlib import Path

def upload_file_to_azure(local_path, az_base):
    """Upload single file to Azure."""
    az_path = az_base / local_path.name
    az_path.upload_from(local_path)
    return az_path

# Parallel upload
local_files = list(Path("data/").glob("*.csv"))
az_base = AzureBlobPath("az://my-container/csv-data/")

with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
    futures = [executor.submit(upload_file_to_azure, f, az_base) for f in local_files]

    for future in concurrent.futures.as_completed(futures):
        try:
            az_path = future.result()
            print(f"Uploaded: {az_path}")
        except Exception as e:
            print(f"Upload failed: {e}")
```

### Snapshot Operations

```python
def create_blob_snapshot(az_path):
    """Create snapshot of blob."""
    blob_client = az_path.client.blob_service_client.get_blob_client(
        container=az_path.container,
        blob=az_path.blob
    )

    snapshot = blob_client.create_snapshot()
    return snapshot['snapshot']

def list_blob_snapshots(az_path):
    """List all snapshots of a blob."""
    container_client = az_path.client.blob_service_client.get_container_client(
        az_path.container
    )

    snapshots = []
    for blob in container_client.list_blobs(name_starts_with=az_path.blob, include=['snapshots']):
        if blob.name == az_path.blob and blob.snapshot:
            snapshots.append({
                'snapshot': blob.snapshot,
                'last_modified': blob.last_modified,
                'size': blob.size
            })

    return sorted(snapshots, key=lambda x: x['last_modified'], reverse=True)

# Usage
az_path = AzureBlobPath("az://my-container/important.txt")

# Create snapshot before modification
snapshot_id = create_blob_snapshot(az_path)
print(f"Created snapshot: {snapshot_id}")

# List all snapshots
snapshots = list_blob_snapshots(az_path)
for snapshot in snapshots:
    print(f"Snapshot {snapshot['snapshot']}: {snapshot['last_modified']}")
```
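
Snapshot content is read back by pointing a `BlobClient` at the snapshot timestamp. A minimal sketch (the helper name is illustrative):

```python
def read_snapshot(az_path, snapshot_id):
    """Download the content of a specific blob snapshot."""
    blob_client = az_path.client.blob_service_client.get_blob_client(
        container=az_path.container,
        blob=az_path.blob,
        snapshot=snapshot_id
    )
    return blob_client.download_blob().readall()

# Retrieve the pre-modification content captured above
original_bytes = read_snapshot(az_path, snapshot_id)
```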

### Container Operations

```python
def create_container(container_name, client):
    """Create container with public access."""
    container_client = client.blob_service_client.get_container_client(container_name)

    try:
        container_client.create_container(public_access='blob')
        print(f"Created container: {container_name}")
    except Exception as e:
        print(f"Container creation failed: {e}")

def list_containers(client):
    """List all containers in storage account."""
    containers = []
    for container in client.blob_service_client.list_containers():
        containers.append({
            'name': container.name,
            'last_modified': container.last_modified,
            'public_access': container.public_access
        })
    return containers

# Usage
client = AzureBlobClient(connection_string=connection_string)
create_container("new-container", client)
containers = list_containers(client)
```

### Cross-Region Replication

```python
# Work with geo-replicated storage accounts
primary_client = AzureBlobClient(
    account_url="https://mystorageaccount.blob.core.windows.net",
    credential=credential
)

secondary_client = AzureBlobClient(
    account_url="https://mystorageaccount-secondary.blob.core.windows.net",
    credential=credential
)

# Read from secondary region (read-access geo-redundant storage)
primary_path = AzureBlobPath("az://my-container/data.txt", client=primary_client)
secondary_path = AzureBlobPath("az://my-container/data.txt", client=secondary_client)

try:
    content = primary_path.read_text()
except Exception:
    # Fallback to secondary region
    content = secondary_path.read_text()
```

### Event Grid Integration

```python
# Work with Azure Event Grid for blob events
def setup_blob_monitoring(az_path):
    """Example of how blob operations can trigger events."""

    # Note: Event Grid subscriptions are configured in the Azure portal;
    # the operations below are the blob actions that emit events.

    az_path.write_text("New content")           # BlobCreated
    az_path.copy(az_path.with_suffix('.bak'))   # BlobCreated (copy target)
    az_path.unlink()                            # BlobDeleted

# Usage
az_path = AzureBlobPath("az://monitored-container/file.txt")
setup_blob_monitoring(az_path)
```

### Performance Optimization

```python
# Configure for high-throughput operations
from azure.storage.blob import BlobServiceClient
from azure.core.pipeline.transport import RequestsTransport

# Custom transport with connection pooling
transport = RequestsTransport(
    connection_pool_maxsize=100,
    connection_pool_block=False
)

blob_service_client = BlobServiceClient(
    account_url="https://mystorageaccount.blob.core.windows.net",
    credential=credential,
    transport=transport
)

client = AzureBlobClient(blob_service_client=blob_service_client)

# Performance monitoring
import time
az_path = AzureBlobPath("az://my-container/large-file.dat", client=client)

start_time = time.time()
az_path.download_to("local-large-file.dat")
duration = time.time() - start_time
print(f"Download completed in {duration:.2f} seconds")
```
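
Transfer chunking can also be tuned when the service client is constructed. The keyword arguments below are documented azure-storage-blob client options, but defaults and exact behavior vary by SDK version, so treat the values as illustrative:

```python
# Tune chunk sizes for large transfers
blob_service_client = BlobServiceClient(
    account_url="https://mystorageaccount.blob.core.windows.net",
    credential=credential,
    max_single_get_size=64 * 1024 * 1024,   # size of the first download request
    max_chunk_get_size=32 * 1024 * 1024,    # size of subsequent download chunks
    max_single_put_size=64 * 1024 * 1024,   # threshold for single-shot uploads
    max_block_size=8 * 1024 * 1024          # block size for chunked uploads
)

client = AzureBlobClient(blob_service_client=blob_service_client)
```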

### Error Handling

```python
from cloudpathlib import (
    CloudPathFileNotFoundError,
    MissingCredentialsError
)
from azure.core.exceptions import (
    AzureError,
    ResourceNotFoundError,
    ClientAuthenticationError
)

try:
    az_path = AzureBlobPath("az://nonexistent-container/file.txt")
    content = az_path.read_text()
except CloudPathFileNotFoundError:
    print("Azure blob not found")
except ClientAuthenticationError:
    print("Azure authentication failed")
except ResourceNotFoundError:
    print("Azure resource not found")
except AzureError as e:
    print(f"Azure error: {e}")
```