# Azure Blob Storage Integration

Azure Blob Storage support with Azure Active Directory authentication, hierarchical namespace support for ADLS Gen2, and Azure-specific blob operations. This implementation provides comprehensive access to Azure Blob Storage and Azure Data Lake Storage Gen2 capabilities.

## Capabilities

### AzureBlobPath Class

Azure Blob Storage-specific path implementation with access to Azure metadata and ADLS Gen2 support.

```python { .api }
class AzureBlobPath(CloudPath):
    """Azure Blob Storage path implementation."""

    @property
    def container(self) -> str:
        """
        Azure container name.

        Returns:
            Container name from the Azure URI
        """

    @property
    def blob(self) -> str:
        """
        Blob name (path within container).

        Returns:
            Blob name string
        """

    @property
    def etag(self) -> str:
        """
        Azure blob ETag identifier.

        Returns:
            ETag string for the blob
        """

    @property
    def md5(self) -> str:
        """
        MD5 hash of the blob content.

        Returns:
            MD5 hash string
        """
```

### AzureBlobClient Class

Azure Blob Storage client with comprehensive authentication and configuration options.

```python { .api }
class AzureBlobClient:
    """Azure Blob Storage client."""

    def __init__(
        self,
        account_url: str = None,
        credential = None,
        connection_string: str = None,
        blob_service_client = None,
        data_lake_client = None,
        file_cache_mode: FileCacheMode = None,
        local_cache_dir: str = None,
        content_type_method = None
    ):
        """
        Initialize Azure Blob client.

        Args:
            account_url: Azure storage account URL
            credential: Azure credential object (various types supported)
            connection_string: Azure storage connection string
            blob_service_client: Custom BlobServiceClient instance
            data_lake_client: Custom DataLakeServiceClient for ADLS Gen2
            file_cache_mode: Cache management strategy
            local_cache_dir: Local directory for file cache
            content_type_method: Function to determine MIME types
        """
```

## Usage Examples

### Basic Azure Blob Operations

```python
from cloudpathlib import AzureBlobPath, AzureBlobClient

# Create Azure path (uses default client)
az_path = AzureBlobPath("az://my-container/data/file.txt")

# Access Azure-specific properties
print(f"Container: {az_path.container}")  # "my-container"
print(f"Blob: {az_path.blob}")  # "data/file.txt"

# Check if blob exists and get metadata
if az_path.exists():
    print(f"ETag: {az_path.etag}")
    print(f"MD5: {az_path.md5}")
```

### Connection String Authentication

```python
# Use connection string from Azure portal
connection_string = (
    "DefaultEndpointsProtocol=https;"
    "AccountName=mystorageaccount;"
    "AccountKey=myaccountkey;"
    "EndpointSuffix=core.windows.net"
)

client = AzureBlobClient(connection_string=connection_string)
client.set_as_default_client()

# Create paths using connection string
az_path = AzureBlobPath("az://my-container/data.json")
content = az_path.read_text()
```

### Account Key Authentication

```python
from azure.storage.blob import BlobServiceClient

# Create client with account key
account_url = "https://mystorageaccount.blob.core.windows.net"
account_key = "your-account-key"

client = AzureBlobClient(
    account_url=account_url,
    credential=account_key
)

az_path = AzureBlobPath("az://my-container/file.txt", client=client)
```

### Azure Active Directory Authentication

```python
from azure.identity import DefaultAzureCredential, ClientSecretCredential

# Use default Azure credential (recommended for production)
credential = DefaultAzureCredential()
client = AzureBlobClient(
    account_url="https://mystorageaccount.blob.core.windows.net",
    credential=credential
)

# Or use service principal
credential = ClientSecretCredential(
    tenant_id="your-tenant-id",
    client_id="your-client-id",
    client_secret="your-client-secret"
)

client = AzureBlobClient(
    account_url="https://mystorageaccount.blob.core.windows.net",
    credential=credential
)
```

### Managed Identity Authentication

```python
from azure.identity import ManagedIdentityCredential

# Use managed identity (for Azure VMs, App Service, etc.)
credential = ManagedIdentityCredential()
client = AzureBlobClient(
    account_url="https://mystorageaccount.blob.core.windows.net",
    credential=credential
)

# Use with specific client ID
credential = ManagedIdentityCredential(client_id="your-managed-identity-client-id")
```

### SAS Token Authentication

```python
# Use Shared Access Signature token
sas_token = "your-sas-token"
account_url = f"https://mystorageaccount.blob.core.windows.net?{sas_token}"

client = AzureBlobClient(account_url=account_url)

az_path = AzureBlobPath("az://my-container/file.txt", client=client)
```

### Azure Data Lake Storage Gen2 (ADLS Gen2)

```python
from azure.storage.filedatalake import DataLakeServiceClient

# ADLS Gen2 with hierarchical namespace support
dfs_client = DataLakeServiceClient(
    account_url="https://mystorageaccount.dfs.core.windows.net",
    credential=DefaultAzureCredential()
)

client = AzureBlobClient(data_lake_client=dfs_client)

# ADLS Gen2 supports true directory operations
adls_path = AzureBlobPath("az://filesystem/directory/", client=client)
adls_path.mkdir(parents=True, exist_ok=True)

# Create files in directory structure
file_path = adls_path / "data.txt"
file_path.write_text("ADLS Gen2 content")
```

### Blob Tiers and Storage Classes

```python
# Upload with specific access tier
def upload_with_tier(local_path, az_path, tier):
    """Upload blob with specific access tier."""
    with open(local_path, 'rb') as data:
        blob_client = az_path.client.blob_service_client.get_blob_client(
            container=az_path.container,
            blob=az_path.blob
        )
        blob_client.upload_blob(data, standard_blob_tier=tier, overwrite=True)

# Usage examples
az_path = AzureBlobPath("az://my-container/archive.zip")
upload_with_tier("data.zip", az_path, "Archive")  # Cold storage

# Different access tiers: Hot, Cool, Archive
tiers = ["Hot", "Cool", "Archive"]
```

### Blob Metadata and Properties

```python
# Set custom metadata
def set_blob_metadata(az_path, metadata_dict):
    """Set custom metadata on Azure blob."""
    blob_client = az_path.client.blob_service_client.get_blob_client(
        container=az_path.container,
        blob=az_path.blob
    )
    blob_client.set_blob_metadata(metadata=metadata_dict)

def get_blob_metadata(az_path):
    """Get blob metadata and properties."""
    blob_client = az_path.client.blob_service_client.get_blob_client(
        container=az_path.container,
        blob=az_path.blob
    )
    properties = blob_client.get_blob_properties()
    return properties.metadata, properties

# Usage
az_path = AzureBlobPath("az://my-container/document.pdf")

# Set metadata
set_blob_metadata(az_path, {
    "author": "Data Team",
    "project": "Analytics",
    "version": "1.0"
})

# Read metadata and properties
metadata, properties = get_blob_metadata(az_path)
print(f"Metadata: {metadata}")
print(f"Content Type: {properties.content_settings.content_type}")
print(f"Last Modified: {properties.last_modified}")
```

### Lease Operations

```python
from azure.storage.blob import BlobLeaseClient

def acquire_blob_lease(az_path, lease_duration=60):
    """Acquire exclusive lease on blob."""
    blob_client = az_path.client.blob_service_client.get_blob_client(
        container=az_path.container,
        blob=az_path.blob
    )

    lease_client = BlobLeaseClient(blob_client)
    lease_id = lease_client.acquire(lease_duration=lease_duration)
    return lease_client, lease_id

# Usage
az_path = AzureBlobPath("az://my-container/critical-file.txt")
lease_client, lease_id = acquire_blob_lease(az_path)

try:
    # Perform operations with exclusive access
    content = az_path.read_text()
    modified_content = content + "\nModified with lease"
    az_path.write_text(modified_content)
finally:
    # Always release lease
    lease_client.release()
```

### Batch Operations

```python
import concurrent.futures
from pathlib import Path

def upload_file_to_azure(local_path, az_base):
    """Upload single file to Azure."""
    az_path = az_base / local_path.name
    az_path.upload_from(local_path)
    return az_path

# Parallel upload
local_files = list(Path("data/").glob("*.csv"))
az_base = AzureBlobPath("az://my-container/csv-data/")

with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
    futures = [executor.submit(upload_file_to_azure, f, az_base) for f in local_files]

    for future in concurrent.futures.as_completed(futures):
        try:
            az_path = future.result()
            print(f"Uploaded: {az_path}")
        except Exception as e:
            print(f"Upload failed: {e}")
```

### Snapshot Operations

```python
from datetime import datetime

def create_blob_snapshot(az_path):
    """Create snapshot of blob."""
    blob_client = az_path.client.blob_service_client.get_blob_client(
        container=az_path.container,
        blob=az_path.blob
    )

    snapshot = blob_client.create_snapshot()
    return snapshot['snapshot']

def list_blob_snapshots(az_path):
    """List all snapshots of a blob."""
    container_client = az_path.client.blob_service_client.get_container_client(
        az_path.container
    )

    snapshots = []
    for blob in container_client.list_blobs(name_starts_with=az_path.blob, include=['snapshots']):
        if blob.name == az_path.blob and blob.snapshot:
            snapshots.append({
                'snapshot': blob.snapshot,
                'last_modified': blob.last_modified,
                'size': blob.size
            })

    return sorted(snapshots, key=lambda x: x['last_modified'], reverse=True)

# Usage
az_path = AzureBlobPath("az://my-container/important.txt")

# Create snapshot before modification
snapshot_id = create_blob_snapshot(az_path)
print(f"Created snapshot: {snapshot_id}")

# List all snapshots
snapshots = list_blob_snapshots(az_path)
for snapshot in snapshots:
    print(f"Snapshot {snapshot['snapshot']}: {snapshot['last_modified']}")
```

### Container Operations

```python
def create_container(container_name, client):
    """Create container with public access."""
    container_client = client.blob_service_client.get_container_client(container_name)

    try:
        container_client.create_container(public_access='blob')
        print(f"Created container: {container_name}")
    except Exception as e:
        print(f"Container creation failed: {e}")

def list_containers(client):
    """List all containers in storage account."""
    containers = []
    for container in client.blob_service_client.list_containers():
        containers.append({
            'name': container.name,
            'last_modified': container.last_modified,
            'public_access': container.public_access
        })
    return containers

# Usage
client = AzureBlobClient(connection_string=connection_string)
create_container("new-container", client)
containers = list_containers(client)
```

### Cross-Region Replication

```python
# Work with geo-replicated storage accounts
primary_client = AzureBlobClient(
    account_url="https://mystorageaccount.blob.core.windows.net",
    credential=credential
)

secondary_client = AzureBlobClient(
    account_url="https://mystorageaccount-secondary.blob.core.windows.net",
    credential=credential
)

# Read from secondary region (read-access geo-redundant storage)
primary_path = AzureBlobPath("az://my-container/data.txt", client=primary_client)
secondary_path = AzureBlobPath("az://my-container/data.txt", client=secondary_client)

try:
    content = primary_path.read_text()
except Exception:
    # Fallback to secondary region
    content = secondary_path.read_text()
```

### Event Grid Integration

```python
# Work with Azure Event Grid for blob events
def setup_blob_monitoring(az_path):
    """Example of blob operations that can trigger Event Grid events."""

    # Note: Event Grid subscriptions require Azure portal configuration.
    # Each operation below triggers the event noted alongside it:
    az_path.write_text("New content")           # BlobCreated
    az_path.copy(az_path.with_suffix('.bak'))   # BlobCreated
    az_path.unlink()                            # BlobDeleted

# Usage
az_path = AzureBlobPath("az://monitored-container/file.txt")
setup_blob_monitoring(az_path)
```

### Performance Optimization

```python
# Configure for high-throughput operations
from azure.storage.blob import BlobServiceClient
from azure.core.pipeline.transport import RequestsTransport

# Custom transport with connection pooling
transport = RequestsTransport(
    connection_pool_maxsize=100,
    connection_pool_block=False
)

blob_service_client = BlobServiceClient(
    account_url="https://mystorageaccount.blob.core.windows.net",
    credential=credential,
    transport=transport
)

client = AzureBlobClient(blob_service_client=blob_service_client)

# Performance monitoring
import time
az_path = AzureBlobPath("az://my-container/large-file.dat", client=client)

start_time = time.time()
az_path.download_to("local-large-file.dat")
duration = time.time() - start_time
print(f"Download completed in {duration:.2f} seconds")
```

### Error Handling

```python
from cloudpathlib import (
    CloudPathFileNotFoundError,
    MissingCredentialsError
)
from azure.core.exceptions import (
    AzureError,
    ResourceNotFoundError,
    ClientAuthenticationError
)

try:
    az_path = AzureBlobPath("az://nonexistent-container/file.txt")
    content = az_path.read_text()
except CloudPathFileNotFoundError:
    print("Azure blob not found")
except ClientAuthenticationError:
    print("Azure authentication failed")
except ResourceNotFoundError:
    print("Azure resource not found")
except AzureError as e:
    print(f"Azure error: {e}")
```