# Azure File Share

Comprehensive Azure File Share integration for managing file shares, directories, and files within Azure Storage. Azure File Share provides fully managed file shares in the cloud accessible via SMB protocol and REST API.

## Capabilities

### Azure File Share Hook

Core hook for connecting to and managing Azure File Share resources including shares, directories, and file operations.

```python { .api }
class AzureFileShareHook(BaseHook):
    """
    Hook for Azure File Share operations.

    Provides methods for managing file shares, directories, and files
    with support for various authentication methods.
    """

    def __init__(
        self,
        share_name: str | None = None,
        file_path: str | None = None,
        directory_path: str | None = None,
        azure_fileshare_conn_id: str = "azure_fileshare_default",
    ): ...

    def get_conn(self) -> None: ...

    @property
    def share_service_client(self) -> ShareServiceClient: ...

    @property
    def share_directory_client(self) -> ShareDirectoryClient: ...

    @property
    def share_file_client(self) -> ShareFileClient: ...

    def check_for_directory(self) -> bool: ...

    def list_directories_and_files(self) -> list: ...

    def list_files(self) -> list[str]: ...

    def create_share(self, share_name: str, **kwargs) -> bool: ...

    def delete_share(self, share_name: str, **kwargs) -> bool: ...

    def create_directory(self, **kwargs) -> Any: ...

    def get_file(self, file_path: str, **kwargs) -> None: ...

    def get_file_to_stream(self, stream: IO, **kwargs) -> None: ...

    def load_file(self, file_path: str, **kwargs) -> None: ...

    def load_data(self, string_data: bytes | str | IO, **kwargs) -> None: ...
```

## Usage Examples

### Basic File Share Operations

```python
from airflow import DAG
from airflow.providers.microsoft.azure.hooks.fileshare import AzureFileShareHook
from datetime import datetime, timedelta

dag = DAG(
    'azure_fileshare_example',
    default_args={'owner': 'data-team'},
    description='Azure File Share operations',
    schedule_interval=timedelta(days=1),
    start_date=datetime(2024, 1, 1),
    catchup=False
)

def manage_fileshare_operations(**context):
    # Initialize hook
    hook = AzureFileShareHook(
        share_name='data-share',
        azure_fileshare_conn_id='azure_fileshare_connection'
    )

    # Create file share
    hook.create_share('data-share')

    # Create directory structure
    hook = AzureFileShareHook(
        share_name='data-share',
        directory_path='processed/2024/01',
        azure_fileshare_conn_id='azure_fileshare_connection'
    )
    hook.create_directory()

    # Upload file
    hook = AzureFileShareHook(
        share_name='data-share',
        file_path='processed/2024/01/data.csv',
        azure_fileshare_conn_id='azure_fileshare_connection'
    )
    hook.load_file('/local/path/to/data.csv')

    # List files in directory
    hook = AzureFileShareHook(
        share_name='data-share',
        directory_path='processed/2024/01',
        azure_fileshare_conn_id='azure_fileshare_connection'
    )
    files = hook.list_files()
    print(f"Files in directory: {files}")

# Create PythonOperator task
from airflow.operators.python import PythonOperator

fileshare_task = PythonOperator(
    task_id='manage_fileshare',
    python_callable=manage_fileshare_operations,
    dag=dag
)
```

### File Share Data Processing Pipeline

```python
def process_shared_files(**context):
    # Hook for reading files
    read_hook = AzureFileShareHook(
        share_name='input-data',
        directory_path='raw',
        azure_fileshare_conn_id='azure_fileshare_connection'
    )

    # List all files to process
    files = read_hook.list_files()

    for file_name in files:
        # Download file for processing
        file_hook = AzureFileShareHook(
            share_name='input-data',
            file_path=f'raw/{file_name}',
            azure_fileshare_conn_id='azure_fileshare_connection'
        )

        # Download to local temp file
        import tempfile
        with tempfile.NamedTemporaryFile() as temp_file:
            file_hook.get_file_to_stream(temp_file)

            # Process the file (your custom logic here)
            processed_data = process_data_from_stream(temp_file)

            # Upload processed file to output share
            output_hook = AzureFileShareHook(
                share_name='processed-data',
                file_path=f'processed/{file_name}',
                azure_fileshare_conn_id='azure_fileshare_connection'
            )
            output_hook.load_data(processed_data)

processing_task = PythonOperator(
    task_id='process_shared_files',
    python_callable=process_shared_files,
    dag=dag
)
```

### File Share with Directory Management

```python
def organize_file_share(**context):
    base_hook = AzureFileShareHook(
        share_name='document-archive',
        azure_fileshare_conn_id='azure_fileshare_connection'
    )

    # Create share if it doesn't exist
    base_hook.create_share('document-archive')

    # Create organized directory structure
    directories = [
        'documents/2024/invoices',
        'documents/2024/reports',
        'documents/2024/contracts',
        'templates/email',
        'templates/reports'
    ]

    for directory_path in directories:
        dir_hook = AzureFileShareHook(
            share_name='document-archive',
            directory_path=directory_path,
            azure_fileshare_conn_id='azure_fileshare_connection'
        )

        # Check if directory exists
        if not dir_hook.check_for_directory():
            dir_hook.create_directory()
            print(f"Created directory: {directory_path}")

        # List contents of each directory
        contents = dir_hook.list_directories_and_files()
        print(f"Contents of {directory_path}: {contents}")

organization_task = PythonOperator(
    task_id='organize_file_share',
    python_callable=organize_file_share,
    dag=dag
)
```

## Authentication and Connection

Azure File Share supports multiple authentication methods:

- **Account Key**: Storage account name and access key
- **SAS Token**: Shared Access Signature for granular permissions
- **Connection String**: Complete connection string with authentication
- **Managed Identity**: For Azure-hosted Airflow instances
- **DefaultAzureCredential**: Azure SDK default credential chain

Connection configuration requires the storage account information and chosen authentication method in the connection extras.

## File Share Features

Azure File Share provides:

- **SMB Protocol**: Standard SMB 3.0 file sharing protocol
- **REST API**: HTTP-based file operations
- **Directory Structure**: Hierarchical file organization
- **Cross-Platform**: Windows, Linux, and macOS compatibility
- **Shared Access**: Multiple clients can access simultaneously
- **Snapshots**: Point-in-time share snapshots
- **Premium Performance**: High IOPS and low latency options

## Types

```python { .api }
# Azure File Share client types
class ShareServiceClient:
    """Client for managing file share service operations."""
    def create_share(self, share_name: str, **kwargs) -> ShareClient: ...
    def delete_share(self, share_name: str, **kwargs) -> None: ...
    def list_shares(self, **kwargs) -> ItemPaged[ShareProperties]: ...

class ShareDirectoryClient:
    """Client for directory operations within a file share."""
    def create_directory(self, **kwargs) -> dict[str, Any]: ...
    def delete_directory(self, **kwargs) -> None: ...
    def list_directories_and_files(self, **kwargs) -> ItemPaged[dict[str, Any]]: ...

class ShareFileClient:
    """Client for file operations within a file share."""
    def upload_file(self, data: Any, **kwargs) -> dict[str, Any]: ...
    def download_file(self, **kwargs) -> StorageStreamDownloader: ...
    def delete_file(self, **kwargs) -> None: ...

class FileProperties:
    """Properties of a file in Azure File Share."""
    name: str
    size: int
    last_modified: datetime
    content_type: str
```