0
# File Operations
1
2
The file operations let you access, download, and manage files on compute nodes and files produced by tasks. This includes retrieving task output files, log files, and any other files created during task execution.
3
4
## Capabilities
5
6
### Task File Operations
7
8
Manage files associated with specific tasks, including output files, logs, and working directory contents.
9
10
```python { .api }
11
def list_from_task(job_id, task_id, file_list_from_task_options=None, custom_headers=None, raw=False, **operation_config):
    """
    List files associated with the specified task.

    Args:
        job_id: ID of the job containing the task
        task_id: ID of the task
        file_list_from_task_options: Additional options for listing
            (FileListFromTaskOptions), including recursive
        custom_headers: Extra HTTP headers to add to the request
        raw: If True, return the raw response alongside the deserialized result
        **operation_config: Per-operation configuration overrides

    Returns:
        ItemPaged[FileProperties]: Paginated list of files

    NOTE(review): in some published azure-batch releases ``recursive`` is a
    direct keyword parameter of this method rather than an options field --
    confirm against the SDK version being documented.
    """
23
24
def get_from_task(job_id, task_id, file_path, file_get_from_task_options=None, custom_headers=None, raw=False, **operation_config):
    """
    Get the content of the specified task file.

    Args:
        job_id: ID of the job containing the task
        task_id: ID of the task
        file_path: Path to the file relative to task working directory
        file_get_from_task_options: Additional options (FileGetFromTaskOptions),
            including byte range
        custom_headers: Extra HTTP headers to add to the request
        raw: If True, return the raw response alongside the deserialized result
        **operation_config: Per-operation configuration overrides

    Returns:
        Stream: File content stream; iterate it to receive the content in
        byte chunks
    """
37
38
def get_properties_from_task(job_id, task_id, file_path, file_get_properties_from_task_options=None, custom_headers=None, raw=False, **operation_config):
    """
    Get properties of the specified task file.

    Args:
        job_id: ID of the job containing the task
        task_id: ID of the task
        file_path: Path to the file relative to task working directory
        file_get_properties_from_task_options: Additional options
            (FileGetPropertiesFromTaskOptions)
        custom_headers: Extra HTTP headers to add to the request
        raw: If True, return the raw response so that its headers can be read
        **operation_config: Per-operation configuration overrides

    Returns:
        None (properties returned in response headers); pass raw=True to
        obtain the response object that carries those headers
    """
51
52
def delete_from_task(job_id, task_id, file_path, file_delete_from_task_options=None, custom_headers=None, raw=False, **operation_config):
    """
    Delete the specified task file.

    Args:
        job_id: ID of the job containing the task
        task_id: ID of the task
        file_path: Path to the file relative to task working directory
        file_delete_from_task_options: Additional options
            (FileDeleteFromTaskOptions)
        custom_headers: Extra HTTP headers to add to the request
        raw: If True, return the raw response alongside the deserialized result
        **operation_config: Per-operation configuration overrides

    Returns:
        None
    """
65
```
66
67
### Compute Node File Operations
68
69
Manage files on compute nodes, including system files, application files, and shared data.
70
71
```python { .api }
72
def list_from_compute_node(pool_id, node_id, file_list_from_compute_node_options=None, custom_headers=None, raw=False, **operation_config):
    """
    List files on the specified compute node.

    Args:
        pool_id: ID of the pool containing the node
        node_id: ID of the compute node
        file_list_from_compute_node_options: Additional options for listing
            (FileListFromComputeNodeOptions), including recursive
        custom_headers: Extra HTTP headers to add to the request
        raw: If True, return the raw response alongside the deserialized result
        **operation_config: Per-operation configuration overrides

    Returns:
        ItemPaged[FileProperties]: Paginated list of files

    NOTE(review): in some published azure-batch releases ``recursive`` is a
    direct keyword parameter of this method rather than an options field --
    confirm against the SDK version being documented.
    """
84
85
def get_from_compute_node(pool_id, node_id, file_path, file_get_from_compute_node_options=None, custom_headers=None, raw=False, **operation_config):
    """
    Get the content of the specified file from a compute node.

    Args:
        pool_id: ID of the pool containing the node
        node_id: ID of the compute node
        file_path: Path to the file on the compute node
        file_get_from_compute_node_options: Additional options
            (FileGetFromComputeNodeOptions), including byte range
        custom_headers: Extra HTTP headers to add to the request
        raw: If True, return the raw response alongside the deserialized result
        **operation_config: Per-operation configuration overrides

    Returns:
        Stream: File content stream; iterate it to receive the content in
        byte chunks
    """
98
99
def get_properties_from_compute_node(pool_id, node_id, file_path, file_get_properties_from_compute_node_options=None, custom_headers=None, raw=False, **operation_config):
    """
    Get properties of the specified file on a compute node.

    Args:
        pool_id: ID of the pool containing the node
        node_id: ID of the compute node
        file_path: Path to the file on the compute node
        file_get_properties_from_compute_node_options: Additional options
            (FileGetPropertiesFromComputeNodeOptions)
        custom_headers: Extra HTTP headers to add to the request
        raw: If True, return the raw response so that its headers can be read
        **operation_config: Per-operation configuration overrides

    Returns:
        None (properties returned in response headers); pass raw=True to
        obtain the response object that carries those headers
    """
112
113
def delete_from_compute_node(pool_id, node_id, file_path, file_delete_from_compute_node_options=None, custom_headers=None, raw=False, **operation_config):
    """
    Delete the specified file from a compute node.

    Args:
        pool_id: ID of the pool containing the node
        node_id: ID of the compute node
        file_path: Path to the file on the compute node
        file_delete_from_compute_node_options: Additional options
            (FileDeleteFromComputeNodeOptions)
        custom_headers: Extra HTTP headers to add to the request
        raw: If True, return the raw response alongside the deserialized result
        **operation_config: Per-operation configuration overrides

    Returns:
        None
    """
126
```
127
128
## Usage Examples
129
130
### Listing and Retrieving Task Files
131
132
```python
133
from azure.batch.models import FileListFromTaskOptions

# List all files for a task
files = client.file.list_from_task("my-job", "task-001")
for file_info in files:
    print(f"File: {file_info.name}")
    # NOTE(review): directory entries may be returned without file properties;
    # guard before dereferencing -- confirm against the service response.
    if file_info.properties is not None:
        print(f" Size: {file_info.properties.content_length} bytes")
        print(f" Modified: {file_info.properties.last_modified}")
    print(f" Type: {'Directory' if file_info.is_directory else 'File'}")

# List files recursively in subdirectories
list_options = FileListFromTaskOptions(recursive=True)
files = client.file.list_from_task("my-job", "task-001", list_options)

# Download stdout, stderr, and a specific output file.
# Maps the path on the task to the local file it is saved as.
downloads = {
    "stdout.txt": "local_stdout.txt",
    "stderr.txt": "local_stderr.txt",
    "results/output.json": "output.json",
}
for remote_path, local_path in downloads.items():
    content = client.file.get_from_task("my-job", "task-001", remote_path)
    # The content arrives as a stream of byte chunks, so write in binary mode.
    with open(local_path, "wb") as f:
        for chunk in content:
            f.write(chunk)
162
```
163
164
### Working with File Properties and Ranges
165
166
```python
167
from azure.batch.models import FileGetFromTaskOptions

# Get file properties without downloading content.
# The call returns None by default because the properties travel in the
# response headers; raw=True returns the response object so they can be read.
# NOTE(review): header names below follow the HTTP response -- confirm
# against the SDK version in use.
props_response = client.file.get_properties_from_task(
    "my-job", "task-001", "large_output.txt", raw=True
)
print(f"Size: {props_response.headers.get('Content-Length')} bytes")
print(f"Modified: {props_response.headers.get('Last-Modified')}")

# Download only part of a large file (first 1KB)
get_options = FileGetFromTaskOptions(
    ocp_range="bytes=0-1023"  # First 1024 bytes
)
partial_content = client.file.get_from_task(
    "my-job", "task-001", "large_output.txt", get_options
)

with open("partial_output.txt", "wb") as f:
    for chunk in partial_content:
        f.write(chunk)

# Get last 500 bytes of a log file
get_options = FileGetFromTaskOptions(
    ocp_range="bytes=-500"  # Last 500 bytes
)
log_tail = client.file.get_from_task(
    "my-job", "task-001", "application.log", get_options
)
# The stream has to be consumed to obtain the bytes. A byte range can start
# in the middle of a multi-byte character, so decode leniently.
print(b"".join(log_tail).decode("utf-8", errors="replace"))
191
```
192
193
### Managing Compute Node Files
194
195
```python
196
from azure.batch.models import FileListFromComputeNodeOptions

# List files on a compute node
node_files = client.file.list_from_compute_node("my-pool", "tvm-123456789")
for file_info in node_files:
    # Only regular files are reported with a size here; skip directories.
    if not file_info.is_directory:
        print(f"Node file: {file_info.name} ({file_info.properties.content_length} bytes)")

# Download a file from the compute node
node_file = client.file.get_from_compute_node(
    "my-pool", "tvm-123456789", "shared/data/input.txt"
)
with open("downloaded_input.txt", "wb") as f:
    for chunk in node_file:
        f.write(chunk)

# List batch service logs on node
list_options = FileListFromComputeNodeOptions(recursive=True)
log_files = client.file.list_from_compute_node(
    "my-pool", "tvm-123456789", list_options
)

for file_info in log_files:
    # Case-insensitive match on the file path.
    lowered_name = file_info.name.lower()
    if "startup" in lowered_name or "stdout" in lowered_name:
        print(f"Log file: {file_info.name}")
220
```
221
222
### File Management Operations
223
224
```python
225
# Delete temporary files from task working directory
try:
    client.file.delete_from_task("my-job", "task-001", "temp/intermediate.dat")
except Exception as e:
    print(f"Failed to delete file: {e}")
else:
    # Report success only when the delete call itself did not raise.
    print("Temporary file deleted successfully")

# Delete files from compute node
client.file.delete_from_compute_node("my-pool", "tvm-123456789", "temp/cache.tmp")

# Batch delete multiple task files
temp_files = ["temp1.txt", "temp2.txt", "cache/temp.dat"]
for temp_file in temp_files:
    # Best effort: a failure on one file must not stop the remaining deletes.
    try:
        client.file.delete_from_task("my-job", "task-001", temp_file)
    except Exception as e:
        print(f"Could not delete {temp_file}: {e}")
242
```
243
244
### Advanced File Filtering and Processing
245
246
```python
247
import os

from azure.batch.models import FileListFromTaskOptions


def download_task_outputs(job_id, task_id, local_dir):
    """Download all output files from a task to local directory.

    Lists the task's files recursively and saves every regular file whose
    path does not start with "wd/" into ``local_dir``. Paths are flattened:
    each "/" becomes "_", so "results/output.json" is written as
    "results_output.json". A failure on one file is printed and the
    remaining files are still attempted.

    Args:
        job_id: ID of the job containing the task
        task_id: ID of the task
        local_dir: Local directory to write into; created if missing

    Returns:
        None
    """
    os.makedirs(local_dir, exist_ok=True)

    files = client.file.list_from_task(
        job_id, task_id, FileListFromTaskOptions(recursive=True)
    )

    for file_info in files:
        # Skip working directory files, download outputs only
        if file_info.is_directory or file_info.name.startswith("wd/"):
            continue

        local_path = os.path.join(local_dir, file_info.name.replace("/", "_"))

        try:
            content = client.file.get_from_task(job_id, task_id, file_info.name)
            with open(local_path, "wb") as f:
                for chunk in content:
                    f.write(chunk)
        except Exception as e:
            print(f"Failed to download {file_info.name}: {e}")
        else:
            print(f"Downloaded: {file_info.name} -> {local_path}")


# Usage
download_task_outputs("my-job", "task-001", "./task_outputs")
272
273
def get_task_logs(job_id, task_id):
    """Get stdout and stderr content as strings.

    Args:
        job_id: ID of the job containing the task
        task_id: ID of the task

    Returns:
        dict: Maps "stdout.txt" and "stderr.txt" to the decoded file content,
        or to an "Error reading ..." message when the file could not be
        fetched.
    """
    logs = {}

    for log_file in ["stdout.txt", "stderr.txt"]:
        try:
            content = client.file.get_from_task(job_id, task_id, log_file)
            # Task output is not guaranteed to be valid UTF-8; substitute
            # undecodable bytes instead of losing the whole log to an error.
            logs[log_file] = b"".join(content).decode("utf-8", errors="replace")
        except Exception as e:
            logs[log_file] = f"Error reading {log_file}: {e}"

    return logs


# Usage
task_logs = get_task_logs("my-job", "task-001")
print("STDOUT:")
print(task_logs["stdout.txt"])
print("\nSTDERR:")
print(task_logs["stderr.txt"])
292
```
293
294
## Types
295
296
### File Information Types
297
298
```python { .api }
299
class FileProperties:
    """File properties and metadata.

    Every attribute is None unless a value is supplied.

    Attributes:
        name (str): Path of the file
        url (str): URL of the file
        is_directory (bool): Whether the entry is a directory
        properties (FilePropertiesDetail): Detailed file properties
    """

    def __init__(self, *, name=None, url=None, is_directory=None, properties=None):
        # Assign every attribute so it exists on the instance even when unset.
        self.name: str = name
        self.url: str = url
        self.is_directory: bool = is_directory
        self.properties: FilePropertiesDetail = properties
306
307
class FilePropertiesDetail:
    """Detailed file properties.

    Every attribute is None unless a value is supplied.

    Attributes:
        content_length (int): Length of the file in bytes
        content_type (str): Content type of the file
        creation_time (datetime.datetime): Creation time of the file
        last_modified (datetime.datetime): Time the file was last modified
        file_mode (str): File mode attribute
    """

    def __init__(self, *, content_length=None, content_type=None,
                 creation_time=None, last_modified=None, file_mode=None):
        # Assign every attribute so it exists on the instance even when unset.
        self.content_length: int = content_length
        self.content_type: str = content_type
        self.creation_time: datetime.datetime = creation_time
        self.last_modified: datetime.datetime = last_modified
        self.file_mode: str = file_mode
315
```
316
317
### File Operation Option Types
318
319
```python { .api }
320
class FileListFromTaskOptions:
    """Options for listing files from task.

    All options are keyword-only and default to None (not set).

    Attributes:
        filter (str): Filter expression applied to the listing
        recursive (bool): Whether to list files in subdirectories as well
        max_results (int): Maximum number of items to return
        timeout (int): Timeout for the request
    """

    # `filter` shadows the builtin on purpose: it is the option's public name.
    def __init__(self, *, filter=None, recursive=None, max_results=None, timeout=None):
        self.filter: str = filter
        self.recursive: bool = recursive
        self.max_results: int = max_results
        self.timeout: int = timeout
327
328
class FileListFromComputeNodeOptions:
    """Options for listing files from compute node.

    All options are keyword-only and default to None (not set).

    Attributes:
        filter (str): Filter expression applied to the listing
        recursive (bool): Whether to list files in subdirectories as well
        max_results (int): Maximum number of items to return
        timeout (int): Timeout for the request
    """

    # `filter` shadows the builtin on purpose: it is the option's public name.
    def __init__(self, *, filter=None, recursive=None, max_results=None, timeout=None):
        self.filter: str = filter
        self.recursive: bool = recursive
        self.max_results: int = max_results
        self.timeout: int = timeout
335
336
class FileGetFromTaskOptions:
    """Options for getting file from task.

    All options are keyword-only and default to None (not set).

    Attributes:
        ocp_range (str): Byte range like "bytes=0-1023"
        if_modified_since (datetime.datetime): Conditional-request timestamp
        if_unmodified_since (datetime.datetime): Conditional-request timestamp
        timeout (int): Timeout for the request
    """

    def __init__(self, *, ocp_range=None, if_modified_since=None,
                 if_unmodified_since=None, timeout=None):
        self.ocp_range: str = ocp_range  # Byte range like "bytes=0-1023"
        self.if_modified_since: datetime.datetime = if_modified_since
        self.if_unmodified_since: datetime.datetime = if_unmodified_since
        self.timeout: int = timeout
343
344
class FileGetFromComputeNodeOptions:
    """Options for getting file from compute node.

    All options are keyword-only and default to None (not set).

    Attributes:
        ocp_range (str): Byte range like "bytes=0-1023"
        if_modified_since (datetime.datetime): Conditional-request timestamp
        if_unmodified_since (datetime.datetime): Conditional-request timestamp
        timeout (int): Timeout for the request
    """

    def __init__(self, *, ocp_range=None, if_modified_since=None,
                 if_unmodified_since=None, timeout=None):
        self.ocp_range: str = ocp_range  # Byte range like "bytes=0-1023"
        self.if_modified_since: datetime.datetime = if_modified_since
        self.if_unmodified_since: datetime.datetime = if_unmodified_since
        self.timeout: int = timeout
351
352
class FileDeleteFromTaskOptions:
    """Options for deleting file from task.

    All options are keyword-only and default to None (not set).

    Attributes:
        recursive (bool): Whether to delete the contents of a directory too
        timeout (int): Timeout for the request
    """

    def __init__(self, *, recursive=None, timeout=None):
        self.recursive: bool = recursive
        self.timeout: int = timeout
357
358
class FileDeleteFromComputeNodeOptions:
    """Options for deleting file from compute node.

    All options are keyword-only and default to None (not set).

    Attributes:
        recursive (bool): Whether to delete the contents of a directory too
        timeout (int): Timeout for the request
    """

    def __init__(self, *, recursive=None, timeout=None):
        self.recursive: bool = recursive
        self.timeout: int = timeout
363
364
class FileGetPropertiesFromTaskOptions:
    """Options for getting file properties from task.

    All options are keyword-only and default to None (not set).

    Attributes:
        if_modified_since (datetime.datetime): Conditional-request timestamp
        if_unmodified_since (datetime.datetime): Conditional-request timestamp
        timeout (int): Timeout for the request
    """

    def __init__(self, *, if_modified_since=None, if_unmodified_since=None, timeout=None):
        self.if_modified_since: datetime.datetime = if_modified_since
        self.if_unmodified_since: datetime.datetime = if_unmodified_since
        self.timeout: int = timeout
370
371
class FileGetPropertiesFromComputeNodeOptions:
    """Options for getting file properties from compute node.

    All options are keyword-only and default to None (not set).

    Attributes:
        if_modified_since (datetime.datetime): Conditional-request timestamp
        if_unmodified_since (datetime.datetime): Conditional-request timestamp
        timeout (int): Timeout for the request
    """

    def __init__(self, *, if_modified_since=None, if_unmodified_since=None, timeout=None):
        self.if_modified_since: datetime.datetime = if_modified_since
        self.if_unmodified_since: datetime.datetime = if_unmodified_since
        self.timeout: int = timeout
377
```