# Apache Spark Pool Management

Big data pool (Apache Spark) configuration, auto-scaling, library management, and lifecycle operations. Spark pools provide distributed computing capabilities for big data processing, machine learning, and data engineering workloads within Azure Synapse Analytics.
## Capabilities

### Spark Pool Operations

Core Apache Spark pool lifecycle management including creation, retrieval, updates, and deletion.

```python { .api }
def get(resource_group_name: str, workspace_name: str, big_data_pool_name: str) -> BigDataPoolResourceInfo:
    """
    Get a Big Data pool.

    Parameters:
    - resource_group_name (str): Name of the resource group
    - workspace_name (str): Name of the workspace
    - big_data_pool_name (str): Name of the Big Data pool

    Returns:
    BigDataPoolResourceInfo: The Big Data pool object
    """

def create_or_update(resource_group_name: str, workspace_name: str, big_data_pool_name: str, big_data_pool_info: BigDataPoolResourceInfo) -> LROPoller[BigDataPoolResourceInfo]:
    """
    Create or update a Big Data pool.

    Parameters:
    - resource_group_name (str): Name of the resource group
    - workspace_name (str): Name of the workspace
    - big_data_pool_name (str): Name of the Big Data pool
    - big_data_pool_info (BigDataPoolResourceInfo): Big Data pool properties

    Returns:
    LROPoller[BigDataPoolResourceInfo]: Long-running operation poller
    """

def delete(resource_group_name: str, workspace_name: str, big_data_pool_name: str) -> LROPoller[object]:
    """
    Delete a Big Data pool.

    Parameters:
    - resource_group_name (str): Name of the resource group
    - workspace_name (str): Name of the workspace
    - big_data_pool_name (str): Name of the Big Data pool

    Returns:
    LROPoller[object]: Long-running operation poller
    """

def update(resource_group_name: str, workspace_name: str, big_data_pool_name: str, big_data_pool_patch_info: BigDataPoolPatchInfo) -> BigDataPoolResourceInfo:
    """
    Update Big Data pool properties.

    Parameters:
    - resource_group_name (str): Name of the resource group
    - workspace_name (str): Name of the workspace
    - big_data_pool_name (str): Name of the Big Data pool
    - big_data_pool_patch_info (BigDataPoolPatchInfo): Update parameters

    Returns:
    BigDataPoolResourceInfo: Updated Big Data pool
    """
```
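These lifecycle operations are invoked through the management client's `big_data_pools` operations group. Below is a minimal sketch of retrieving and deleting a pool, assuming the `azure-mgmt-synapse` and `azure-identity` packages and the operation names listed above; the subscription ID and resource names are placeholders.

```python
from azure.identity import DefaultAzureCredential
from azure.mgmt.synapse import SynapseManagementClient

# Authenticate against the management plane (subscription ID is a placeholder).
client = SynapseManagementClient(
    credential=DefaultAzureCredential(),
    subscription_id="<subscription-id>",
)

# Retrieve an existing Spark pool and inspect its state.
pool = client.big_data_pools.get(
    resource_group_name="my-resource-group",
    workspace_name="my-synapse-workspace",
    big_data_pool_name="my-spark-pool",
)
print(f"{pool.name}: {pool.provisioning_state}")

# Deletion returns a long-running operation poller; wait for completion.
delete_poller = client.big_data_pools.delete(
    resource_group_name="my-resource-group",
    workspace_name="my-synapse-workspace",
    big_data_pool_name="my-spark-pool",
)
delete_poller.result()
```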
### Spark Pool Listing

Operations to list and discover Spark pools within workspaces.

```python { .api }
def list_by_workspace(resource_group_name: str, workspace_name: str) -> ItemPaged[BigDataPoolResourceInfo]:
    """
    List Big Data pools in a workspace.

    Parameters:
    - resource_group_name (str): Name of the resource group
    - workspace_name (str): Name of the workspace

    Returns:
    ItemPaged[BigDataPoolResourceInfo]: Paged collection of Big Data pools
    """
```
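Because `list_by_workspace` returns an `ItemPaged` iterator, paging is handled transparently and pools can be consumed with a plain `for` loop. A short sketch, assuming the authenticated `client` from the previous example:

```python
# Enumerate every Spark pool in the workspace; ItemPaged fetches pages lazily.
for pool in client.big_data_pools.list_by_workspace(
    resource_group_name="my-resource-group",
    workspace_name="my-synapse-workspace",
):
    print(f"{pool.name}: Spark {pool.spark_version}, node size {pool.node_size}")
```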
### Library Management

Manage custom libraries and packages for Spark pools.

```python { .api }
def list_libraries(resource_group_name: str, workspace_name: str) -> ItemPaged[LibraryResource]:
    """
    List libraries in a workspace.

    Parameters:
    - resource_group_name (str): Name of the resource group
    - workspace_name (str): Name of the workspace

    Returns:
    ItemPaged[LibraryResource]: Paged collection of libraries
    """

def flush_library(resource_group_name: str, workspace_name: str, library_name: str) -> LROPoller[object]:
    """
    Flush library changes.

    Parameters:
    - resource_group_name (str): Name of the resource group
    - workspace_name (str): Name of the workspace
    - library_name (str): Name of the library

    Returns:
    LROPoller[object]: Long-running operation poller
    """

def get_operation_result(resource_group_name: str, workspace_name: str, operation_id: str) -> LibraryResource:
    """
    Get library operation result.

    Parameters:
    - resource_group_name (str): Name of the resource group
    - workspace_name (str): Name of the workspace
    - operation_id (str): Operation ID

    Returns:
    LibraryResource: Library operation result
    """

def delete_library(resource_group_name: str, workspace_name: str, library_name: str) -> LROPoller[LibraryResource]:
    """
    Delete a library.

    Parameters:
    - resource_group_name (str): Name of the resource group
    - workspace_name (str): Name of the workspace
    - library_name (str): Name of the library

    Returns:
    LROPoller[LibraryResource]: Long-running operation poller
    """

def create_library(resource_group_name: str, workspace_name: str, library_name: str, library_resource: LibraryResource) -> LROPoller[LibraryResource]:
    """
    Create or update a library.

    Parameters:
    - resource_group_name (str): Name of the resource group
    - workspace_name (str): Name of the workspace
    - library_name (str): Name of the library
    - library_resource (LibraryResource): Library resource properties

    Returns:
    LROPoller[LibraryResource]: Long-running operation poller
    """
```
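Per the listing above, `flush_library` applies pending library changes and returns a long-running operation poller. A minimal sketch of listing workspace libraries and flushing one, assuming the same authenticated `client` and that library operations are exposed on `client.library` as in the upload example further below:

```python
# List libraries registered in the workspace and show their provisioning status.
for library in client.library.list_libraries(
    resource_group_name="my-resource-group",
    workspace_name="my-synapse-workspace",
):
    print(f"{library.name}: {library.properties.provisioning_status}")

# Flush pending changes for a specific library and wait for completion.
flush_poller = client.library.flush_library(
    resource_group_name="my-resource-group",
    workspace_name="my-synapse-workspace",
    library_name="my-custom-library.jar",
)
flush_poller.result()
```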
## Types

### BigDataPoolResourceInfo

```python { .api }
class BigDataPoolResourceInfo:
    """
    A Big Data pool.

    Attributes:
    - id (str): Resource ID
    - name (str): Resource name
    - type (str): Resource type
    - location (str): Resource location
    - tags (dict): Resource tags
    - provisioning_state (str): Provisioning state
    - auto_scale (AutoScaleProperties): Auto-scale configuration
    - creation_date (datetime): Creation date
    - auto_pause (AutoPauseProperties): Auto-pause configuration
    - is_compute_isolation_enabled (bool): Compute isolation enabled
    - session_level_packages_enabled (bool): Session-level packages enabled
    - cache_size (int): Cache size
    - dynamic_executor_allocation (DynamicExecutorAllocation): Dynamic executor allocation
    - spark_events_folder (str): Spark events folder
    - node_count (int): Number of nodes
    - library_requirements (LibraryRequirements): Library requirements
    - custom_libraries (list): Custom libraries
    - spark_config_properties (dict): Spark configuration properties
    - spark_version (str): Spark version
    - default_spark_log_folder (str): Default Spark log folder
    - node_size (str): Node size
    - node_size_family (str): Node size family
    """
```
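These attributes are readable on any pool returned by `get` or `list_by_workspace`. A short sketch of inspecting the sizing and pause configuration, assuming `pool` is a `BigDataPoolResourceInfo` retrieved as shown earlier:

```python
# Report the effective sizing and scaling configuration of a retrieved pool.
print(f"Node size: {pool.node_size} ({pool.node_size_family})")
if pool.auto_scale and pool.auto_scale.enabled:
    print(f"Auto-scale: {pool.auto_scale.min_node_count}-{pool.auto_scale.max_node_count} nodes")
else:
    print(f"Fixed size: {pool.node_count} nodes")
if pool.auto_pause and pool.auto_pause.enabled:
    print(f"Auto-pause after {pool.auto_pause.delay_in_minutes} minutes idle")
```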
### BigDataPoolPatchInfo

```python { .api }
class BigDataPoolPatchInfo:
    """
    Properties patch for a Big Data pool.

    Attributes:
    - tags (dict): Resource tags
    - auto_scale (AutoScaleProperties): Auto-scale configuration
    - auto_pause (AutoPauseProperties): Auto-pause configuration
    - is_compute_isolation_enabled (bool): Compute isolation enabled
    - session_level_packages_enabled (bool): Session-level packages enabled
    - cache_size (int): Cache size
    - dynamic_executor_allocation (DynamicExecutorAllocation): Dynamic executor allocation
    - spark_events_folder (str): Spark events folder
    - node_count (int): Number of nodes
    - library_requirements (LibraryRequirements): Library requirements
    - custom_libraries (list): Custom libraries
    - spark_config_properties (dict): Spark configuration properties
    - spark_version (str): Spark version
    - default_spark_log_folder (str): Default Spark log folder
    - node_size (str): Node size
    - node_size_family (str): Node size family
    - force (bool): Force operation
    """
```
### AutoScaleProperties

```python { .api }
class AutoScaleProperties:
    """
    Auto-scaling properties.

    Attributes:
    - min_node_count (int): Minimum number of nodes
    - enabled (bool): Whether auto-scale is enabled
    - max_node_count (int): Maximum number of nodes
    """
```
### AutoPauseProperties

```python { .api }
class AutoPauseProperties:
    """
    Auto-pause properties.

    Attributes:
    - delay_in_minutes (int): Delay in minutes before auto-pause
    - enabled (bool): Whether auto-pause is enabled
    """
```
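Auto-pause (and, per the patch model above, auto-scale) can be adjusted on an existing pool by passing a `BigDataPoolPatchInfo` to `update`. A minimal sketch, assuming the authenticated `client` from the earlier example; the 30-minute delay is illustrative:

```python
from azure.mgmt.synapse.models import AutoPauseProperties, BigDataPoolPatchInfo

# Pause idle clusters after 30 minutes to reduce cost (value is illustrative).
patch = BigDataPoolPatchInfo(
    auto_pause=AutoPauseProperties(enabled=True, delay_in_minutes=30)
)

updated = client.big_data_pools.update(
    resource_group_name="my-resource-group",
    workspace_name="my-synapse-workspace",
    big_data_pool_name="my-spark-pool",
    big_data_pool_patch_info=patch,
)
print(f"Auto-pause delay: {updated.auto_pause.delay_in_minutes} minutes")
```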
### DynamicExecutorAllocation

```python { .api }
class DynamicExecutorAllocation:
    """
    Dynamic executor allocation properties.

    Attributes:
    - enabled (bool): Whether dynamic executor allocation is enabled
    - min_executors (int): Minimum number of executors
    - max_executors (int): Maximum number of executors
    """
```
### LibraryRequirements

```python { .api }
class LibraryRequirements:
    """
    Library requirements for a Big Data pool.

    Attributes:
    - time (str): Requirements file timestamp
    - content (str): Requirements file content
    - filename (str): Requirements filename
    """
```
### LibraryResource

```python { .api }
class LibraryResource:
    """
    Library resource.

    Attributes:
    - id (str): Resource ID
    - name (str): Resource name
    - type (str): Resource type
    - etag (str): Entity tag
    - properties (LibraryInfo): Library properties
    """
```
### LibraryInfo

```python { .api }
class LibraryInfo:
    """
    Library information.

    Attributes:
    - name (str): Library name
    - path (str): Library path
    - container_name (str): Container name
    - uploaded_timestamp (datetime): Upload timestamp
    - type (str): Library type
    - provisioning_status (str): Provisioning status
    - creator_id (str): Creator ID
    """
```
## Usage Examples

### Create a Spark Pool with Auto-scaling

```python
from azure.mgmt.synapse.models import (
    BigDataPoolResourceInfo, AutoScaleProperties, AutoPauseProperties,
    DynamicExecutorAllocation
)

# Configure auto-scaling
auto_scale = AutoScaleProperties(
    enabled=True,
    min_node_count=3,
    max_node_count=10
)

# Configure auto-pause
auto_pause = AutoPauseProperties(
    enabled=True,
    delay_in_minutes=15
)

# Configure dynamic executor allocation
dynamic_executor = DynamicExecutorAllocation(
    enabled=True,
    min_executors=1,
    max_executors=4
)

# Create Spark pool
spark_pool_info = BigDataPoolResourceInfo(
    location="East US",
    node_size="Small",
    node_size_family="MemoryOptimized",
    spark_version="3.1",
    auto_scale=auto_scale,
    auto_pause=auto_pause,
    dynamic_executor_allocation=dynamic_executor,
    is_compute_isolation_enabled=False,
    session_level_packages_enabled=True
)

operation = client.big_data_pools.create_or_update(
    resource_group_name="my-resource-group",
    workspace_name="my-synapse-workspace",
    big_data_pool_name="my-spark-pool",
    big_data_pool_info=spark_pool_info
)

spark_pool = operation.result()
print(f"Created Spark pool: {spark_pool.name}")
```
### Configure Library Requirements

```python
from azure.mgmt.synapse.models import BigDataPoolPatchInfo, LibraryRequirements

# Define requirements.txt content
requirements_content = """
pandas==1.3.3
numpy==1.21.2
scikit-learn==0.24.2
matplotlib==3.4.3
"""

library_requirements = LibraryRequirements(
    filename="requirements.txt",
    content=requirements_content
)

# Update the Spark pool with library requirements
updated_pool_info = BigDataPoolPatchInfo(
    library_requirements=library_requirements
)

updated_pool = client.big_data_pools.update(
    resource_group_name="my-resource-group",
    workspace_name="my-synapse-workspace",
    big_data_pool_name="my-spark-pool",
    big_data_pool_patch_info=updated_pool_info
)

print("Updated library requirements")
```
### Upload Custom Library

```python
from azure.mgmt.synapse.models import LibraryResource, LibraryInfo

# Create library resource
library_info = LibraryInfo(
    name="my-custom-library.jar",
    path="abfss://container@storage.dfs.core.windows.net/libraries/my-custom-library.jar",
    container_name="libraries",
    type="jar"
)

library_resource = LibraryResource(
    properties=library_info
)

# Upload the library
operation = client.library.create_library(
    resource_group_name="my-resource-group",
    workspace_name="my-synapse-workspace",
    library_name="my-custom-library.jar",
    library_resource=library_resource
)

library = operation.result()
print(f"Uploaded library: {library.name}")
```