# Apache Spark Pool Management

Big data pool (Apache Spark) configuration, auto-scaling, library management, and lifecycle operations. Spark pools provide distributed computing capabilities for big data processing, machine learning, and data engineering workloads within Azure Synapse Analytics.
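
The operations documented below are invoked through an Azure Synapse management client. As a minimal sketch (assuming the `azure-mgmt-synapse` and `azure-identity` packages and a placeholder subscription ID), a client like the `client` object used in the examples below can be constructed as follows:

```python
from azure.identity import DefaultAzureCredential
from azure.mgmt.synapse import SynapseManagementClient

# Placeholder subscription ID; substitute your own.
subscription_id = "00000000-0000-0000-0000-000000000000"

# DefaultAzureCredential resolves environment, managed identity, or CLI credentials.
client = SynapseManagementClient(
    credential=DefaultAzureCredential(),
    subscription_id=subscription_id,
)
```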

## Capabilities

### Spark Pool Operations

Core Apache Spark pool lifecycle management including creation, retrieval, updates, and deletion.

```python { .api }
def get(resource_group_name: str, workspace_name: str, big_data_pool_name: str) -> BigDataPoolResourceInfo:
    """
    Get a Big Data pool.

    Parameters:
    - resource_group_name (str): Name of the resource group
    - workspace_name (str): Name of the workspace
    - big_data_pool_name (str): Name of the Big Data pool

    Returns:
    BigDataPoolResourceInfo: The Big Data pool object
    """

def create_or_update(resource_group_name: str, workspace_name: str, big_data_pool_name: str, big_data_pool_info: BigDataPoolResourceInfo) -> LROPoller[BigDataPoolResourceInfo]:
    """
    Create or update a Big Data pool.

    Parameters:
    - resource_group_name (str): Name of the resource group
    - workspace_name (str): Name of the workspace
    - big_data_pool_name (str): Name of the Big Data pool
    - big_data_pool_info (BigDataPoolResourceInfo): Big Data pool properties

    Returns:
    LROPoller[BigDataPoolResourceInfo]: Long-running operation poller
    """

def delete(resource_group_name: str, workspace_name: str, big_data_pool_name: str) -> LROPoller[object]:
    """
    Delete a Big Data pool.

    Parameters:
    - resource_group_name (str): Name of the resource group
    - workspace_name (str): Name of the workspace
    - big_data_pool_name (str): Name of the Big Data pool

    Returns:
    LROPoller[object]: Long-running operation poller
    """

def update(resource_group_name: str, workspace_name: str, big_data_pool_name: str, big_data_pool_patch_info: BigDataPoolPatchInfo) -> BigDataPoolResourceInfo:
    """
    Update Big Data pool properties.

    Parameters:
    - resource_group_name (str): Name of the resource group
    - workspace_name (str): Name of the workspace
    - big_data_pool_name (str): Name of the Big Data pool
    - big_data_pool_patch_info (BigDataPoolPatchInfo): Update parameters

    Returns:
    BigDataPoolResourceInfo: Updated Big Data pool
    """
```
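
For orientation, here is a minimal sketch of retrieving and then deleting a pool with the operations above, assuming a `SynapseManagementClient` instance named `client` and placeholder resource names:

```python
# Retrieve an existing pool and inspect a few of its properties.
pool = client.big_data_pools.get(
    resource_group_name="my-resource-group",
    workspace_name="my-synapse-workspace",
    big_data_pool_name="my-spark-pool",
)
print(pool.name, pool.provisioning_state, pool.node_size)

# Deletion is a long-running operation; result() blocks until it completes.
delete_poller = client.big_data_pools.delete(
    resource_group_name="my-resource-group",
    workspace_name="my-synapse-workspace",
    big_data_pool_name="my-spark-pool",
)
delete_poller.result()
```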

### Spark Pool Listing

Operations to list and discover Spark pools within workspaces.

```python { .api }
def list_by_workspace(resource_group_name: str, workspace_name: str) -> ItemPaged[BigDataPoolResourceInfo]:
    """
    List Big Data pools in a workspace.

    Parameters:
    - resource_group_name (str): Name of the resource group
    - workspace_name (str): Name of the workspace

    Returns:
    ItemPaged[BigDataPoolResourceInfo]: Paged collection of Big Data pools
    """
```
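
The returned `ItemPaged` object handles pagination transparently, so pools can be consumed with a plain `for` loop. A short sketch, again assuming a `client` instance and placeholder names:

```python
# Iterate every Spark pool in the workspace; paging is handled by ItemPaged.
for pool in client.big_data_pools.list_by_workspace(
    resource_group_name="my-resource-group",
    workspace_name="my-synapse-workspace",
):
    print(f"{pool.name}: {pool.node_size} x {pool.node_count} (Spark {pool.spark_version})")
```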

### Library Management

Manage custom libraries and packages for Spark pools.

```python { .api }
def list_libraries(resource_group_name: str, workspace_name: str) -> ItemPaged[LibraryResource]:
    """
    List libraries in a workspace.

    Parameters:
    - resource_group_name (str): Name of the resource group
    - workspace_name (str): Name of the workspace

    Returns:
    ItemPaged[LibraryResource]: Paged collection of libraries
    """

def flush_library(resource_group_name: str, workspace_name: str, library_name: str) -> LROPoller[object]:
    """
    Flush library changes.

    Parameters:
    - resource_group_name (str): Name of the resource group
    - workspace_name (str): Name of the workspace
    - library_name (str): Name of the library

    Returns:
    LROPoller[object]: Long-running operation poller
    """

def get_operation_result(resource_group_name: str, workspace_name: str, operation_id: str) -> LibraryResource:
    """
    Get library operation result.

    Parameters:
    - resource_group_name (str): Name of the resource group
    - workspace_name (str): Name of the workspace
    - operation_id (str): Operation ID

    Returns:
    LibraryResource: Library operation result
    """

def delete_library(resource_group_name: str, workspace_name: str, library_name: str) -> LROPoller[LibraryResource]:
    """
    Delete a library.

    Parameters:
    - resource_group_name (str): Name of the resource group
    - workspace_name (str): Name of the workspace
    - library_name (str): Name of the library

    Returns:
    LROPoller[LibraryResource]: Long-running operation poller
    """

def create_library(resource_group_name: str, workspace_name: str, library_name: str, library_resource: LibraryResource) -> LROPoller[LibraryResource]:
    """
    Create or update a library.

    Parameters:
    - resource_group_name (str): Name of the resource group
    - workspace_name (str): Name of the workspace
    - library_name (str): Name of the library
    - library_resource (LibraryResource): Library resource properties

    Returns:
    LROPoller[LibraryResource]: Long-running operation poller
    """
```
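
A brief sketch of listing the libraries registered in a workspace and deleting one that is no longer needed, using the `client.library` operations group that the Usage Examples below assume; all names are placeholders:

```python
# Enumerate libraries currently registered in the workspace.
for library in client.library.list_libraries(
    resource_group_name="my-resource-group",
    workspace_name="my-synapse-workspace",
):
    print(library.name)

# delete_library is a long-running operation; result() waits for completion.
client.library.delete_library(
    resource_group_name="my-resource-group",
    workspace_name="my-synapse-workspace",
    library_name="obsolete-library.jar",
).result()
```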

## Types

### BigDataPoolResourceInfo

```python { .api }
class BigDataPoolResourceInfo:
    """
    A Big Data pool.

    Attributes:
    - id (str): Resource ID
    - name (str): Resource name
    - type (str): Resource type
    - location (str): Resource location
    - tags (dict): Resource tags
    - provisioning_state (str): Provisioning state
    - auto_scale (AutoScaleProperties): Auto-scale configuration
    - creation_date (datetime): Creation date
    - auto_pause (AutoPauseProperties): Auto-pause configuration
    - is_compute_isolation_enabled (bool): Compute isolation enabled
    - session_level_packages_enabled (bool): Session-level packages enabled
    - cache_size (int): Cache size
    - dynamic_executor_allocation (DynamicExecutorAllocation): Dynamic executor allocation
    - spark_events_folder (str): Spark events folder
    - node_count (int): Number of nodes
    - library_requirements (LibraryRequirements): Library requirements
    - custom_libraries (list): Custom libraries
    - spark_config_properties (dict): Spark configuration properties
    - spark_version (str): Spark version
    - default_spark_log_folder (str): Default Spark log folder
    - node_size (str): Node size
    - node_size_family (str): Node size family
    """
```

### BigDataPoolPatchInfo

```python { .api }
class BigDataPoolPatchInfo:
    """
    Properties patch for a Big Data pool.

    Attributes:
    - tags (dict): Resource tags
    - auto_scale (AutoScaleProperties): Auto-scale configuration
    - auto_pause (AutoPauseProperties): Auto-pause configuration
    - is_compute_isolation_enabled (bool): Compute isolation enabled
    - session_level_packages_enabled (bool): Session-level packages enabled
    - cache_size (int): Cache size
    - dynamic_executor_allocation (DynamicExecutorAllocation): Dynamic executor allocation
    - spark_events_folder (str): Spark events folder
    - node_count (int): Number of nodes
    - library_requirements (LibraryRequirements): Library requirements
    - custom_libraries (list): Custom libraries
    - spark_config_properties (dict): Spark configuration properties
    - spark_version (str): Spark version
    - default_spark_log_folder (str): Default Spark log folder
    - node_size (str): Node size
    - node_size_family (str): Node size family
    - force (bool): Force operation
    """
```

### AutoScaleProperties

```python { .api }
class AutoScaleProperties:
    """
    Auto-scaling properties.

    Attributes:
    - min_node_count (int): Minimum number of nodes
    - enabled (bool): Whether auto-scale is enabled
    - max_node_count (int): Maximum number of nodes
    """
```

### AutoPauseProperties

```python { .api }
class AutoPauseProperties:
    """
    Auto-pause properties.

    Attributes:
    - delay_in_minutes (int): Delay in minutes before auto-pause
    - enabled (bool): Whether auto-pause is enabled
    """
```

### DynamicExecutorAllocation

```python { .api }
class DynamicExecutorAllocation:
    """
    Dynamic executor allocation properties.

    Attributes:
    - enabled (bool): Whether dynamic executor allocation is enabled
    - min_executors (int): Minimum number of executors
    - max_executors (int): Maximum number of executors
    """
```

### LibraryRequirements

```python { .api }
class LibraryRequirements:
    """
    Library requirements for a Big Data pool.

    Attributes:
    - time (str): Requirements file timestamp
    - content (str): Requirements file content
    - filename (str): Requirements filename
    """
```

### LibraryResource

```python { .api }
class LibraryResource:
    """
    Library resource.

    Attributes:
    - id (str): Resource ID
    - name (str): Resource name
    - type (str): Resource type
    - etag (str): Entity tag
    - properties (LibraryInfo): Library properties
    """
```

### LibraryInfo

```python { .api }
class LibraryInfo:
    """
    Library information.

    Attributes:
    - name (str): Library name
    - path (str): Library path
    - container_name (str): Container name
    - uploaded_timestamp (datetime): Upload timestamp
    - type (str): Library type
    - provisioning_status (str): Provisioning status
    - creator_id (str): Creator ID
    """
```

## Usage Examples

### Create a Spark Pool with Auto-scaling

```python
from azure.mgmt.synapse.models import (
    BigDataPoolResourceInfo, AutoScaleProperties, AutoPauseProperties,
    DynamicExecutorAllocation
)

# Configure auto-scaling
auto_scale = AutoScaleProperties(
    enabled=True,
    min_node_count=3,
    max_node_count=10
)

# Configure auto-pause
auto_pause = AutoPauseProperties(
    enabled=True,
    delay_in_minutes=15
)

# Configure dynamic executor allocation
dynamic_executor = DynamicExecutorAllocation(
    enabled=True,
    min_executors=1,
    max_executors=4
)

# Create Spark pool
spark_pool_info = BigDataPoolResourceInfo(
    location="East US",
    node_size="Small",
    node_size_family="MemoryOptimized",
    spark_version="3.1",
    auto_scale=auto_scale,
    auto_pause=auto_pause,
    dynamic_executor_allocation=dynamic_executor,
    is_compute_isolation_enabled=False,
    session_level_packages_enabled=True
)

operation = client.big_data_pools.create_or_update(
    resource_group_name="my-resource-group",
    workspace_name="my-synapse-workspace",
    big_data_pool_name="my-spark-pool",
    big_data_pool_info=spark_pool_info
)

spark_pool = operation.result()
print(f"Created Spark pool: {spark_pool.name}")
```
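
`create_or_update` returns an `LROPoller`, so `result()` above blocks until provisioning finishes. If blocking is undesirable, the poller can instead be checked periodically; a small sketch continuing from the example:

```python
import time

# Poll the long-running operation instead of blocking in result().
while not operation.done():
    print(f"Provisioning status: {operation.status()}")
    time.sleep(30)

spark_pool = operation.result()
print(f"Created Spark pool: {spark_pool.name}")
```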

### Configure Library Requirements

```python
from azure.mgmt.synapse.models import BigDataPoolPatchInfo, LibraryRequirements

# Define requirements.txt content
requirements_content = """
pandas==1.3.3
numpy==1.21.2
scikit-learn==0.24.2
matplotlib==3.4.3
"""

library_requirements = LibraryRequirements(
    filename="requirements.txt",
    content=requirements_content
)

# Update the Spark pool with library requirements
updated_pool_info = BigDataPoolPatchInfo(
    library_requirements=library_requirements
)

updated_pool = client.big_data_pools.update(
    resource_group_name="my-resource-group",
    workspace_name="my-synapse-workspace",
    big_data_pool_name="my-spark-pool",
    big_data_pool_patch_info=updated_pool_info
)

print("Updated library requirements")
```

### Upload Custom Library

```python
from azure.mgmt.synapse.models import LibraryResource, LibraryInfo

# Create library resource
library_info = LibraryInfo(
    name="my-custom-library.jar",
    path="abfss://container@storage.dfs.core.windows.net/libraries/my-custom-library.jar",
    container_name="libraries",
    type="jar"
)

library_resource = LibraryResource(
    properties=library_info
)

# Upload the library
operation = client.library.create_library(
    resource_group_name="my-resource-group",
    workspace_name="my-synapse-workspace",
    library_name="my-custom-library.jar",
    library_resource=library_resource
)

library = operation.result()
print(f"Uploaded library: {library.name}")
```
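
Depending on workspace state, an uploaded package may not take effect until library changes are flushed. A sketch continuing from the upload, using the `flush_library` operation defined above:

```python
# Flush library changes so the uploaded package is applied; this is a long-running operation.
flush_operation = client.library.flush_library(
    resource_group_name="my-resource-group",
    workspace_name="my-synapse-workspace",
    library_name="my-custom-library.jar",
)
flush_operation.result()
print("Library changes flushed")
```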