or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

asset-management.md automl.md client-auth.md compute-management.md hyperparameter-tuning.md index.md job-management.md model-deployment.md

docs/compute-management.md

0

# Compute Management

1

2

Comprehensive compute resource management, including Azure ML compute clusters, compute instances, Kubernetes compute, and various compute configurations for running ML workloads.

3

4

## Capabilities

5

6

### Azure ML Compute

7

8

Managed compute clusters for scalable ML workloads with automatic scaling and node management.

9

10

```python { .api }

11

class AmlCompute:

12

def __init__(

13

self,

14

*,

15

name: str,

16

type: str = "amlcompute",

17

size: str,

18

location: str = None,

19

min_instances: int = 0,

20

max_instances: int = 1,

21

idle_time_before_scale_down: int = 1800,

22

tier: str = "dedicated",

23

identity: IdentityConfiguration = None,

24

ssh_public_access_enabled: bool = False,

25

ssh_settings: AmlComputeSshSettings = None,

26

network_settings: NetworkSettings = None,

27

**kwargs

28

):

29

"""

30

Azure ML compute cluster for scalable workloads.

31

32

Parameters:

33

- name: Compute cluster name

34

- type: Compute type ("amlcompute")

35

- size: VM size (e.g., "Standard_DS3_v2", "Standard_NC6")

36

- location: Azure region for compute

37

- min_instances: Minimum number of nodes

38

- max_instances: Maximum number of nodes

39

- idle_time_before_scale_down: Idle time in seconds before nodes are scaled down

40

- tier: Compute tier ("dedicated", "low_priority")

41

- identity: Managed identity configuration

42

- ssh_public_access_enabled: Enable SSH access

43

- ssh_settings: SSH configuration

44

- network_settings: Virtual network settings

45

"""

46

47

class AmlComputeSshSettings:

48

def __init__(

49

self,

50

*,

51

admin_username: str,

52

admin_password: str = None,

53

ssh_public_keys: str = None

54

):

55

"""

56

SSH settings for AML compute.

57

58

Parameters:

59

- admin_username: Administrator username

60

- admin_password: Administrator password

61

- ssh_public_keys: SSH public keys for authentication

62

"""

63

```

64

65

#### Usage Example

66

67

```python

68

from azure.ai.ml.entities import AmlCompute, AmlComputeSshSettings

69

70

# Create a CPU compute cluster

71

cpu_cluster = AmlCompute(

72

name="cpu-cluster",

73

size="Standard_DS3_v2",

74

min_instances=0,

75

max_instances=10,

76

idle_time_before_scale_down=1800,

77

tier="dedicated"

78

)

79

80

# Create a GPU compute cluster

81

gpu_cluster = AmlCompute(

82

name="gpu-cluster",

83

size="Standard_NC6",

84

min_instances=0,

85

max_instances=4,

86

idle_time_before_scale_down=1200,

87

tier="dedicated"

88

)

89

90

# Create cluster with SSH access

91

ssh_settings = AmlComputeSshSettings(

92

admin_username="azureuser",

93

ssh_public_keys="ssh-rsa AAAAB3NzaC1yc2EAAAA..."

94

)

95

96

ssh_cluster = AmlCompute(

97

name="ssh-cluster",

98

size="Standard_DS3_v2",

99

max_instances=5,

100

ssh_public_access_enabled=True,

101

ssh_settings=ssh_settings

102

)

103

104

# Create the compute cluster (assumes `ml_client` is an already-authenticated MLClient)

105

ml_client.compute.begin_create_or_update(cpu_cluster).result()

106

```

107

108

### Compute Instances

109

110

Managed compute instances for development and experimentation with pre-configured environments.

111

112

```python { .api }

113

class ComputeInstance:

114

def __init__(

115

self,

116

*,

117

name: str,

118

size: str,

119

location: str = None,

120

ssh_public_access_enabled: bool = False,

121

ssh_settings: ComputeInstanceSshSettings = None,

122

assigned_user: AssignedUserConfiguration = None,

123

idle_time_before_shutdown_minutes: int = None,

124

custom_applications: list = None,

125

setup_scripts: SetupScripts = None,

126

network_settings: NetworkSettings = None,

127

**kwargs

128

):

129

"""

130

Azure ML compute instance for development.

131

132

Parameters:

133

- name: Compute instance name

134

- size: VM size (e.g., "Standard_DS3_v2")

135

- location: Azure region

136

- ssh_public_access_enabled: Enable SSH access

137

- ssh_settings: SSH configuration

138

- assigned_user: User assignment configuration

139

- idle_time_before_shutdown_minutes: Idle time in minutes before automatic shutdown

140

- custom_applications: Custom applications to install

141

- setup_scripts: Startup scripts

142

- network_settings: Virtual network settings

143

"""

144

145

class ComputeInstanceSshSettings:

146

def __init__(

147

self,

148

*,

149

ssh_public_keys: str = None

150

):

151

"""

152

SSH settings for compute instances.

153

154

Parameters:

155

- ssh_public_keys: SSH public keys for authentication

156

"""

157

158

class AssignedUserConfiguration:

159

def __init__(

160

self,

161

*,

162

user_tenant_id: str,

163

user_object_id: str

164

):

165

"""

166

User assignment for compute instance.

167

168

Parameters:

169

- user_tenant_id: Azure AD tenant ID

170

- user_object_id: Azure AD user object ID

171

"""

172

```

173

174

#### Usage Example

175

176

```python

177

from azure.ai.ml.entities import ComputeInstance, AssignedUserConfiguration

178

179

# Create a compute instance

180

compute_instance = ComputeInstance(

181

name="my-compute-instance",

182

size="Standard_DS3_v2",

183

idle_time_before_shutdown_minutes=30,

184

assigned_user=AssignedUserConfiguration(

185

user_tenant_id="your-tenant-id",

186

user_object_id="your-user-object-id"

187

)

188

)

189

190

# Create the compute instance

191

ml_client.compute.begin_create_or_update(compute_instance).result()

192

```

193

194

### Kubernetes Compute

195

196

Attach existing Kubernetes clusters for running ML workloads.

197

198

```python { .api }

199

class KubernetesCompute:

200

def __init__(

201

self,

202

*,

203

name: str,

204

resource_id: str,

205

namespace: str = "default",

206

identity: IdentityConfiguration = None,

207

**kwargs

208

):

209

"""

210

Kubernetes compute for custom container orchestration.

211

212

Parameters:

213

- name: Compute target name

214

- resource_id: Azure resource ID of the Kubernetes cluster

215

- namespace: Kubernetes namespace to use

216

- identity: Managed identity configuration

217

"""

218

```

219

220

### Virtual Machine Compute

221

222

Attach existing virtual machines as compute targets.

223

224

```python { .api }

225

class VirtualMachineCompute:

226

def __init__(

227

self,

228

*,

229

name: str,

230

resource_id: str,

231

ssh_settings: VirtualMachineSshSettings,

232

**kwargs

233

):

234

"""

235

Virtual machine compute for custom VM environments.

236

237

Parameters:

238

- name: Compute target name

239

- resource_id: Azure resource ID of the VM

240

- ssh_settings: SSH connection settings

241

"""

242

243

class VirtualMachineSshSettings:

244

def __init__(

245

self,

246

*,

247

username: str,

248

password: str = None,

249

private_key_file: str = None,

250

public_key_file: str = None,

251

port: int = 22

252

):

253

"""

254

SSH settings for virtual machine compute.

255

256

Parameters:

257

- username: SSH username

258

- password: SSH password (if using password auth)

259

- private_key_file: Path to private key file

260

- public_key_file: Path to public key file

261

- port: SSH port number

262

"""

263

```

264

265

### Synapse Spark Compute

266

267

Integration with Azure Synapse Analytics for big data processing.

268

269

```python { .api }

270

class SynapseSparkCompute:

271

def __init__(

272

self,

273

*,

274

name: str,

275

resource_id: str,

276

identity: IdentityConfiguration = None,

277

auto_scale_settings: AutoScaleSettings = None,

278

auto_pause_settings: AutoPauseSettings = None,

279

**kwargs

280

):

281

"""

282

Synapse Spark compute for big data processing.

283

284

Parameters:

285

- name: Compute target name

286

- resource_id: Synapse workspace resource ID

287

- identity: Managed identity configuration

288

- auto_scale_settings: Auto-scaling configuration

289

- auto_pause_settings: Auto-pause configuration

290

"""

291

292

class AutoScaleSettings:

293

def __init__(

294

self,

295

*,

296

min_node_count: int,

297

max_node_count: int,

298

enabled: bool = True

299

):

300

"""

301

Auto-scaling settings for Synapse Spark.

302

303

Parameters:

304

- min_node_count: Minimum number of nodes

305

- max_node_count: Maximum number of nodes

306

- enabled: Enable auto-scaling

307

"""

308

309

class AutoPauseSettings:

310

def __init__(

311

self,

312

*,

313

delay_in_minutes: int,

314

enabled: bool = True

315

):

316

"""

317

Auto-pause settings for Synapse Spark.

318

319

Parameters:

320

- delay_in_minutes: Delay before pausing in minutes

321

- enabled: Enable auto-pause

322

"""

323

```

324

325

### Network Settings

326

327

Virtual network configuration for compute resources.

328

329

```python { .api }

330

class NetworkSettings:

331

def __init__(

332

self,

333

*,

334

vnet_name: str = None,

335

subnet: str = None

336

):

337

"""

338

Virtual network settings for compute.

339

340

Parameters:

341

- vnet_name: Virtual network name

342

- subnet: Subnet name or resource ID

343

"""

344

```

345

346

### Compute Operations

347

348

Operations for managing compute resources through the MLClient.

349

350

```python { .api }

351

class ComputeOperations:

352

def begin_create_or_update(self, compute: Compute) -> LROPoller: ...

353

def get(self, name: str) -> Compute: ...

354

def list(self) -> list: ...

355

def begin_delete(self, name: str) -> LROPoller: ...

356

def begin_start(self, name: str) -> LROPoller: ...

357

def begin_stop(self, name: str) -> LROPoller: ...

358

def begin_restart(self, name: str) -> LROPoller: ...

359

def list_sizes(self, location: str = None) -> list: ...

360

def list_usage(self, location: str) -> list: ...

361

```

362

363

#### Usage Example

364

365

```python

366

# List all compute targets

367

compute_targets = ml_client.compute.list()

368

for compute in compute_targets:

369

print(f"Compute: {compute.name}, Type: {compute.type}, State: {compute.provisioning_state}")

370

371

# Get specific compute target

372

compute = ml_client.compute.get("cpu-cluster")

373

print(f"Compute size: {compute.size}")

374

print(f"Max instances: {compute.max_instances}")

375

376

# Start a compute instance

377

ml_client.compute.begin_start("my-compute-instance").result()

378

379

# Stop a compute instance

380

ml_client.compute.begin_stop("my-compute-instance").result()

381

382

# List available VM sizes

383

sizes = ml_client.compute.list_sizes(location="eastus")

384

for size in sizes:

385

print(f"Size: {size.name}, vCPUs: {size.v_cpus}, Memory: {size.memory_gb}GB")

386

```

387

388

### Custom Applications

389

390

Custom applications for compute instances.

391

392

```python { .api }

393

class CustomApplications:

394

def __init__(

395

self,

396

*,

397

image: ImageSettings,

398

endpoint: EndpointsSettings,

399

volumes: list = None

400

):

401

"""

402

Custom application configuration for compute instances.

403

404

Parameters:

405

- image: Docker image settings

406

- endpoint: Endpoint configuration

407

- volumes: Volume mount settings

408

"""

409

410

class ImageSettings:

411

def __init__(

412

self,

413

*,

414

reference: str,

415

type: str = "docker"

416

):

417

"""

418

Docker image settings.

419

420

Parameters:

421

- reference: Docker image reference

422

- type: Image type ("docker")

423

"""

424

425

class EndpointsSettings:

426

def __init__(

427

self,

428

*,

429

target: int,

430

published: int,

431

protocol: str = "tcp"

432

):

433

"""

434

Endpoint configuration for custom applications.

435

436

Parameters:

437

- target: Target port in container

438

- published: Published port on host

439

- protocol: Network protocol ("tcp", "udp")

440

"""

441

```

442

443

### Setup Scripts

444

445

Startup scripts for compute instances and clusters.

446

447

```python { .api }

448

class SetupScripts:

449

def __init__(

450

self,

451

*,

452

creation_script: ScriptReference = None,

453

startup_script: ScriptReference = None

454

):

455

"""

456

Setup scripts for compute resources.

457

458

Parameters:

459

- creation_script: Script to run on creation

460

- startup_script: Script to run on startup

461

"""

462

463

class ScriptReference:

464

def __init__(

465

self,

466

*,

467

script_source: str,

468

script_data: str = None,

469

script_arguments: str = None,

470

timeout: str = "30m"

471

):

472

"""

473

Reference to a setup script.

474

475

Parameters:

476

- script_source: Path to script file

477

- script_data: Inline script content

478

- script_arguments: Script arguments

479

- timeout: Script execution timeout

480

"""

481

```

482

483

#### Usage Example

484

485

```python

486

from azure.ai.ml.entities import ComputeInstance, SetupScripts, ScriptReference

487

488

# Setup script to install additional packages

489

setup_script = SetupScripts(

490

startup_script=ScriptReference(

491

script_source="./scripts/setup.sh",

492

script_arguments="--install-packages",

493

timeout="10m"

494

)

495

)

496

497

# Compute instance with setup script

498

compute_instance = ComputeInstance(

499

name="instance-with-setup",

500

size="Standard_DS3_v2",

501

setup_scripts=setup_script

502

)

503

504

ml_client.compute.begin_create_or_update(compute_instance).result()

505

```