<!-- docs/pod-management.md -->

# Pod Management

Comprehensive pod lifecycle management for creating, monitoring, controlling, and cleaning up GPU and CPU cloud instances on the RunPod platform. Includes template management, container registry authentication, and hardware discovery capabilities.

## Capabilities

### Pod Lifecycle Management

Complete pod lifecycle operations from creation through termination, with support for various cloud types, GPU configurations, and persistent storage options.

```python { .api }
def create_pod(
    name: str,
    image_name: str,
    gpu_type_id: str = None,
    cloud_type: str = "ALL",
    support_public_ip: bool = True,
    start_ssh: bool = True,
    data_center_id: str = None,
    country_code: str = None,
    gpu_count: int = 1,
    volume_in_gb: int = 0,
    container_disk_in_gb: int = None,
    min_vcpu_count: int = 1,
    min_memory_in_gb: int = 1,
    docker_args: str = "",
    ports: str = None,
    volume_mount_path: str = "/runpod-volume",
    env: dict = None,
    template_id: str = None,
    network_volume_id: str = None,
    allowed_cuda_versions: list = None,
    min_download: int = None,
    min_upload: int = None,
    instance_id: str = None
) -> dict:
    """
    Create a new GPU or CPU pod instance.

    Parameters:
    - name: Display name for the pod
    - image_name: Docker image to run (e.g., "runpod/pytorch:1.13.1-py3.10-cuda11.8.0-devel-ubuntu22.04")
    - gpu_type_id: GPU type identifier (e.g., "NVIDIA GeForce RTX 3070"), optional for CPU pods
    - cloud_type: Cloud type ("ALL", "SECURE", "COMMUNITY")
    - support_public_ip: Whether to assign public IP
    - start_ssh: Whether to enable SSH access
    - data_center_id: Specific data center to deploy in
    - country_code: Country code preference for deployment
    - gpu_count: Number of GPUs to allocate
    - volume_in_gb: Persistent volume size in GB (0 for no volume)
    - container_disk_in_gb: Container disk size in GB (None for default)
    - min_vcpu_count: Minimum CPU cores required
    - min_memory_in_gb: Minimum RAM in GB required
    - docker_args: Additional Docker arguments
    - ports: Port mapping configuration (e.g., "8888/http,22/tcp")
    - volume_mount_path: Where to mount the persistent volume
    - env: Environment variables dictionary
    - template_id: Pod template ID to use
    - network_volume_id: Network volume ID to attach
    - allowed_cuda_versions: List of allowed CUDA versions
    - min_download: Minimum download speed requirement (Mbps)
    - min_upload: Minimum upload speed requirement (Mbps)
    - instance_id: Specific instance ID to use

    Returns:
        dict: Pod creation response with pod ID and configuration
    """

def get_pod(pod_id: str) -> dict:
    """
    Get details of a specific pod by ID.

    Parameters:
    - pod_id: Unique pod identifier

    Returns:
        dict: Pod details including status, configuration, and runtime info
    """

def get_pods() -> list:
    """
    Get list of all user's pods.

    Returns:
        list: List of pod dictionaries with basic information
    """

def stop_pod(pod_id: str) -> dict:
    """
    Stop a running pod without terminating it.

    Parameters:
    - pod_id: Pod identifier to stop

    Returns:
        dict: Operation result with updated pod status
    """

def resume_pod(pod_id: str) -> dict:
    """
    Resume a stopped pod.

    Parameters:
    - pod_id: Pod identifier to resume

    Returns:
        dict: Operation result with updated pod status
    """

def terminate_pod(pod_id: str) -> dict:
    """
    Permanently terminate a pod and release all resources.

    Parameters:
    - pod_id: Pod identifier to terminate

    Returns:
        dict: Termination confirmation
    """
```

120

121

### GPU and Hardware Discovery

Retrieve information about available GPU types, pricing, and real-time availability across different cloud regions.

```python { .api }
def get_gpu(gpu_id: str) -> dict:
    """
    Get details of a specific GPU type.

    Parameters:
    - gpu_id: GPU type identifier

    Returns:
        dict: GPU specifications including memory, compute capability, and pricing
    """

def get_gpus() -> list:
    """
    Get list of all available GPU types.

    Returns:
        list: GPU type information including availability and pricing
    """
```

145

146

### Template Management

Create and manage pod templates for consistent deployments with predefined configurations.

```python { .api }
def create_template(
    name: str,
    image_name: str,
    is_public: bool = False,
    readme: str = None,
    docker_args: str = None,
    container_disk_in_gb: int = 10,
    volume_in_gb: int = 0,
    volume_mount_path: str = "/workspace",
    ports: str = None,
    env: dict = None,
    start_jupyter: bool = True,
    start_ssh: bool = True
) -> dict:
    """
    Create a new pod template for reusable configurations.

    Parameters:
    - name: Template name
    - image_name: Docker image for the template
    - is_public: Whether template is publicly available
    - readme: Template description and usage instructions
    - docker_args: Docker run arguments
    - container_disk_in_gb: Default container disk size
    - volume_in_gb: Default persistent volume size
    - volume_mount_path: Default volume mount path
    - ports: Default port configuration
    - env: Default environment variables
    - start_jupyter: Default Jupyter server setting
    - start_ssh: Default SSH access setting

    Returns:
        dict: Created template information
    """
```

186

187

### Container Registry Authentication

Manage authentication credentials for private container registries to use custom Docker images.

```python { .api }
def create_container_registry_auth(
    name: str,
    username: str,
    password: str,
    registry: str = "docker.io"
) -> dict:
    """
    Create container registry authentication credentials.

    Parameters:
    - name: Friendly name for the auth configuration
    - username: Registry username
    - password: Registry password or access token
    - registry: Registry URL (defaults to Docker Hub)

    Returns:
        dict: Created authentication configuration
    """

def update_container_registry_auth(
    auth_id: str,
    name: str = None,
    username: str = None,
    password: str = None,
    registry: str = None
) -> dict:
    """
    Update existing container registry authentication.

    Parameters:
    - auth_id: Authentication configuration ID
    - name: New friendly name (optional)
    - username: New username (optional)
    - password: New password (optional)
    - registry: New registry URL (optional)

    Returns:
        dict: Updated authentication configuration
    """

def delete_container_registry_auth(auth_id: str) -> dict:
    """
    Delete container registry authentication credentials.

    Parameters:
    - auth_id: Authentication configuration ID to delete

    Returns:
        dict: Deletion confirmation
    """
```

243

244

## Usage Examples

### Creating a Basic GPU Pod

```python
import runpod

# Set credentials
runpod.set_credentials("your-api-key")

# Create a PyTorch GPU pod
pod = runpod.create_pod(
    name="pytorch-training",
    image_name="runpod/pytorch:1.13.1-py3.10-cuda11.8.0-devel-ubuntu22.04",
    gpu_type_id="NVIDIA GeForce RTX 3070",
    cloud_type="SECURE",
    container_disk_in_gb=20,
    volume_in_gb=50,
    env={"WANDB_API_KEY": "your-wandb-key"}
)

print(f"Pod created: {pod['id']}")
```

267

268

### Managing Pod Lifecycle

```python
import runpod
import time

# Get pod details
pod_info = runpod.get_pod("your-pod-id")
print(f"Pod status: {pod_info['desiredStatus']}")

# Stop pod temporarily
runpod.stop_pod("your-pod-id")
time.sleep(30)  # Wait for stop to complete

# Resume pod
runpod.resume_pod("your-pod-id")

# List all pods
pods = runpod.get_pods()
for pod in pods:
    print(f"{pod['name']}: {pod['desiredStatus']}")

# Terminate when done
runpod.terminate_pod("your-pod-id")
```

293

294

### Using Templates

```python
import runpod

# Create a reusable template
template = runpod.create_template(
    name="ml-training-template",
    image_name="runpod/pytorch:latest",
    container_disk_in_gb=30,
    volume_in_gb=100,
    env={"CUDA_VISIBLE_DEVICES": "0"},
    start_jupyter=True,
    readme="Template for ML training with PyTorch"
)

# Use template to create pod
pod = runpod.create_pod(
    name="training-session-1",
    image_name="placeholder",  # Will be overridden by template
    gpu_type_id="NVIDIA GeForce RTX 3070",
    template_id=template['id']
)
```

318

319

### Private Registry Authentication

```python
import runpod

# Set up private registry auth
auth = runpod.create_container_registry_auth(
    name="my-private-registry",
    username="myuser",
    password="mytoken",
    registry="registry.company.com"
)

# Create pod with private image
pod = runpod.create_pod(
    name="private-image-pod",
    image_name="registry.company.com/myuser/custom-ml-image:latest",
    gpu_type_id="NVIDIA GeForce RTX 3070"
)
```