or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

core-distributed.md data-processing.md distributed-training.md hyperparameter-tuning.md index.md model-serving.md reinforcement-learning.md utilities-advanced.md

core-distributed.md docs/

0

# Core Distributed Computing

1

2

Core Ray functionality for distributed task execution, actor management, and object storage. This provides the foundation for all Ray applications, enabling parallel and distributed execution of Python code with minimal modifications.

3

4

## Capabilities

5

6

### Ray Initialization and Cluster Management

7

8

Initialize Ray runtime and manage cluster connections.

9

10

```python { .api }

11

def init(address=None, num_cpus=None, num_gpus=None, memory=None,

12

object_store_memory=None, resources=None, runtime_env=None,

13

logging_level="INFO", log_to_driver=True, configure_logging=True,

14

**kwargs):

15

"""

16

Initialize Ray runtime.

17

18

Args:

19

address (str, optional): Ray cluster address to connect to

20

num_cpus (int, optional): Number of CPUs to use

21

num_gpus (int, optional): Number of GPUs to use

22

memory (int, optional): Amount of memory to use in bytes

23

object_store_memory (int, optional): Object store memory in bytes

24

resources (dict, optional): Custom resources dictionary

25

runtime_env (dict, optional): Runtime environment configuration

26

logging_level (str): Logging level ("DEBUG", "INFO", "WARNING", "ERROR")

27

log_to_driver (bool): Whether output from worker processes on all nodes is forwarded to the driver

28

configure_logging (bool): Whether to configure logging

29

30

Returns:

31

RayContext (ray.ClientContext when connecting through Ray Client): Ray context information

32

"""

33

34

def is_initialized():

35

"""Check if Ray is initialized."""

36

37

def shutdown():

38

"""Shutdown Ray runtime and cleanup resources."""

39

40

def get_runtime_context():

41

"""Get current Ray runtime context information."""

42

```

43

44

### Remote Task Execution

45

46

Execute functions remotely as distributed tasks.

47

48

```python { .api }

49

def remote(num_cpus=None, num_gpus=None, memory=None, resources=None,

50

max_calls=None, max_retries=3, retry_exceptions=False,

51

runtime_env=None, **kwargs):

52

"""

53

Decorator to make functions executable as Ray tasks.

54

55

Args:

56

num_cpus (float, optional): Number of CPUs required

57

num_gpus (float, optional): Number of GPUs required

58

memory (int, optional): Heap memory required, in bytes

59

resources (dict, optional): Custom resources required

60

max_calls (int, optional): Maximum number of times a worker may execute this task before the worker process exits

61

max_retries (int): Maximum number of retries on failure

62

retry_exceptions (bool/list): Exceptions to retry on

63

runtime_env (dict, optional): Runtime environment

64

65

Returns:

66

RemoteFunction: Ray remote function

67

"""

68

```

69

70

### Object Store Management

71

72

Manage objects in Ray's distributed object store.

73

74

```python { .api }

75

def put(value, _owner=None):

76

"""

77

Store object in Ray object store.

78

79

Args:

80

value: Object to store

81

_owner (ActorHandle, optional): Actor that owns this object

82

83

Returns:

84

ObjectRef: Reference to stored object

85

"""

86

87

def get(object_refs, timeout=None):

88

"""

89

Retrieve objects from object store.

90

91

Args:

92

object_refs (ObjectRef/list): Object references to retrieve

93

timeout (float, optional): Timeout in seconds

94

95

Returns:

96

Object or list of objects

97

"""

98

99

def wait(object_refs, num_returns=1, timeout=None, fetch_local=True):

100

"""

101

Wait for subset of object references to become ready.

102

103

Args:

104

object_refs (list): List of object references

105

num_returns (int): Number of objects to wait for

106

timeout (float, optional): Timeout in seconds

107

fetch_local (bool): Whether to wait until an object has been fetched to the local node before reporting it as ready

108

109

Returns:

110

tuple: (ready_refs, remaining_refs)

111

"""

112

```

113

114

### Actor Management

115

116

Create and manage stateful distributed actors.

117

118

```python { .api }

119

def get_actor(name, namespace=None):

120

"""

121

Get handle to named actor.

122

123

Args:

124

name (str): Actor name

125

namespace (str, optional): Actor namespace

126

127

Returns:

128

ActorHandle: Handle to the actor

129

"""

130

131

def kill(actor, no_restart=True):

132

"""

133

Kill an actor forcefully.

134

135

Args:

136

actor (ActorHandle): Handle of the actor to kill

137

no_restart (bool): Whether to prevent actor restart

138

"""

139

140

def cancel(object_ref, force=False, recursive=True):

141

"""

142

Cancel task execution.

143

144

Args:

145

object_ref (ObjectRef): Task to cancel

146

force (bool): Force cancellation

147

recursive (bool): Whether to also cancel child tasks started by the task

148

149

Returns:

150

None: cancellation is requested, not confirmed; calling ray.get on a cancelled task raises TaskCancelledError

151

"""

152

```

153

154

### Cluster State and Resources

155

156

Monitor cluster state and resource usage.

157

158

```python { .api }

159

def cluster_resources():

160

"""

161

Get total cluster resources.

162

163

Returns:

164

dict: Resource counts by type

165

"""

166

167

def available_resources():

168

"""

169

Get available cluster resources.

170

171

Returns:

172

dict: Available resource counts by type

173

"""

174

175

def nodes():

176

"""

177

Get information about cluster nodes.

178

179

Returns:

180

list: List of node information dictionaries

181

"""

182

183

def timeline():

184

"""

185

Get Ray timeline for profiling and debugging.

186

187

Returns:

188

list: Timeline events

189

"""

190

191

def get_gpu_ids():

192

"""

193

Get GPU IDs visible to current worker.

194

195

Returns:

196

list: List of GPU IDs

197

"""

198

```

199

200

### Cross-Language Support

201

202

Support for Java and C++ integration.

203

204

```python { .api }

205

def java_function(class_name: str, function_name: str):

206

"""

207

Create Ray remote function from Java class.

208

209

Args:

210

class_name (str): Java class name

211

function_name (str): Java function name

212

213

Returns:

214

JavaFunction: Ray Java function

215

"""

216

217

def cpp_function(function_name: str):

218

"""

219

Create Ray remote function from C++ function.

220

221

Args:

222

function_name (str): C++ function name

223

224

Returns:

225

CppFunction: Ray C++ function

226

"""

227

228

def java_actor_class(class_name):

229

"""

230

Create Ray actor class from Java class.

231

232

Args:

233

class_name (str): Java class name

234

235

Returns:

236

JavaActorClass: Ray Java actor class

237

"""

238

```

239

240

### Dashboard and Debugging

241

242

Display information in Ray dashboard and debugging utilities.

243

244

```python { .api }

245

def show_in_dashboard(message: str, key: str = "", dtype: str = "text"):

246

"""

247

Display message in Ray dashboard.

248

249

Args:

250

message (str): Message to be displayed

251

key (str): Key name for the message

252

dtype (str): Message type ("text" or "html")

253

"""

254

```

255

256

### Runtime Configuration

257

258

Configure Ray runtime behavior.

259

260

```python { .api }

261

class LoggingConfig:

262

"""Ray logging configuration."""

263

264

def __init__(self, encoding="TEXT", log_level="INFO", logs_dir=None,

265

enable_default_setup=True):

266

"""

267

Initialize logging configuration.

268

269

Args:

270

encoding (str): Log encoding ("TEXT" or "JSON")

271

log_level (str): Log level

272

logs_dir (str, optional): Directory for log files

273

enable_default_setup (bool): Enable default logging setup

274

"""

275

```

276

277

## Types

278

279

```python { .api }

280

# Core Object Types

281

class ObjectRef:

282

"""Reference to object in Ray object store."""

283

284

class ObjectRefGenerator:

285

"""Generator for streaming object references."""

286

287

class DynamicObjectRefGenerator:

288

"""Dynamic generator for object references."""

289

290

# ID Types

291

class ActorID:

292

"""Unique identifier for Ray actor."""

293

294

class TaskID:

295

"""Unique identifier for Ray task."""

296

297

class JobID:

298

"""Unique identifier for Ray job."""

299

300

class NodeID:

301

"""Unique identifier for Ray node."""

302

303

class WorkerID:

304

"""Unique identifier for Ray worker."""

305

306

class FunctionID:

307

"""Unique identifier for Ray function."""

308

309

class UniqueID:

310

"""Base class for Ray unique identifiers."""

311

312

class PlacementGroupID:

313

"""Unique identifier for placement group."""

314

315

class ClusterID:

316

"""Unique identifier for Ray cluster."""

317

318

class ActorClassID:

319

"""Unique identifier for Ray actor class."""

320

321

# Language Support

322

class Language:

323

"""Supported programming languages in Ray."""

324

PYTHON = "PYTHON"

325

JAVA = "JAVA"

326

CPP = "CPP"

327

328

# Mode Constants

329

LOCAL_MODE = "LOCAL_MODE"

330

SCRIPT_MODE = "SCRIPT_MODE"

331

WORKER_MODE = "WORKER_MODE"

332

```

333

334

## Usage Examples

335

336

### Basic Task Execution

337

338

```python

339

import ray

340

341

ray.init()

342

343

@ray.remote

344

def square(x):

345

return x * x

346

347

# Execute task

348

future = square.remote(4)

349

result = ray.get(future)

350

print(result) # 16

351

352

ray.shutdown()

353

```

354

355

### Actor Usage

356

357

```python

358

import ray

359

360

ray.init()

361

362

@ray.remote

363

class Counter:

364

def __init__(self):

365

self.count = 0

366

367

def increment(self):

368

self.count += 1

369

return self.count

370

371

def get_count(self):

372

return self.count

373

374

# Create actor

375

counter = Counter.remote()

376

377

# Call actor methods

378

ray.get(counter.increment.remote())

379

count = ray.get(counter.get_count.remote())

380

print(count) # 1

381

382

ray.shutdown()

383

```

384

385

### Parallel Processing

386

387

```python

388

import ray

389

import time

390

391

ray.init()

392

393

@ray.remote

394

def slow_function(i):

395

time.sleep(1)

396

return i * i

397

398

# Execute in parallel

399

futures = [slow_function.remote(i) for i in range(4)]

400

results = ray.get(futures)

401

print(results) # [0, 1, 4, 9]

402

403

ray.shutdown()

404

```