or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

converters.md index.md inference.md specialized.md specifications.md utilities.md

docs/utilities.md

0

# Utilities and Configuration

1

2

Helper functions for model management, device configuration, logging, and tensor operations. CTranslate2 provides utilities for checking model compatibility, managing computational resources, and working with multi-dimensional arrays efficiently.

3

4

## Capabilities

5

6

### Model Management

7

8

Utilities for checking and managing CTranslate2 model directories and compatibility.

9

10

```python { .api }

11

def contains_model(path: str) -> bool:

12

"""

13

Check if a directory contains a valid CTranslate2 model.

14

15

Args:

16

path (str): Path to directory to check

17

18

Returns:

19

bool: True if directory contains a valid CTranslate2 model

20

"""

21

```

22

23

### Device and Hardware Management

24

25

Functions for managing computational resources and checking hardware capabilities.

26

27

```python { .api }

28

def get_cuda_device_count() -> int:

29

"""

30

Get the number of available CUDA devices.

31

32

Returns:

33

int: Number of CUDA devices available

34

"""

35

36

def get_supported_compute_types(device: str, device_index: int = 0) -> list:

37

"""

38

Get supported compute types for a specific device.

39

40

Args:

41

device (str): Device type ("cpu" or "cuda")

42

device_index (int): Device index for multi-device setups

43

44

Returns:

45

list: List of supported compute types for the device

46

"""

47

```

48

49

### Random Number Generation

50

51

Control random number generation for reproducible results.

52

53

```python { .api }

54

def set_random_seed(seed: int):

55

"""

56

Set random seed for reproducible inference.

57

58

Args:

59

seed (int): Random seed value

60

"""

61

```

62

63

### Logging Configuration

64

65

Functions for configuring CTranslate2 logging levels and output.

66

67

```python { .api }

68

def get_log_level() -> str:

69

"""

70

Get current logging level.

71

72

Returns:

73

str: Current log level ("trace", "debug", "info", "warning", "error", "critical", "off")

74

"""

75

76

def set_log_level(level: str):

77

"""

78

Set logging level for CTranslate2.

79

80

Args:

81

level (str): Log level to set ("trace", "debug", "info", "warning", "error", "critical", "off")

82

"""

83

```

84

85

### Tensor Operations

86

87

The `StorageView` class provides efficient multi-dimensional array operations with device management.

88

89

```python { .api }

90

class StorageView:

91

def __init__(self, array=None, dtype=None):

92

"""

93

Initialize a StorageView for efficient tensor operations.

94

95

Args:

96

array: Input array data (numpy array, list, etc.)

97

dtype: Data type for the storage ("float32", "float16", "int32", "int16", "int8")

98

"""

99

100

def numpy(self):

101

"""

102

Convert StorageView to NumPy array.

103

104

Returns:

105

numpy.ndarray: NumPy array representation

106

"""

107

108

def copy(self) -> 'StorageView':

109

"""

110

Create a copy of the StorageView.

111

112

Returns:

113

StorageView: Copied StorageView object

114

"""

115

116

def to(self, dtype: str) -> 'StorageView':

117

"""

118

Convert StorageView to different data type.

119

120

Args:

121

dtype (str): Target data type

122

123

Returns:

124

StorageView: New StorageView with converted data type

125

"""

126

127

@property

128

def device(self) -> str:

129

"""Device where the storage is located."""

130

131

@property

132

def device_index(self) -> int:

133

"""Device index for multi-device setups."""

134

135

@property

136

def dtype(self) -> str:

137

"""Data type of the stored elements."""

138

139

@property

140

def shape(self) -> tuple:

141

"""Shape of the multi-dimensional array."""

142

143

@property

144

def size(self) -> int:

145

"""Total number of elements in the array."""

146

147

@property

148

def rank(self) -> int:

149

"""Number of dimensions in the array."""

150

```

151

152

### Execution Statistics

153

154

Monitor performance and resource usage during model inference.

155

156

```python { .api }

157

class ExecutionStats:

158

"""Statistics from model execution."""

159

160

@property

161

def num_tokens(self) -> int:

162

"""Total number of tokens processed."""

163

164

@property

165

def num_examples(self) -> int:

166

"""Total number of examples processed."""

167

168

@property

169

def total_time_in_ms(self) -> float:

170

"""Total execution time in milliseconds."""

171

```

172

173

### Multi-Process Information

174

175

Information about distributed processing setups.

176

177

```python { .api }

178

class MpiInfo:

179

"""MPI (Message Passing Interface) information."""

180

181

@property

182

def rank(self) -> int:

183

"""Current process rank in MPI setup."""

184

185

@property

186

def size(self) -> int:

187

"""Total number of processes in MPI setup."""

188

```

189

190

### Data Type and Device Enumerations

191

192

Constants for specifying data types and devices.

193

194

```python { .api }

195

class DataType:

196

"""Data type constants for StorageView and model operations."""

197

FLOAT32: str = "float32"

198

FLOAT16: str = "float16"

199

BFLOAT16: str = "bfloat16"

200

INT32: str = "int32"

201

INT16: str = "int16"

202

INT8: str = "int8"

203

204

class Device:

205

"""Device constants for model placement."""

206

CPU: str = "cpu"

207

CUDA: str = "cuda"

208

AUTO: str = "auto"

209

```

210

211

## Usage Examples

212

213

### Model Directory Validation

214

215

```python

216

import ctranslate2

217

218

# Check if directory contains valid model

219

model_path = "path/to/potential/model"

220

if ctranslate2.contains_model(model_path):

221

print("Valid CTranslate2 model found")

222

translator = ctranslate2.Translator(model_path)

223

else:

224

print("No valid model found in directory")

225

```

226

227

### Device Management

228

229

```python

230

import ctranslate2

231

232

# Check available CUDA devices

233

cuda_count = ctranslate2.get_cuda_device_count()

234

print(f"Available CUDA devices: {cuda_count}")

235

236

if cuda_count > 0:

237

# Check supported compute types for GPU

238

gpu_compute_types = ctranslate2.get_supported_compute_types("cuda", 0)

239

print(f"GPU compute types: {gpu_compute_types}")

240

241

# Use optimal compute type

242

if "int8" in gpu_compute_types:

243

translator = ctranslate2.Translator(

244

"model_path",

245

device="cuda",

246

compute_type="int8"

247

)

248

249

# Check supported compute types for CPU

250

cpu_compute_types = ctranslate2.get_supported_compute_types("cpu")

251

print(f"CPU compute types: {cpu_compute_types}")

252

```

253

254

### Reproducible Results

255

256

```python

257

import ctranslate2

258

259

# Set seed for reproducible inference

260

ctranslate2.set_random_seed(42)

261

262

# With a fixed seed, random sampling during inference is reproducible

263

generator = ctranslate2.Generator("model_path", device="cpu")

264

results1 = generator.generate_batch([["Hello"]], sampling_temperature=0.8)

265

266

# Reset seed and run again - should get same results

267

ctranslate2.set_random_seed(42)

268

results2 = generator.generate_batch([["Hello"]], sampling_temperature=0.8)

269

270

assert results1[0].sequences == results2[0].sequences

271

```

272

273

### Logging Configuration

274

275

```python

276

import ctranslate2

277

278

# Set logging level to see detailed information

279

ctranslate2.set_log_level("debug")

280

281

# Load model with debug logging

282

translator = ctranslate2.Translator("model_path", device="cpu")

283

284

# Get current log level

285

current_level = ctranslate2.get_log_level()

286

print(f"Current log level: {current_level}")

287

288

# Reduce logging for production

289

ctranslate2.set_log_level("warning")

290

```

291

292

### Working with StorageView

293

294

```python

295

import ctranslate2

296

import numpy as np

297

298

# Create StorageView from numpy array

299

np_array = np.random.randn(3, 4).astype(np.float32)

300

storage = ctranslate2.StorageView(np_array)

301

302

print(f"Shape: {storage.shape}")

303

print(f"Size: {storage.size}")

304

print(f"Data type: {storage.dtype}")

305

print(f"Device: {storage.device}")

306

print(f"Rank: {storage.rank}")

307

308

# Convert to different data type

309

storage_fp16 = storage.to("float16")

310

print(f"New data type: {storage_fp16.dtype}")

311

312

# Convert back to numpy

313

np_result = storage_fp16.numpy()

314

print(f"Result shape: {np_result.shape}")

315

316

# Create copy

317

storage_copy = storage.copy()

318

print(f"Copy device: {storage_copy.device}")

319

```

320

321

### Performance Monitoring

322

323

```python

324

import ctranslate2

325

326

# Create a translator (no statistics-specific option is set at construction)

327

translator = ctranslate2.Translator("model_path", device="cpu")

328

329

# Perform translation

330

source = [["Hello", "world"] for _ in range(100)]

331

results = translator.translate_batch(source)

332

333

# Note: ExecutionStats would be available through specific API calls

334

# or integrated profiling tools (implementation-specific)

335

```

336

337

### Multi-GPU Setup

338

339

```python

340

import ctranslate2

341

342

# Check available devices

343

cuda_count = ctranslate2.get_cuda_device_count()

344

345

if cuda_count >= 2:

346

# Use specific GPU device

347

translator_gpu0 = ctranslate2.Translator(

348

"model_path",

349

device="cuda",

350

device_index=0

351

)

352

353

translator_gpu1 = ctranslate2.Translator(

354

"model_path",

355

device="cuda",

356

device_index=1

357

)

358

359

# Or use multiple devices with tensor parallelism

360

translator_parallel = ctranslate2.Translator(

361

"model_path",

362

device="cuda",

363

device_index=[0, 1], # Use both GPUs

364

tensor_parallel=True

365

)

366

```

367

368

### Advanced Configuration

369

370

```python

371

import ctranslate2

372

373

# Configure for optimal performance

374

ctranslate2.set_log_level("warning") # Reduce logging overhead

375

ctranslate2.set_random_seed(42) # Reproducible results

376

377

# Check optimal compute type for device

378

device = "cuda" if ctranslate2.get_cuda_device_count() > 0 else "cpu"

379

compute_types = ctranslate2.get_supported_compute_types(device)

380

381

# Select best compute type (prefer quantized for speed)

382

compute_type = "int8" if "int8" in compute_types else "default"

383

384

# Create optimized model instance

385

model = ctranslate2.Translator(

386

"model_path",

387

device=device,

388

compute_type=compute_type,

389

inter_threads=4, # Parallel processing

390

max_queued_batches=16, # Larger batch queue

391

flash_attention=True # Enable Flash Attention if available

392

)

393

394

print(f"Model loaded on {model.device} with {model.compute_type} precision")

395

```

396

397

## Types

398

399

```python { .api }

400

# Core utility types

401

class StorageView:

402

"""Multi-dimensional array container for efficient tensor operations."""

403

device: str # Device location ("cpu", "cuda")

404

device_index: int # Device index for multi-device setups

405

dtype: str # Data type of elements

406

shape: tuple # Array dimensions

407

size: int # Total number of elements

408

rank: int # Number of dimensions

409

410

class ExecutionStats:

411

"""Performance statistics from model execution."""

412

num_tokens: int # Number of tokens processed

413

num_examples: int # Number of examples processed

414

total_time_in_ms: float # Total execution time

415

416

class MpiInfo:

417

"""MPI (Message Passing Interface) information."""

418

rank: int # Process rank in distributed setup

419

size: int # Total number of processes

420

421

# Enumeration classes

422

class DataType:

423

"""Available data types for tensors and computations."""

424

FLOAT32: str = "float32"

425

FLOAT16: str = "float16"

426

BFLOAT16: str = "bfloat16"

427

INT32: str = "int32"

428

INT16: str = "int16"

429

INT8: str = "int8"

430

431

class Device:

432

"""Available device types for model execution."""

433

CPU: str = "cpu"

434

CUDA: str = "cuda"

435

AUTO: str = "auto"

436

```