# Model Specifications

Programmatically define and build Transformer model architectures from scratch using CTranslate2's specification system. Model specifications enable creating custom models, modifying existing architectures, and building models without relying on external frameworks.

## Capabilities

### Base Model Specifications

Core specification classes that provide the foundation for building different types of Transformer models.

```python { .api }
class ModelSpec:
    """Abstract base class for all model specifications."""

    def save(self, output_dir: str):
        """
        Save the model specification to a directory.

        Args:
            output_dir (str): Directory to save the model
        """

    def validate(self):
        """Validate the model specification for correctness."""

    def optimize(self, quantization: str = None):
        """
        Optimize model weights with optional quantization.

        Args:
            quantization (str): Quantization type ("int8", "float16", etc.)
        """

    def register_file(self, path: str, filename: str = None):
        """
        Register additional files to include with the model.

        Args:
            path (str): Path to the file to register
            filename (str): Optional custom filename in model directory
        """

class LayerSpec:
    """Abstract base class for layer specifications."""

    def variables(self, prefix: str = "", ordered: bool = False) -> dict:
        """
        Get layer variables with optional prefix.

        Args:
            prefix (str): Prefix for variable names
            ordered (bool): Whether to return ordered dictionary

        Returns:
            dict: Dictionary of layer variables
        """

    def validate(self):
        """Validate the layer specification."""

class SequenceToSequenceModelSpec(ModelSpec):
    """Base class for sequence-to-sequence model specifications."""

    def register_source_vocabulary(self, tokens: list):
        """
        Register source vocabulary tokens.

        Args:
            tokens (list): List of source vocabulary tokens
        """

    def register_target_vocabulary(self, tokens: list):
        """
        Register target vocabulary tokens.

        Args:
            tokens (list): List of target vocabulary tokens
        """

    def register_vocabulary_mapping(self, path: str):
        """
        Register vocabulary mapping file.

        Args:
            path (str): Path to vocabulary mapping file
        """

class LanguageModelSpec(ModelSpec):
    """Base class for language model specifications."""

    def register_vocabulary(self, tokens: list):
        """
        Register vocabulary tokens.

        Args:
            tokens (list): List of vocabulary tokens
        """
```
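
Every concrete specification inherits this save/validate/optimize workflow. The sketch below illustrates it with the `TransformerSpec.from_config` factory documented in the next section; the layer sizes, vocabularies, and output path are illustrative only.

```python
import ctranslate2.specs as specs

# Build a concrete spec (here via the factory shown below), then run the
# common ModelSpec workflow: validate, optionally quantize, and save.
spec = specs.TransformerSpec.from_config(num_layers=2, num_heads=4)

spec.register_source_vocabulary(["<unk>", "<s>", "</s>", "hello", "world"])
spec.register_target_vocabulary(["<unk>", "<s>", "</s>", "hallo", "welt"])

spec.validate()                      # check the specification for correctness
spec.optimize(quantization="int8")   # optional weight quantization
spec.save("tiny_transformer_model")  # write the CTranslate2 model directory
```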

### Transformer Model Specifications

Specific implementations for different Transformer model architectures.

```python { .api }
class TransformerSpec(SequenceToSequenceModelSpec):
    """Specification for sequence-to-sequence Transformer models."""

    def __init__(self, encoder: 'TransformerEncoderSpec', decoder: 'TransformerDecoderSpec'):
        """
        Initialize Transformer specification.

        Args:
            encoder (TransformerEncoderSpec): Encoder specification
            decoder (TransformerDecoderSpec): Decoder specification
        """

    @classmethod
    def from_config(cls, num_layers: int, num_heads: int,
                    d_model: int = 512, d_ff: int = 2048, **kwargs):
        """
        Create Transformer specification from configuration.

        Args:
            num_layers (int): Number of encoder/decoder layers
            num_heads (int): Number of attention heads
            d_model (int): Model dimension
            d_ff (int): Feed-forward dimension
            **kwargs: Additional configuration parameters

        Returns:
            TransformerSpec: Configured Transformer specification
        """

class TransformerDecoderModelSpec(LanguageModelSpec):
    """Specification for decoder-only Transformer models (GPT-style)."""

    def __init__(self, decoder: 'TransformerDecoderSpec'):
        """
        Initialize decoder-only Transformer specification.

        Args:
            decoder (TransformerDecoderSpec): Decoder specification
        """

    @classmethod
    def from_config(cls, num_layers: int, num_heads: int,
                    d_model: int = 512, vocab_size: int = 50257, **kwargs):
        """
        Create decoder-only Transformer from configuration.

        Args:
            num_layers (int): Number of decoder layers
            num_heads (int): Number of attention heads
            d_model (int): Model dimension
            vocab_size (int): Vocabulary size
            **kwargs: Additional configuration parameters

        Returns:
            TransformerDecoderModelSpec: Configured decoder model
        """

class TransformerEncoderModelSpec(ModelSpec):
    """Specification for encoder-only Transformer models (BERT-style)."""

    def __init__(self, encoder: 'TransformerEncoderSpec', pooling_layer: bool = False):
        """
        Initialize encoder-only Transformer specification.

        Args:
            encoder (TransformerEncoderSpec): Encoder specification
            pooling_layer (bool): Whether to include pooling layer
        """
```
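
The usage examples later in this document cover the sequence-to-sequence and decoder-only cases. For completeness, here is a minimal sketch of the encoder-only variant using only the constructors shown above (layer counts are illustrative, and vocabulary registration is omitted):

```python
import ctranslate2.specs as specs

# Encoder-only (BERT-style) specification with a pooling layer on top
encoder_spec = specs.TransformerEncoderSpec(
    num_layers=12,
    num_heads=12,
    pre_norm=True,
    activation="gelu"
)

encoder_model_spec = specs.TransformerEncoderModelSpec(
    encoder_spec,
    pooling_layer=True
)

encoder_model_spec.validate()
encoder_model_spec.save("custom_encoder_model")
```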

### Transformer Layer Specifications

Detailed specifications for Transformer encoder and decoder layers.

```python { .api }
class TransformerEncoderSpec(LayerSpec):
    """Specification for Transformer encoder layers."""

    def __init__(self, num_layers: int, num_heads: int,
                 pre_norm: bool = True, activation: str = "relu",
                 num_source_embeddings: int = None,
                 embeddings_merge: str = "concat",
                 layernorm_embedding: bool = False,
                 relative_position: bool = False,
                 relative_attention_bias: bool = False,
                 ffn_glu: bool = False, rms_norm: bool = False,
                 multi_query_attention: bool = False):
        """
        Initialize Transformer encoder specification.

        Args:
            num_layers (int): Number of encoder layers
            num_heads (int): Number of attention heads
            pre_norm (bool): Whether to use pre-normalization
            activation (str): Activation function ("relu", "gelu", etc.)
            num_source_embeddings (int): Number of source embeddings
            embeddings_merge (str): How to merge embeddings ("concat", "add")
            layernorm_embedding (bool): Whether to normalize embeddings
            relative_position (bool): Whether to use relative position
            relative_attention_bias (bool): Whether to use attention bias
            ffn_glu (bool): Whether to use GLU in feed-forward
            rms_norm (bool): Whether to use RMS normalization
            multi_query_attention (bool): Whether to use multi-query attention
        """

class TransformerDecoderSpec(LayerSpec):
    """Specification for Transformer decoder layers."""

    def __init__(self, num_layers: int, num_heads: int,
                 pre_norm: bool = True, activation: str = "relu",
                 layernorm_embedding: bool = False,
                 with_encoder_attention: bool = True,
                 no_final_norm: bool = False,
                 project_in_out: bool = False,
                 relative_position: bool = False,
                 relative_attention_bias: bool = False,
                 alignment_layer: int = None,
                 alignment_heads: int = None,
                 ffn_glu: bool = False, rms_norm: bool = False,
                 alibi: bool = False,
                 alibi_use_positive_positions: bool = False,
                 scale_alibi: bool = False,
                 rotary_dim: int = None,
                 rotary_interleave: bool = True,
                 rotary_scaling_type: str = None,
                 rotary_scaling_factor: float = 1.0,
                 rotary_base: float = 10000.0,
                 parallel_residual: bool = False,
                 shared_layer_norm: bool = False,
                 pre_post_layer_norm: bool = False,
                 multi_query_attention: bool = False,
                 num_heads_kv: int = None,
                 head_dim: int = None,
                 sliding_window: int = None):
        """
        Initialize Transformer decoder specification.

        Args:
            num_layers (int): Number of decoder layers
            num_heads (int): Number of attention heads
            pre_norm (bool): Whether to use pre-normalization
            activation (str): Activation function
            layernorm_embedding (bool): Whether to normalize embeddings
            with_encoder_attention (bool): Whether to use encoder-decoder attention
            no_final_norm (bool): Whether to skip final normalization
            project_in_out (bool): Whether to project input/output
            relative_position (bool): Whether to use relative position
            relative_attention_bias (bool): Whether to use attention bias
            alignment_layer (int): Layer for alignment attention
            alignment_heads (int): Number of alignment heads
            ffn_glu (bool): Whether to use GLU in feed-forward
            rms_norm (bool): Whether to use RMS normalization
            alibi (bool): Whether to use ALiBi position encoding
            alibi_use_positive_positions (bool): Use positive positions in ALiBi
            scale_alibi (bool): Whether to scale ALiBi
            rotary_dim (int): Rotary embedding dimension
            rotary_interleave (bool): Whether to interleave rotary embeddings
            rotary_scaling_type (str): Type of rotary scaling
            rotary_scaling_factor (float): Rotary scaling factor
            rotary_base (float): Rotary base frequency
            parallel_residual (bool): Whether to use parallel residual
            shared_layer_norm (bool): Whether to share layer norm
            pre_post_layer_norm (bool): Pre and post layer normalization
            multi_query_attention (bool): Whether to use multi-query attention
            num_heads_kv (int): Number of key-value heads
            head_dim (int): Dimension per attention head
            sliding_window (int): Sliding window size for attention
        """
```
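
The decoder options above cover most recent architecture variants. As an illustration, a LLaMA-style decoder stack might be described as follows (a sketch; the parameter values are illustrative and do not correspond to a specific released checkpoint):

```python
import ctranslate2.specs as specs

# Illustrative decoder configuration: pre-norm blocks with RMSNorm,
# a gated feed-forward (ffn_glu with SiLU), rotary position embeddings,
# and grouped-query attention (fewer key/value heads than query heads).
decoder_spec = specs.TransformerDecoderSpec(
    num_layers=32,
    num_heads=32,
    pre_norm=True,
    activation="silu",
    ffn_glu=True,
    rms_norm=True,
    with_encoder_attention=False,  # decoder-only, no cross-attention
    rotary_dim=128,                # rotary embedding dimension per head
    rotary_base=10000.0,
    num_heads_kv=8,                # grouped-query attention
    head_dim=128
)

lm_spec = specs.TransformerDecoderModelSpec(decoder_spec)
```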

### Common Layer Specifications

Building blocks for constructing Transformer architectures.

```python { .api }
class LayerNormSpec(LayerSpec):
    """Layer normalization specification."""

    def __init__(self, normalized_shape: int, eps: float = 1e-5):
        """
        Initialize layer normalization.

        Args:
            normalized_shape (int): Size of normalized dimensions
            eps (float): Epsilon for numerical stability
        """

class LinearSpec(LayerSpec):
    """Linear/dense layer specification."""

    def __init__(self, in_features: int, out_features: int, bias: bool = True):
        """
        Initialize linear layer.

        Args:
            in_features (int): Input feature dimension
            out_features (int): Output feature dimension
            bias (bool): Whether to include bias term
        """

class Conv1DSpec(LayerSpec):
    """1D convolution layer specification."""

    def __init__(self, in_channels: int, out_channels: int,
                 kernel_size: int, stride: int = 1, padding: int = 0):
        """
        Initialize 1D convolution layer.

        Args:
            in_channels (int): Number of input channels
            out_channels (int): Number of output channels
            kernel_size (int): Convolution kernel size
            stride (int): Convolution stride
            padding (int): Convolution padding
        """

class EmbeddingsSpec(LayerSpec):
    """Embedding layer specification."""

    def __init__(self, num_embeddings: int, embedding_dim: int,
                 padding_idx: int = None):
        """
        Initialize embedding layer.

        Args:
            num_embeddings (int): Vocabulary size
            embedding_dim (int): Embedding dimension
            padding_idx (int): Index for padding token
        """

class MultiHeadAttentionSpec(LayerSpec):
    """Multi-head attention layer specification."""

    def __init__(self, d_model: int, num_heads: int, dropout: float = 0.0):
        """
        Initialize multi-head attention.

        Args:
            d_model (int): Model dimension
            num_heads (int): Number of attention heads
            dropout (float): Dropout probability
        """
```
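
These blocks are meant to be composed into larger `LayerSpec` subclasses. The sketch below assumes the conventional pattern of storing child specs as attributes so that `variables()` can collect them under prefixed names; `ClassifierHeadSpec` is a hypothetical example, not part of the API above.

```python
import ctranslate2.specs as specs

class ClassifierHeadSpec(specs.LayerSpec):
    """Hypothetical classification head composed from the common blocks."""

    def __init__(self, d_model: int, num_labels: int):
        # Child specs stored as attributes; variables() is assumed to walk
        # them and prefix their variable names with the attribute path.
        self.layer_norm = specs.LayerNormSpec(d_model)
        self.projection = specs.LinearSpec(d_model, num_labels)

head = ClassifierHeadSpec(d_model=768, num_labels=2)
print(head.variables(prefix="classifier/"))
```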

### Configuration Classes

Configuration objects for different model types.

```python { .api }
class ModelConfig:
    """Base configuration class for models."""

    def to_dict(self) -> dict:
        """Convert configuration to dictionary."""

    def save_as_json(self, path: str):
        """
        Save configuration as JSON file.

        Args:
            path (str): Path to save JSON file
        """

class SequenceToSequenceModelConfig(ModelConfig):
    """Configuration for sequence-to-sequence models."""

    def __init__(self, unk_token: str = "<unk>", bos_token: str = "<s>",
                 eos_token: str = "</s>", decoder_start_token: str = None,
                 add_source_bos: bool = False, add_source_eos: bool = False):
        """
        Initialize seq2seq model configuration.

        Args:
            unk_token (str): Unknown token
            bos_token (str): Beginning of sequence token
            eos_token (str): End of sequence token
            decoder_start_token (str): Decoder start token
            add_source_bos (bool): Add BOS to source sequences
            add_source_eos (bool): Add EOS to source sequences
        """

class LanguageModelConfig(ModelConfig):
    """Configuration for language models."""

    def __init__(self, unk_token: str = "<unk>", bos_token: str = "<s>",
                 eos_token: str = "</s>"):
        """
        Initialize language model configuration.

        Args:
            unk_token (str): Unknown token
            bos_token (str): Beginning of sequence token
            eos_token (str): End of sequence token
        """
```
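
Configurations can be built and written independently of a model spec. A small sketch using the constructors above (the output path is only an example and would normally point inside a saved model directory):

```python
import ctranslate2.specs as specs

# Describe the special tokens of a translation model
config = specs.SequenceToSequenceModelConfig(
    decoder_start_token="<s>",
    add_source_eos=True
)

print(config.to_dict())  # inspect the configuration as a plain dict
config.save_as_json("custom_transformer_model/config.json")
```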

### Specialized Model Specifications

Specifications for domain-specific models like Whisper and Wav2Vec2.

```python { .api }
class WhisperSpec(ModelSpec):
    """Specification for Whisper speech recognition models."""

    def __init__(self, num_encoder_layers: int, num_encoder_heads: int,
                 num_decoder_layers: int, num_decoder_heads: int,
                 d_model: int = 512, vocab_size: int = 51865):
        """
        Initialize Whisper specification.

        Args:
            num_encoder_layers (int): Number of encoder layers
            num_encoder_heads (int): Number of encoder attention heads
            num_decoder_layers (int): Number of decoder layers
            num_decoder_heads (int): Number of decoder attention heads
            d_model (int): Model dimension
            vocab_size (int): Vocabulary size
        """

class WhisperConfig(ModelConfig):
    """Configuration for Whisper models."""

    def __init__(self, suppress_ids: list = None, suppress_ids_begin: list = None,
                 lang_ids: dict = None, alignment_heads: list = None):
        """
        Initialize Whisper configuration.

        Args:
            suppress_ids (list): Token IDs to suppress during generation
            suppress_ids_begin (list): Token IDs to suppress at beginning
            lang_ids (dict): Language ID mappings
            alignment_heads (list): Attention heads for alignment
        """

class Wav2Vec2Spec(ModelSpec):
    """Specification for Wav2Vec2 models."""

    def __init__(self, feat_layers: list, num_layers: int, num_heads: int,
                 vocab_size: int, return_hidden: bool = False):
        """
        Initialize Wav2Vec2 specification.

        Args:
            feat_layers (list): Feature extraction layer configuration
            num_layers (int): Number of transformer layers
            num_heads (int): Number of attention heads
            vocab_size (int): Vocabulary size
            return_hidden (bool): Whether to return hidden states
        """

class Wav2Vec2BertSpec(ModelSpec):
    """Specification for Wav2Vec2-BERT models."""

    def __init__(self, num_hidden_layers: int, num_adapter_layers: int,
                 vocab_size: int, return_hidden: bool = False):
        """
        Initialize Wav2Vec2-BERT specification.

        Args:
            num_hidden_layers (int): Number of hidden layers
            num_adapter_layers (int): Number of adapter layers
            vocab_size (int): Vocabulary size
            return_hidden (bool): Whether to return hidden states
        """
```
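
As with the Transformer specs, the specialized classes are constructed directly and then saved through the common `ModelSpec` workflow. A sketch for Whisper (layer counts, token IDs, and the language mapping are illustrative, not taken from a released checkpoint):

```python
import ctranslate2.specs as specs

# Small Whisper-style specification
whisper_spec = specs.WhisperSpec(
    num_encoder_layers=4,
    num_encoder_heads=6,
    num_decoder_layers=4,
    num_decoder_heads=6,
    d_model=384,
    vocab_size=51865
)

# Generation-time configuration (illustrative token IDs and mapping)
whisper_config = specs.WhisperConfig(
    suppress_ids=[1, 2, 7],
    lang_ids={"<|en|>": 50259}
)

whisper_spec.validate()
whisper_spec.save("custom_whisper_model")
whisper_config.save_as_json("custom_whisper_model/config.json")
```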

## Usage Examples

### Building a Custom Transformer

```python
import ctranslate2.specs as specs

# Create encoder specification
encoder_spec = specs.TransformerEncoderSpec(
    num_layers=6,
    num_heads=8,
    pre_norm=True,
    activation="gelu",
    ffn_glu=True
)

# Create decoder specification
decoder_spec = specs.TransformerDecoderSpec(
    num_layers=6,
    num_heads=8,
    pre_norm=True,
    activation="gelu",
    with_encoder_attention=True,
    ffn_glu=True
)

# Create full transformer specification
transformer_spec = specs.TransformerSpec(encoder_spec, decoder_spec)

# Register vocabularies
source_vocab = ["<unk>", "<s>", "</s>"] + ["token_" + str(i) for i in range(1000)]
target_vocab = ["<unk>", "<s>", "</s>"] + ["token_" + str(i) for i in range(1000)]

transformer_spec.register_source_vocabulary(source_vocab)
transformer_spec.register_target_vocabulary(target_vocab)

# Save the model
transformer_spec.save("custom_transformer_model")
```

### Building a Language Model

```python
import ctranslate2.specs as specs

# Create decoder-only model (GPT-style)
decoder_spec = specs.TransformerDecoderSpec(
    num_layers=12,
    num_heads=12,
    pre_norm=True,
    activation="gelu",
    with_encoder_attention=False,  # No encoder for language models
    rotary_dim=64,                 # Use rotary position embeddings
    parallel_residual=True
)

# Create language model specification
lm_spec = specs.TransformerDecoderModelSpec(decoder_spec)

# Register vocabulary
vocab = ["<unk>", "<s>", "</s>"] + ["token_" + str(i) for i in range(50000)]
lm_spec.register_vocabulary(vocab)

# Configure model
config = specs.LanguageModelConfig(
    unk_token="<unk>",
    bos_token="<s>",
    eos_token="</s>"
)

# Save the model
lm_spec.save("custom_language_model")

# Write the configuration next to the model files using the
# LanguageModelConfig.save_as_json method documented above
config.save_as_json("custom_language_model/config.json")
```

### Using Factory Methods

```python
import ctranslate2.specs as specs

# Create transformer using factory method
transformer_spec = specs.TransformerSpec.from_config(
    num_layers=6,
    num_heads=8,
    d_model=512,
    d_ff=2048,
    activation="gelu",
    pre_norm=True
)

# Create decoder-only model using factory method
decoder_spec = specs.TransformerDecoderModelSpec.from_config(
    num_layers=12,
    num_heads=12,
    d_model=768,
    vocab_size=50257,
    activation="gelu"
)
```

## Types

```python { .api }
# Enumerations for specifications
class Activation:
    RELU: str = "relu"
    GELU: str = "gelu"
    SWISH: str = "swish"
    SILU: str = "silu"
    TANH: str = "tanh"
    SIGMOID: str = "sigmoid"

class EmbeddingsMerge:
    CONCAT: str = "concat"
    ADD: str = "add"

class RotaryScalingType:
    LINEAR: str = "linear"
    SU: str = "su"
    LLAMA3: str = "llama3"

class Quantization:
    CT2: str = "ct2"
    AWQ_GEMM: str = "awq_gemm"
    AWQ_GEMV: str = "awq_gemv"
```
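
Since these enumerations expose plain string values, they can be passed anywhere the specifications above accept a string option. A short sketch, assuming the attributes shown above:

```python
import ctranslate2.specs as specs

# The enumeration attributes are plain strings, so the two forms below
# are equivalent ways of selecting an option.
encoder_spec = specs.TransformerEncoderSpec(
    num_layers=6,
    num_heads=8,
    activation=specs.Activation.GELU,            # same as activation="gelu"
    embeddings_merge=specs.EmbeddingsMerge.ADD   # same as "add"
)
```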