Tessl Tile for pypi/transformers@4.56.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

feature-extraction.md generation.md index.md models.md optimization.md pipelines.md tokenization.md training.md

models.mddocs/

0
# Models
1

2
Comprehensive model management with automatic selection and loading for 350+ architectures. The model system provides consistent APIs across text, vision, audio, and multimodal domains while supporting PyTorch, TensorFlow, and JAX frameworks.
3

4
## Capabilities
5

6
### Auto Model Classes
7

8
Automatic model selection based on model names or configurations, eliminating the need to know specific architecture classes.
9

10
```python { .api }
11
class AutoModel:
12
    @classmethod
13
    def from_pretrained(
14
        cls,
15
        pretrained_model_name_or_path: Union[str, os.PathLike],
16
        *model_args,
17
        config: PretrainedConfig = None,
18
        cache_dir: Union[str, os.PathLike] = None,
19
        ignore_mismatched_sizes: bool = False,
20
        force_download: bool = False,
21
        local_files_only: bool = False,
22
        token: Union[bool, str] = None,
23
        revision: str = "main",
24
        use_safetensors: bool = None,
25
        **kwargs
26
    ) -> PreTrainedModel:
27
        """
28
        Load a pretrained model automatically detecting the architecture.
29
        
30
        Args:
31
            pretrained_model_name_or_path: Model name or local path
32
            config: Model configuration (auto-detected if None)
33
            cache_dir: Custom cache directory
34
            ignore_mismatched_sizes: Ignore size mismatches when loading
35
            force_download: Force fresh download
36
            local_files_only: Only use local files
37
            token: Hugging Face authentication token
38
            revision: Model revision/branch
39
            use_safetensors: Use safetensors format when available
40
        
41
        Returns:
42
            Loaded model instance
43
        """
44
```
45

46
### Task-Specific Auto Models
47

48
Pre-configured models for common tasks with appropriate heads and loss functions.
49

50
```python { .api }
51
class AutoModelForSequenceClassification:
52
    @classmethod
53
    def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs) -> PreTrainedModel:
54
        """Load model for sequence classification tasks."""
55

56
class AutoModelForTokenClassification:
57
    @classmethod 
58
    def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs) -> PreTrainedModel:
59
        """Load model for token classification (NER, POS tagging)."""
60

61
class AutoModelForQuestionAnswering:
62
    @classmethod
63
    def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs) -> PreTrainedModel:
64
        """Load model for extractive question answering."""
65

66
class AutoModelForMaskedLM:
67
    @classmethod
68
    def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs) -> PreTrainedModel:
69
        """Load model for masked language modeling."""
70

71
class AutoModelForCausalLM:
72
    @classmethod
73
    def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs) -> PreTrainedModel:
74
        """Load model for causal language modeling (text generation)."""
75

76
class AutoModelForSeq2SeqLM:
77
    @classmethod
78
    def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs) -> PreTrainedModel:
79
        """Load model for sequence-to-sequence tasks."""
80

81
class AutoModelForImageClassification:
82
    @classmethod
83
    def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs) -> PreTrainedModel:
84
        """Load model for image classification."""
85

86
class AutoModelForObjectDetection:
87
    @classmethod
88
    def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs) -> PreTrainedModel:
89
        """Load model for object detection."""
90
```
91

92
Usage examples:
93
```python
94
# Text classification
95
model = AutoModelForSequenceClassification.from_pretrained(
96
    "bert-base-uncased",
97
    num_labels=3
98
)
99

100
# Text generation  
101
model = AutoModelForCausalLM.from_pretrained("gpt2")
102

103
# Image classification
104
model = AutoModelForImageClassification.from_pretrained("google/vit-base-patch16-224")
105
```
106

107
### Base Model Classes
108

109
Foundation classes that all specific model implementations inherit from.
110

111
```python { .api }
112
class PreTrainedModel:
113
    """Base class for all PyTorch models."""
114
    
115
    def __init__(self, config: PretrainedConfig, *inputs, **kwargs)
116
    
117
    @classmethod
118
    def from_pretrained(
119
        cls,
120
        pretrained_model_name_or_path: Union[str, os.PathLike],
121
        **kwargs
122
    ) -> 'PreTrainedModel':
123
        """Load pretrained model weights and configuration."""
124
    
125
    def save_pretrained(
126
        self,
127
        save_directory: Union[str, os.PathLike],
128
        is_main_process: bool = True,
129
        state_dict: Dict[str, torch.Tensor] = None,
130
        save_function: Callable = None,
131
        push_to_hub: bool = False,
132
        max_shard_size: Union[int, str] = "5GB",
133
        safe_serialization: bool = True,
134
        **kwargs
135
    ) -> None:
136
        """Save model weights and configuration."""
137
    
138
    def push_to_hub(
139
        self,
140
        repo_id: str,
141
        use_temp_dir: bool = None,
142
        commit_message: str = None,
143
        private: bool = None,
144
        token: Union[bool, str] = None,
145
        **kwargs
146
    ) -> str:
147
        """Upload model to Hugging Face Hub."""
148
    
149
    def forward(self, **kwargs) -> Union[torch.Tensor, ModelOutput]:
150
        """Forward pass through the model."""
151
    
152
    def generate(self, **kwargs) -> torch.Tensor:
153
        """Generate sequences (available on generative models)."""
154
    
155
    def resize_token_embeddings(
156
        self, 
157
        new_num_tokens: int = None
158
    ) -> torch.nn.Embedding:
159
        """Resize input token embeddings matrix."""
160
    
161
    def get_input_embeddings(self) -> torch.nn.Module:
162
        """Get input embeddings layer."""
163
    
164
    def set_input_embeddings(self, value: torch.nn.Module) -> None:
165
        """Set input embeddings layer."""
166
    
167
    def tie_weights(self) -> None:
168
        """Tie input and output embeddings if specified in config."""
169
    
170
    def gradient_checkpointing_enable(self) -> None:
171
        """Enable gradient checkpointing for training."""
172
    
173
    def gradient_checkpointing_disable(self) -> None:
174
        """Disable gradient checkpointing."""
175
```
176

177
### TensorFlow Models
178

179
TensorFlow implementations of all model architectures with Keras compatibility.
180

181
```python { .api }
182
class TFPreTrainedModel:
183
    """Base class for all TensorFlow models."""
184
    
185
    @classmethod
186
    def from_pretrained(
187
        cls,
188
        pretrained_model_name_or_path: Union[str, os.PathLike],
189
        **kwargs
190
    ) -> 'TFPreTrainedModel':
191
        """Load pretrained TensorFlow model."""
192
    
193
    def save_pretrained(
194
        self,
195
        save_directory: Union[str, os.PathLike],
196
        **kwargs
197
    ) -> None:
198
        """Save TensorFlow model."""
199
    
200
    def call(self, **kwargs) -> Union[tf.Tensor, TFModelOutput]:
201
        """Forward pass through TensorFlow model."""
202

203
# Task-specific TF models
204
class TFAutoModel:
205
    @classmethod
206
    def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs) -> TFPreTrainedModel
207

208
class TFAutoModelForSequenceClassification:
209
    @classmethod
210
    def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs) -> TFPreTrainedModel
211

212
class TFAutoModelForCausalLM:
213
    @classmethod  
214
    def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs) -> TFPreTrainedModel
215
```
216

217
### Flax/JAX Models
218

219
JAX implementations with Flax for high-performance training and inference.
220

221
```python { .api }
222
class FlaxPreTrainedModel:
223
    """Base class for all Flax/JAX models."""
224
    
225
    @classmethod
226
    def from_pretrained(
227
        cls,
228
        pretrained_model_name_or_path: Union[str, os.PathLike],
229
        **kwargs
230
    ) -> 'FlaxPreTrainedModel':
231
        """Load pretrained Flax model."""
232
    
233
    def save_pretrained(
234
        self,
235
        save_directory: Union[str, os.PathLike],
236
        **kwargs
237
    ) -> None:
238
        """Save Flax model."""
239
    
240
    def __call__(self, **kwargs) -> Union[jnp.ndarray, FlaxModelOutput]:
241
        """Forward pass through Flax model."""
242

243
# Task-specific Flax models  
244
class FlaxAutoModel:
245
    @classmethod
246
    def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs) -> FlaxPreTrainedModel
247

248
class FlaxAutoModelForCausalLM:
249
    @classmethod
250
    def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs) -> FlaxPreTrainedModel
251
```
252

253
### Model Configurations
254

255
Configuration classes that define model architectures and hyperparameters.
256

257
```python { .api }
258
class AutoConfig:
259
    @classmethod
260
    def from_pretrained(
261
        cls,
262
        pretrained_model_name_or_path: Union[str, os.PathLike],
263
        **kwargs
264
    ) -> PretrainedConfig:
265
        """Load model configuration automatically."""
266

267
class PretrainedConfig:
268
    """Base configuration class for all models."""
269
    
270
    def __init__(self, **kwargs)
271
    
272
    @classmethod
273
    def from_pretrained(
274
        cls,
275
        pretrained_model_name_or_path: Union[str, os.PathLike],
276
        **kwargs
277
    ) -> 'PretrainedConfig':
278
        """Load configuration from pretrained model."""
279
    
280
    def save_pretrained(
281
        self,
282
        save_directory: Union[str, os.PathLike],
283
        push_to_hub: bool = False,
284
        **kwargs
285
    ) -> None:
286
        """Save configuration to directory."""
287
    
288
    def to_dict(self) -> Dict[str, Any]:
289
        """Convert configuration to dictionary."""
290
    
291
    def to_json_file(self, json_file_path: Union[str, os.PathLike]) -> None:
292
        """Save configuration to JSON file."""
293
```
294

295
### Popular Model Architectures
296

297
#### BERT Family
298
```python { .api }
299
class BertModel(PreTrainedModel):
300
    """BERT model for encoding tasks."""
301

302
class BertForSequenceClassification(PreTrainedModel):
303
    """BERT model with sequence classification head."""
304

305
class BertForTokenClassification(PreTrainedModel):
306
    """BERT model with token classification head."""
307

308
class BertForQuestionAnswering(PreTrainedModel):
309
    """BERT model with question answering head."""
310

311
class BertForMaskedLM(PreTrainedModel):
312
    """BERT model with masked language modeling head."""
313
```
314

315
#### GPT Family  
316
```python { .api }
317
class GPT2Model(PreTrainedModel):
318
    """GPT-2 model for generation tasks."""
319

320
class GPT2LMHeadModel(PreTrainedModel):
321
    """GPT-2 model with language modeling head."""
322

323
class GPTNeoModel(PreTrainedModel):
324
    """GPT-Neo model architecture."""
325

326
class GPTNeoXModel(PreTrainedModel):
327
    """GPT-NeoX model architecture."""
328

329
class GPTJModel(PreTrainedModel):
330
    """GPT-J model architecture."""
331
```
332

333
#### T5 Family
334
```python { .api }
335
class T5Model(PreTrainedModel):
336
    """T5 encoder-decoder model."""
337

338
class T5ForConditionalGeneration(PreTrainedModel):
339
    """T5 model with conditional generation head."""
340

341
class T5EncoderModel(PreTrainedModel):
342
    """T5 encoder-only model."""
343
```
344

345
#### Vision Models
346
```python { .api }
347
class ViTModel(PreTrainedModel):
348
    """Vision Transformer model."""
349

350
class ViTForImageClassification(PreTrainedModel):
351
    """ViT model with image classification head."""
352

353
class DetrModel(PreTrainedModel):
354
    """DETR object detection model."""
355

356
class DetrForObjectDetection(PreTrainedModel):
357
    """DETR model with object detection head."""
358
```
359

360
#### Multimodal Models
361
```python { .api }
362
class CLIPModel(PreTrainedModel):
363
    """CLIP vision-language model."""
364

365
class CLIPTextModel(PreTrainedModel):
366
    """CLIP text encoder."""
367

368
class CLIPVisionModel(PreTrainedModel):
369
    """CLIP vision encoder."""
370

371
class BlipModel(PreTrainedModel):
372
    """BLIP multimodal model."""
373

374
class BlipForConditionalGeneration(PreTrainedModel):
375
    """BLIP model with conditional generation."""
376
```
377

378
## Model Output Types
379

380
Standard output formats for different model types:
381

382
```python { .api }
383
class BaseModelOutput:
384
    """Base output type for encoder models."""
385
    last_hidden_state: torch.Tensor
386
    hidden_states: Optional[Tuple[torch.Tensor]] = None
387
    attentions: Optional[Tuple[torch.Tensor]] = None
388

389
class BaseModelOutputWithPooling:
390
    """Base output with pooling for classification models."""
391
    last_hidden_state: torch.Tensor
392
    pooler_output: torch.Tensor
393
    hidden_states: Optional[Tuple[torch.Tensor]] = None
394
    attentions: Optional[Tuple[torch.Tensor]] = None
395

396
class CausalLMOutput:
397
    """Output for causal language models."""
398
    loss: Optional[torch.Tensor] = None
399
    logits: torch.Tensor
400
    hidden_states: Optional[Tuple[torch.Tensor]] = None
401
    attentions: Optional[Tuple[torch.Tensor]] = None
402

403
class SequenceClassifierOutput:
404
    """Output for sequence classification models."""
405
    loss: Optional[torch.Tensor] = None
406
    logits: torch.Tensor
407
    hidden_states: Optional[Tuple[torch.Tensor]] = None
408
    attentions: Optional[Tuple[torch.Tensor]] = None
409

410
class TokenClassifierOutput:
411
    """Output for token classification models."""
412
    loss: Optional[torch.Tensor] = None
413
    logits: torch.Tensor  
414
    hidden_states: Optional[Tuple[torch.Tensor]] = None
415
    attentions: Optional[Tuple[torch.Tensor]] = None
416

417
class QuestionAnsweringModelOutput:
418
    """Output for question answering models."""
419
    loss: Optional[torch.Tensor] = None
420
    start_logits: torch.Tensor
421
    end_logits: torch.Tensor
422
    hidden_states: Optional[Tuple[torch.Tensor]] = None
423
    attentions: Optional[Tuple[torch.Tensor]] = None
424
```
425

426
## Loading and Saving Patterns
427

428
Common patterns for working with models:
429

430
```python
431
# Load model with custom configuration
432
config = AutoConfig.from_pretrained("bert-base-uncased")
433
config.num_labels = 3
434
model = AutoModelForSequenceClassification.from_pretrained(
435
    "bert-base-uncased", 
436
    config=config
437
)
438

439
# Load model with custom dtype and device
440
model = AutoModelForCausalLM.from_pretrained(
441
    "gpt2",
442
    torch_dtype=torch.float16,
443
    device_map="auto"
444
)
445

446
# Save model locally
447
model.save_pretrained("./my-model")
448

449
# Upload to Hub
450
model.push_to_hub("username/my-model", private=True)
451

452
# Load from local directory
453
model = AutoModel.from_pretrained("./my-model")
454
```

Version

Tile

Files

models.md.css-3qkkll{font-size:var(--chakra-font-sizes-sm);font-weight:var(--chakra-font-weights-normal);color:var(--chakra-colors-gray-300);}docs/

models.mddocs/