Tessl Tile for pypi/setfit@1.1.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

absa.md core-model-training.md data-utilities.md index.md knowledge-distillation.md model-cards.md model-export.md

model-export.md — docs/

0
# Model Export
1

2
Export functionality for ONNX and OpenVINO formats to enable efficient deployment and inference. These utilities allow SetFit models to be deployed in production environments with optimized inference engines.
3

4
**Note**: Export functions are available in submodules and require explicit imports:
5
- ONNX functions: `from setfit.exporters.onnx import ...`
6
- OpenVINO functions: `from setfit.exporters.openvino import ...`
7
- Utility functions: `from setfit.exporters.utils import ...`
8

9
## Capabilities
10

11
### ONNX Export
12

13
Export SetFit models to ONNX format for cross-platform deployment and hardware optimization.
14

15
```python { .api }
16
def export_onnx(
    model_body: SentenceTransformer,
    model_head: Union[torch.nn.Module, LogisticRegression],
    opset: int,
    output_path: str = "model.onnx",
    ignore_ir_version: bool = True,
    use_hummingbird: bool = False
) -> None:
    """
    Export SetFit model to ONNX IR format.

    Import with ``from setfit.exporters.onnx import export_onnx``.

    Parameters:
    - model_body: Sentence transformer body to export
    - model_head: Classification head (PyTorch module or sklearn)
    - opset: ONNX opset version to use (required, no default)
    - output_path: Path to save the ONNX model (default: "model.onnx")
    - ignore_ir_version: Whether to ignore IR version warnings
    - use_hummingbird: Whether to use Hummingbird ML for sklearn conversion

    Returns:
    None (saves model to output_path); the function does not return the path,
    so keep your own reference to ``output_path``.
    """
38

39
def export_onnx_setfit_model(
40
    setfit_model: "OnnxSetFitModel",
41
    inputs: Dict[str, torch.Tensor],
42
    output_path: str,
43
    opset: int = 12
44
) -> None:
45
    """
46
    Export SetFit model wrapper to ONNX format.
47

48
    Parameters:
49
    - setfit_model: ONNX-compatible SetFit model wrapper
50
    - inputs: Sample inputs for tracing the model
51
    - output_path: Path to save the ONNX model
52
    - opset: ONNX opset version
53
    """
54

55
def export_sklearn_head_to_onnx(
    model_head: LogisticRegression,
    opset: int = 11
) -> "onnx.onnx_ml_pb2.ModelProto":
    """
    Convert sklearn classification head to ONNX format.

    Parameters:
    - model_head: Trained sklearn LogisticRegression model
    - opset: ONNX opset version (default: 11)

    Returns:
    ONNX model proto for the sklearn head
    """
69
```
70

71
### OpenVINO Export
72

73
Export SetFit models to OpenVINO IR format for Intel hardware optimization.
74

75
```python { .api }
76
def export_to_openvino(
    model: SetFitModel,
    output_path: str = "model.xml"
) -> None:
    """
    Export SetFit model to OpenVINO IR format.

    Import with ``from setfit.exporters.openvino import export_to_openvino``.

    Parameters:
    - model: Trained SetFit model to export
    - output_path: Path to save the OpenVINO IR file (default: "model.xml");
      this is the IR file itself, not a directory

    Returns:
    None (saves model to output_path)
    """
90

91
def hummingbird_export(
    model: Union[LogisticRegression, SetFitModel],
    data_sample: Union[np.ndarray, torch.Tensor]
) -> torch.jit.ScriptModule:
    """
    Export model using Hummingbird ML for PyTorch conversion.

    Although listed next to the OpenVINO exporter, this helper is imported
    with ``from setfit.exporters.utils import hummingbird_export``.

    Parameters:
    - model: Model to export (sklearn or SetFit)
    - data_sample: Sample data for tracing

    Returns:
    TorchScript model ready for deployment
    """
105
```
106

107
### ONNX Model Wrapper
108

109
Wrapper class that prepares SetFit models for ONNX export with proper input/output handling.
110

111
```python { .api }
112
class OnnxSetFitModel:
    """Wrapper that bundles body, pooler and head for ONNX export.

    Import with ``from setfit.exporters.onnx import OnnxSetFitModel``.
    """

    def __init__(
        self,
        model_body: "PreTrainedModel",
        pooler: Optional[Union[torch.nn.Module, Callable]] = None,
        model_head: Optional[Union[torch.nn.Module, LogisticRegression]] = None
    ):
        """
        ONNX export wrapper for SetFit models.

        Parameters:
        - model_body: Pre-trained transformer model body
        - pooler: Pooling function/module for embeddings (default: None)
        - model_head: Classification head for predictions (default: None)
        """

    def forward(
        self,
        input_ids: torch.Tensor,
        attention_mask: torch.Tensor
    ) -> torch.Tensor:
        """
        Forward pass through the model for ONNX export.

        Parameters:
        - input_ids: Token IDs from tokenizer
        - attention_mask: Attention mask for input tokens

        Returns:
        Model predictions or embeddings
        """
143
```
144

145
### Utility Functions
146

147
Helper functions for model export and optimization.
148

149
```python { .api }
150
def mean_pooling(
    token_embeddings: torch.Tensor,
    attention_mask: torch.Tensor
) -> torch.Tensor:
    """
    Perform attention-aware mean pooling on token embeddings.

    Import with ``from setfit.exporters.utils import mean_pooling``.

    Parameters:
    - token_embeddings: Token-level embeddings [batch, seq_len, hidden_size]
    - attention_mask: Attention mask [batch, seq_len]

    Returns:
    Pooled sentence embeddings [batch, hidden_size]
    """
164
```
165

166
## Usage Examples
167

168
### Basic ONNX Export
169

170
```python
171
from setfit import SetFitModel
from setfit.exporters.onnx import export_onnx
from transformers import AutoTokenizer
import torch

# Load trained SetFit model
model = SetFitModel.from_pretrained("path/to/your/trained/model")

# Export to ONNX.
# export_onnx() returns None and accepts no `device` argument, so the output
# location is kept in a variable of our own and passed in as `output_path`.
onnx_path = "./setfit_model.onnx"
export_onnx(
    model_body=model.model_body,
    model_head=model.model_head,
    opset=11,
    output_path=onnx_path,
)

print(f"Model exported to: {onnx_path}")

# Verify ONNX model
import onnxruntime as ort

# Create ONNX runtime session
ort_session = ort.InferenceSession(onnx_path)

# Prepare sample input.
# NOTE: the tokenizer must be the one belonging to the exported model body;
# replace the checkpoint name below if your model uses a different one.
tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
sample_text = "This is a test sentence."
inputs = tokenizer(
    sample_text,
    return_tensors="np",
    padding=True,
    truncation=True,
    max_length=512
)

# Run inference (None requests every output of the graph)
onnx_outputs = ort_session.run(
    None,
    {
        "input_ids": inputs["input_ids"],
        "attention_mask": inputs["attention_mask"]
    }
)

print(f"ONNX output shape: {onnx_outputs[0].shape}")
print(f"ONNX predictions: {onnx_outputs[0]}")
218
```
219

220
### Advanced ONNX Export with Quantization
221

222
```python
223
from setfit import SetFitModel
from setfit.exporters.onnx import OnnxSetFitModel, export_onnx_setfit_model
import torch
from transformers import AutoTokenizer, AutoModel

# Load model components
model = SetFitModel.from_pretrained("your-model")
tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")

# Create ONNX-compatible wrapper (pooler is left at its default of None).
# NOTE(review): tracing below presumably requires a PyTorch head; an sklearn
# head would need converting first -- confirm for your model.
onnx_model = OnnxSetFitModel(
    model_body=model.model_body[0].auto_model,  # Get the transformer
    model_head=model.model_head
)

# Prepare sample inputs for tracing
sample_inputs = tokenizer(
    "Sample text for tracing",
    return_tensors="pt",
    padding=True,
    truncation=True,
    max_length=512
)

# Export with dynamic shapes so batch size and sequence length may vary
# at inference time instead of being fixed to the traced sample.
torch.onnx.export(
    onnx_model,
    (sample_inputs["input_ids"], sample_inputs["attention_mask"]),
    "./setfit_dynamic.onnx",
    export_params=True,
    opset_version=11,
    do_constant_folding=True,
    input_names=["input_ids", "attention_mask"],
    output_names=["predictions"],
    dynamic_axes={
        "input_ids": {0: "batch_size", 1: "sequence_length"},
        "attention_mask": {0: "batch_size", 1: "sequence_length"},
        "predictions": {0: "batch_size"}
    }
)

# Apply post-export quantization
from onnxruntime.quantization import quantize_dynamic, QuantType

quantize_dynamic(
    "./setfit_dynamic.onnx",
    "./setfit_quantized.onnx",
    weight_type=QuantType.QUInt8
)

print("Quantized ONNX model saved to: ./setfit_quantized.onnx")
274
```
275

276
### OpenVINO Export for Intel Hardware
277

278
```python
279
from pathlib import Path

from setfit import SetFitModel
from setfit.exporters.openvino import export_to_openvino
import openvino as ov

# Load trained model
model = SetFitModel.from_pretrained("your-trained-setfit-model")

# Export to OpenVINO IR.
# export_to_openvino() takes only `model` and `output_path` (the IR .xml file,
# not a directory) and returns None, so the path is tracked here.
ir_path = Path("./setfit_openvino/model.xml")
ir_path.parent.mkdir(parents=True, exist_ok=True)
export_to_openvino(
    model=model,
    output_path=str(ir_path)
)

print(f"OpenVINO IR exported to: {ir_path}")

# Load and use OpenVINO model
core = ov.Core()
compiled_model = core.compile_model(str(ir_path), "CPU")

# Prepare input
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")

texts = ["This is amazing!", "This is terrible."]
inputs = tokenizer(
    texts,
    return_tensors="np",
    padding=True,
    truncation=True,
    max_length=512
)

# Run inference
infer_request = compiled_model.create_infer_request()
infer_request.infer({
    "input_ids": inputs["input_ids"],
    "attention_mask": inputs["attention_mask"]
})

predictions = infer_request.get_output_tensor().data
print(f"OpenVINO predictions: {predictions}")
322
```
323

324
### TorchScript Export with Hummingbird
325

326
```python
327
from setfit import SetFitModel
from setfit.exporters.utils import hummingbird_export
from transformers import AutoTokenizer
import torch
import numpy as np

# Load model
model = SetFitModel.from_pretrained("your-model")

# The head is documented as either a torch.nn.Module or an sklearn
# LogisticRegression, so an isinstance check separates the two directly.
if isinstance(model.model_head, torch.nn.Module):
    # Already a PyTorch head
    torch_head = torch.jit.script(model.model_head)
else:
    # Create sample data for tracing; 384 must equal the embedding size of
    # the model body (it is the size of all-MiniLM-L6-v2 embeddings).
    sample_embeddings = np.random.randn(10, 384).astype(np.float32)

    # Export sklearn head to PyTorch
    torch_head = hummingbird_export(
        model=model.model_head,
        data_sample=sample_embeddings
    )

    print("Sklearn head converted to TorchScript")

# Create complete TorchScript model
class TorchScriptSetFit(torch.nn.Module):
    """Transformer body + mask-aware mean pooling + classification head."""

    def __init__(self, sentence_transformer, classification_head):
        super().__init__()
        self.sentence_transformer = sentence_transformer
        self.classification_head = classification_head

    def forward(self, input_ids, attention_mask):
        # Get token-level embeddings
        outputs = self.sentence_transformer(
            input_ids=input_ids,
            attention_mask=attention_mask
        )
        token_embeddings = outputs.last_hidden_state

        # Attention-aware mean pooling: padded positions are zeroed out and
        # excluded from the divisor so padding does not dilute the average.
        mask = attention_mask.unsqueeze(-1).to(token_embeddings.dtype)
        summed = (token_embeddings * mask).sum(dim=1)
        counts = mask.sum(dim=1).clamp(min=1e-9)
        embeddings = summed / counts

        # Classify
        predictions = self.classification_head(embeddings)
        return predictions

# Create scriptable model
scriptable_model = TorchScriptSetFit(
    sentence_transformer=model.model_body[0].auto_model,
    classification_head=torch_head
)

# Prepare sample inputs for tracing (must use the model body's tokenizer)
tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
sample_inputs = tokenizer(
    "Sample text for tracing",
    return_tensors="pt",
    padding=True,
    truncation=True,
    max_length=512
)

# Convert to TorchScript
traced_model = torch.jit.trace(
    scriptable_model,
    (sample_inputs["input_ids"], sample_inputs["attention_mask"])
)

# Save TorchScript model
traced_model.save("./setfit_torchscript.pt")
print("TorchScript model saved")

# Load and use
loaded_model = torch.jit.load("./setfit_torchscript.pt")
loaded_model.eval()

with torch.no_grad():
    ts_predictions = loaded_model(
        sample_inputs["input_ids"],
        sample_inputs["attention_mask"]
    )

print(f"TorchScript predictions: {ts_predictions}")
397
```
398

399
### Deployment Performance Comparison
400

401
```python
402
import time
403
import numpy as np
404
from setfit import SetFitModel
405
import onnxruntime as ort
406
import openvino as ov
407
import torch
408

409
def _average_seconds(run_once, num_runs):
    """Return the mean duration in seconds of `run_once()` over `num_runs` calls."""
    run_once()  # untimed warm-up so one-off initialisation is not measured
    started = time.perf_counter()
    for _ in range(num_runs):
        run_once()
    return (time.perf_counter() - started) / num_runs


def benchmark_models(texts, num_runs=100):
    """Compare inference speed across different export formats.

    Parameters:
    - texts: List of input strings; the whole list is run as one batch
    - num_runs: Number of timed repetitions per backend (must be positive)

    Returns:
    Dict mapping "PyTorch", "ONNX", "OpenVINO" and "TorchScript" to the
    average seconds per batch.

    Raises:
    ValueError if num_runs is not positive.
    """
    if num_runs <= 0:
        raise ValueError("num_runs must be a positive integer")

    # Original PyTorch model
    pytorch_model = SetFitModel.from_pretrained("your-model")

    # ONNX model
    ort_session = ort.InferenceSession("./setfit_model.onnx")

    # OpenVINO model
    core = ov.Core()
    ov_model = core.compile_model("./setfit_openvino/model.xml", "CPU")
    infer_request = ov_model.create_infer_request()

    # TorchScript model
    ts_model = torch.jit.load("./setfit_torchscript.pt")
    ts_model.eval()

    # Prepare inputs
    from transformers import AutoTokenizer
    tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")

    inputs = tokenizer(
        texts,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512
    )

    # The ONNX and OpenVINO runtimes take NumPy arrays rather than tensors
    np_inputs = {
        "input_ids": inputs["input_ids"].numpy(),
        "attention_mask": inputs["attention_mask"].numpy()
    }

    def run_pytorch():
        with torch.no_grad():
            pytorch_model.predict(texts)

    def run_onnx():
        ort_session.run(None, np_inputs)

    def run_openvino():
        infer_request.infer(np_inputs)
        infer_request.get_output_tensor().data

    def run_torchscript():
        with torch.no_grad():
            ts_model(inputs["input_ids"], inputs["attention_mask"])

    # Note: the PyTorch timing includes tokenization (predict() takes raw
    # text) while the other three time the pre-tokenized forward pass only.
    return {
        "PyTorch": _average_seconds(run_pytorch, num_runs),
        "ONNX": _average_seconds(run_onnx, num_runs),
        "OpenVINO": _average_seconds(run_openvino, num_runs),
        "TorchScript": _average_seconds(run_torchscript, num_runs),
    }
478

479
# Run benchmark
test_texts = [
    "This product is amazing!",
    "I'm not satisfied with this purchase.",
    "Great value for money.",
    "Poor quality, would not recommend."
]

benchmark_results = benchmark_models(test_texts, num_runs=50)

print("Inference Speed Comparison (average per batch):")
# Speedups are reported relative to the unexported PyTorch model
baseline_time = benchmark_results["PyTorch"]

for model_type, avg_time in benchmark_results.items():
    speedup = baseline_time / avg_time
    print(f"{model_type:12}: {avg_time:.4f}s ({speedup:.1f}x speedup)")
495
```
496

497
### Production Deployment Example
498

499
```python
500
from fastapi import FastAPI
501
import onnxruntime as ort
502
from typing import List
503
from pydantic import BaseModel
504
import numpy as np
505
from transformers import AutoTokenizer
506

507
# Initialize FastAPI app
app = FastAPI(title="SetFit ONNX Inference API")

# Load ONNX model and tokenizer once at import time so every request
# reuses the same session instead of reloading the model.
ort_session = ort.InferenceSession("./setfit_model.onnx")
tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
513

514
# Define request/response models
515
class PredictionRequest(BaseModel):
    """Request body for the /predict endpoint."""

    # Texts to classify; processed together as one batch
    texts: List[str]
    # Truncation length handed to the tokenizer
    max_length: int = 512
518

519
class PredictionResponse(BaseModel):
    """Response body for the /predict endpoint."""

    # Arg-max class index for each input text
    predictions: List[int]
    # Per-class softmax probabilities for each input text
    probabilities: List[List[float]]
    # Seconds spent tokenizing and running inference
    processing_time: float
523

524
@app.post("/predict", response_model=PredictionResponse)
def predict(request: PredictionRequest):
    """Classify a batch of texts with the ONNX model.

    Declared with plain ``def`` (not ``async def``) because tokenization and
    ONNX inference are blocking, CPU-bound calls; FastAPI runs plain ``def``
    handlers in a worker thread, so the event loop stays responsive.

    Parameters:
    - request: Texts to classify and the tokenizer truncation length

    Returns:
    PredictionResponse with class indices, probabilities and elapsed seconds.
    """
    import time
    start_time = time.perf_counter()

    # Nothing to classify: answer directly rather than tokenizing an empty batch
    if not request.texts:
        return PredictionResponse(
            predictions=[],
            probabilities=[],
            processing_time=time.perf_counter() - start_time
        )

    # Tokenize inputs
    inputs = tokenizer(
        request.texts,
        return_tensors="np",
        padding=True,
        truncation=True,
        max_length=request.max_length
    )

    # Run ONNX inference
    outputs = ort_session.run(
        None,
        {
            "input_ids": inputs["input_ids"],
            "attention_mask": inputs["attention_mask"]
        }
    )

    # Process outputs
    # NOTE(review): assumes the first graph output holds per-class scores of
    # shape [batch, num_classes] -- confirm for the exported head.
    logits = outputs[0]
    probabilities = softmax(logits, axis=1).tolist()
    predictions = np.argmax(logits, axis=1).tolist()

    processing_time = time.perf_counter() - start_time

    return PredictionResponse(
        predictions=predictions,
        probabilities=probabilities,
        processing_time=processing_time
    )
559

560
def softmax(x, axis=None):
    """Compute softmax values.

    Parameters:
    - x: Array-like of scores
    - axis: Axis along which to normalise; None normalises over all elements

    Returns:
    NumPy array of the same shape as x whose values sum to 1 along `axis`.
    """
    x = np.asarray(x)
    # Subtracting the maximum leaves the result unchanged but keeps np.exp
    # from overflowing on large scores.
    exp_x = np.exp(x - np.max(x, axis=axis, keepdims=True))
    return exp_x / np.sum(exp_x, axis=axis, keepdims=True)
564

565
@app.get("/health")
async def health():
    """Liveness probe: return a static status payload."""
    return {"status": "healthy", "model": "setfit-onnx"}
568

569
if __name__ == "__main__":
    # Imported here so uvicorn is only needed when run as a script
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
572
```

Version

Tile

Files

model-export.md — docs/

model-export.md — docs/