0
# Export and Deployment
1
2
Export trained models to a range of deployment formats, including ONNX, TensorRT, CoreML, TensorFlow, OpenVINO, and mobile formats, with optional optimizations such as FP16/INT8 quantization.
3
4
## Capabilities
5
6
### Model Export
7
8
Export models to different formats for deployment across various platforms and frameworks.
9
10
```python { .api }
11
def export(self, format='torchscript', imgsz=640, keras=False, optimize=False, **kwargs) -> str:
    """
    Export model to various formats for deployment.

    Parameters:
    - format (str): Export format ('torchscript', 'onnx', 'openvino', 'engine',
      'coreml', 'saved_model', 'pb', 'tflite', 'edgetpu', 'tfjs', 'paddle')
    - imgsz (int | List[int]): Image size for export (default: 640)
    - keras (bool): Use Keras for TensorFlow exports (default: False)
    - optimize (bool): TorchScript optimization (default: False)

    Additional options accepted through **kwargs:
    - half (bool): FP16 quantization (default: False)
    - int8 (bool): INT8 quantization (default: False)
    - data (str): Dataset YAML path used for calibration when int8=True
    - dynamic (bool): ONNX/TensorRT dynamic axes (default: False)
    - simplify (bool): ONNX simplification (default: False)
    - opset (int): ONNX opset version (default: None)
    - workspace (int): TensorRT workspace size in GB (default: 4)
    - nms (bool): Add NMS module to model (default: False)
    - lr (float): CoreML learning rate (default: 0.01)
    - batch (int): Batch size for export (default: 1)
    - device (str): Device to export from ('cpu', '0', etc.)
    - verbose (bool): Verbose output (default: False)

    Returns:
    str: Path to exported model
    """
35
```
36
37
**Supported Export Formats:**
38
39
| Format | Description | Platform | Extension |
40
|--------|-------------|----------|-----------|
41
| `torchscript` | TorchScript | PyTorch | `.torchscript` |
42
| `onnx` | ONNX | Multi-platform | `.onnx` |
43
| `openvino` | OpenVINO | Intel | `_openvino_model/` |
44
| `engine` | TensorRT | NVIDIA | `.engine` |
45
| `coreml` | CoreML | Apple | `.mlpackage` |
46
| `saved_model` | TensorFlow SavedModel | TensorFlow | `_saved_model/` |
47
| `pb` | TensorFlow GraphDef | TensorFlow | `.pb` |
48
| `tflite` | TensorFlow Lite | Mobile/Edge | `.tflite` |
49
| `edgetpu` | Edge TPU | Google Coral | `_edgetpu.tflite` |
50
| `tfjs` | TensorFlow.js | Web | `_web_model/` |
51
| `paddle` | PaddlePaddle | Baidu | `_paddle_model/` |
52
53
**Usage Examples:**
54
55
```python
56
from ultralytics import YOLO
57
58
# Load a trained model
59
model = YOLO("yolo11n.pt")
60
61
# Export to ONNX
62
onnx_path = model.export(format='onnx')
63
64
# Export to TensorRT with optimization
65
trt_path = model.export(
66
format='engine',
67
imgsz=640,
68
half=True,
69
workspace=8,
70
verbose=True
71
)
72
73
# Export to CoreML for iOS
74
coreml_path = model.export(
75
format='coreml',
76
imgsz=640,
77
half=True,
78
nms=True
79
)
80
81
# Export to TensorFlow Lite for mobile
82
tflite_path = model.export(
83
format='tflite',
84
imgsz=320,
85
half=True
86
)
87
88
# Export with dynamic input shapes (ONNX)
89
onnx_path = model.export(
90
format='onnx',
91
dynamic=True,
92
simplify=True,
93
opset=12
94
)
95
96
# Export for Edge TPU
97
edgetpu_path = model.export(format='edgetpu')
98
99
# Export to TensorFlow.js for web deployment
100
tfjs_path = model.export(format='tfjs')
101
```
102
103
### Model Benchmarking
104
105
Benchmark exported models across different formats to compare inference performance.
106
107
```python { .api }
108
def benchmark(self, **kwargs) -> dict:
    """
    Run the model through the supported export formats and report the results.

    Keyword arguments:
    - data (str): Path to the dataset YAML used for benchmarking
    - imgsz (int): Image size used while benchmarking (default: 640)
    - half (bool): Run inference in FP16 (default: False)
    - int8 (bool): Use INT8 quantization (default: False)
    - device (str): Device to run the benchmark on ('cpu', '0', etc.)
    - verbose (bool): Emit verbose output (default: False)

    Returns:
    dict: Benchmark results including speed and accuracy metrics
    """
123
```
124
125
**Usage Examples:**
126
127
```python
128
# Basic benchmarking
129
results = model.benchmark()
130
131
# Benchmark with specific parameters
132
results = model.benchmark(
133
data="coco8.yaml",
134
imgsz=640,
135
half=True,
136
device='0'
137
)
138
139
# Print benchmark results
140
for format_name, metrics in results.items():
141
print(f"{format_name}: {metrics['inference_time']:.2f}ms")
142
```
143
144
### Deployment Examples
145
146
#### ONNX Runtime Deployment
147
148
```python
149
import onnxruntime as ort
150
import numpy as np
151
from PIL import Image
152
import cv2
153
154
# Load ONNX model
155
session = ort.InferenceSession("yolo11n.onnx")
156
157
# Preprocess image
158
def preprocess(image_path):
    """
    Load an image from disk and turn it into a batched model input.

    Parameters:
    - image_path (str): Path to the image file

    Returns:
    np.ndarray: float32 array of shape (1, 3, 640, 640), RGB channel order,
    values scaled to [0, 1]

    Raises:
    FileNotFoundError: If the image cannot be read
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports a missing/unreadable file by returning None
        raise FileNotFoundError(f"Could not read image: {image_path}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR; the model expects RGB
    image = cv2.resize(image, (640, 640))  # Plain resize: aspect ratio is not preserved
    image = image.transpose(2, 0, 1)  # HWC to CHW
    image = np.expand_dims(image, axis=0)  # Add batch dimension
    image = image.astype(np.float32) / 255.0
    return image
165
166
# Run inference
167
input_data = preprocess("image.jpg")
168
outputs = session.run(None, {"images": input_data})
169
```
170
171
#### TensorRT Deployment
172
173
```python
174
import tensorrt as trt
175
import pycuda.driver as cuda
176
import pycuda.autoinit
177
178
# Load TensorRT engine
179
def load_engine(engine_path):
    """
    Deserialize a TensorRT engine from a file.

    Engine files written by the Ultralytics exporter start with a 4-byte
    little-endian length followed by that many bytes of JSON metadata, and
    only then the serialized engine. The header is detected and skipped when
    present; a file without it is passed to TensorRT unchanged.

    Parameters:
    - engine_path (str): Path to the .engine file

    Returns:
    The engine object returned by trt.Runtime.deserialize_cuda_engine
    """
    import json

    with open(engine_path, 'rb') as f:
        data = f.read()

    payload = data
    meta_len = int.from_bytes(data[:4], byteorder='little')
    # Only treat the prefix as a header if the declared length fits in the file
    if 0 < meta_len <= len(data) - 4:
        try:
            json.loads(data[4:4 + meta_len].decode('utf-8'))
        except ValueError:
            # Covers UnicodeDecodeError and JSONDecodeError: no metadata header
            pass
        else:
            payload = data[4 + meta_len:]

    runtime = trt.Runtime(trt.Logger(trt.Logger.WARNING))
    return runtime.deserialize_cuda_engine(payload)
183
184
engine = load_engine("yolo11n.engine")
185
186
# Create execution context
187
context = engine.create_execution_context()
188
189
# Allocate memory
190
def allocate_buffers(engine):
    """
    Allocate a host/device buffer pair for every binding of the engine.

    Parameters:
    - engine: Deserialized TensorRT engine; iterating it yields its bindings

    Returns:
    tuple: (inputs, outputs, bindings) where inputs and outputs are lists of
    {'host': ..., 'device': ...} dicts and bindings holds the device
    allocations converted to int, in iteration order
    """
    # NOTE(review): get_binding_shape / get_binding_dtype / binding_is_input
    # look like the pre-TensorRT-10 binding API - confirm the installed version.
    device_ptrs = []
    buffers_by_role = {True: [], False: []}  # key: "is this binding an input?"

    for name in engine:
        np_dtype = trt.nptype(engine.get_binding_dtype(name))
        n_elements = trt.volume(engine.get_binding_shape(name))

        # Host buffer first; the device allocation is sized from it
        host_buf = cuda.pagelocked_empty(n_elements, np_dtype)
        dev_buf = cuda.mem_alloc(host_buf.nbytes)
        device_ptrs.append(int(dev_buf))

        is_input = bool(engine.binding_is_input(name))
        buffers_by_role[is_input].append({'host': host_buf, 'device': dev_buf})

    return buffers_by_role[True], buffers_by_role[False], device_ptrs
211
212
inputs, outputs, bindings = allocate_buffers(engine)
213
```
214
215
#### CoreML Deployment (iOS)
216
217
```swift
218
import CoreML
import Vision

// Load the CoreML model and wrap it for Vision.
// VNCoreMLRequest takes a VNCoreMLModel, not the raw MLModel.
guard let model = try? yolo11n(configuration: MLModelConfiguration()),
      let visionModel = try? VNCoreMLModel(for: model.model) else {
    fatalError("Failed to load model")
}

// Create Vision request
let request = VNCoreMLRequest(model: visionModel) { request, error in
    guard let results = request.results as? [VNRecognizedObjectObservation] else {
        return
    }

    // Process detection results
    for result in results {
        let boundingBox = result.boundingBox
        let confidence = result.confidence
        let label = result.labels.first?.identifier ?? "Unknown"

        print("Detected: \(label) (\(confidence))")
    }
}

// Perform inference (cgImage is the CGImage to run detection on)
let handler = VNImageRequestHandler(cgImage: cgImage, options: [:])
do {
    try handler.perform([request])
} catch {
    // Report the failure instead of discarding it with try?
    print("Inference failed: \(error)")
}
245
```
246
247
#### TensorFlow Lite Deployment (Android)
248
249
```java
250
import org.tensorflow.lite.Interpreter;
251
import org.tensorflow.lite.support.image.TensorImage;
252
import org.tensorflow.lite.support.image.ImageProcessor;
253
254
// Load TFLite model
255
Interpreter tflite = new Interpreter(loadModelFile("yolo11n.tflite"));
256
257
// Preprocess image
258
ImageProcessor imageProcessor = new ImageProcessor.Builder()
259
.add(new ResizeOp(640, 640, ResizeOp.ResizeMethod.BILINEAR))
260
.add(new NormalizeOp(0.0f, 255.0f))
261
.build();
262
263
TensorImage tensorImage = new TensorImage(DataType.FLOAT32);
264
tensorImage.load(bitmap);
265
tensorImage = imageProcessor.process(tensorImage);
266
267
// Run inference
268
float[][][][] output = new float[1][25200][85]; // Adjust based on model
269
tflite.run(tensorImage.getBuffer(), output);
270
```
271
272
### Optimization Techniques
273
274
#### Quantization
275
276
```python
277
# INT8 quantization during export (OpenVINO; `data` supplies the calibration images)
model.export(format='openvino', int8=True, data="calibration_data.yaml")

# Half precision (FP16)
model.export(format='onnx', half=True)

# Dynamic quantization (TensorFlow)
model.export(format='saved_model', keras=True, int8=True)
285
```
286
287
#### Model Pruning
288
289
```python
290
# Structured pruning during training
291
model.train(
292
data="dataset.yaml",
293
epochs=100,
294
prune=0.3 # Remove 30% of parameters
295
)
296
297
# Export pruned model
298
model.export(format='onnx', optimize=True)
299
```
300
301
#### Knowledge Distillation
302
303
```python
304
# Train smaller model with teacher model guidance
305
teacher_model = YOLO("yolo11x.pt")
306
student_model = YOLO("yolo11n.pt")
307
308
# Distillation training (custom implementation required)
309
student_model.train(
310
data="dataset.yaml",
311
teacher=teacher_model,
312
distillation_alpha=0.7
313
)
314
```
315
316
## Types
317
318
```python { .api }
319
from typing import Dict, Any, Optional, Union, List
320
from pathlib import Path
321
322
# Export configuration types
323
ExportFormat = str # 'onnx', 'engine', 'coreml', etc.
324
ExportConfig = Dict[str, Any]
325
BenchmarkResults = Dict[str, Dict[str, float]]
326
327
# Platform-specific types
328
class DeploymentTarget:
    """String identifiers for the kinds of platform a model can be deployed to."""

    # Each attribute's value is its own name as a plain string.
    cpu: str = "cpu"
    gpu: str = "gpu"
    mobile: str = "mobile"
    web: str = "web"
    edge: str = "edge"
334
335
# Optimization options
336
class OptimizationConfig:
    """Boolean switches for export-time optimizations; every switch defaults to off."""

    # Precision reduction
    half: bool = False  # FP16 precision
    int8: bool = False  # INT8 quantization

    # Graph-level options
    dynamic: bool = False  # Dynamic input shapes
    simplify: bool = False  # Model simplification
    optimize: bool = False  # Framework optimization
342
```