Convert scikit-learn models to ONNX format for cross-platform inference and deployment.

ONNX operator creation system and mixin classes for enhancing scikit-learn models with ONNX capabilities. The algebra module enables direct ONNX operator composition, scikit-learn integration, and creation of custom ONNX-based transformations that can be seamlessly integrated into scikit-learn pipelines.

Core class for creating and manipulating ONNX operators programmatically.
class OnnxOperator:
    """
    Main class for creating ONNX operators programmatically.

    Enables direct construction of ONNX computational graphs using
    a Python-based API that mirrors ONNX operator specifications.
    """

    def __init__(self, op_type, *inputs, **kwargs):
        """
        Create an ONNX operator instance.

        Parameters:
        - op_type: str, ONNX operator type (e.g., 'MatMul', 'Add', 'Relu')
        - inputs: Variable, input variables for the operator
        - kwargs: additional operator attributes and parameters
        """

    def to_onnx(self, inputs=None, outputs=None, target_opset=None):
        """
        Generate an ONNX model from the operator graph.

        Parameters:
        - inputs: list, input specifications for the model
        - outputs: list, output specifications for the model
        - target_opset: int, target ONNX opset version

        Returns:
        - ModelProto: complete ONNX model
        """

    def add_to(self, scope, container):
        """
        Add this operator to a conversion container.

        Parameters:
        - scope: Scope, conversion scope context
        - container: Container, conversion container for operators
        """

# Mixin class that adds ONNX operator capabilities to scikit-learn models.
class OnnxOperatorMixin:
    """
    Mixin class for adding ONNX operator capabilities to sklearn models.

    When combined with sklearn estimators, enables direct use of ONNX
    operators within sklearn pipelines and provides seamless conversion
    to ONNX format.

    Import from: from skl2onnx.algebra.onnx_operator_mixin import OnnxOperatorMixin
    """

    def to_onnx(self, X=None, name=None, options=None, white_op=None,
                black_op=None, final_types=None, target_opset=None, verbose=0):
        """
        Convert the enhanced model to ONNX format.

        Parameters:
        - X: array-like, sample input for type inference (optional)
        - name: str, name for the ONNX model (optional)
        - options: dict, conversion options (optional)
        - white_op: list, whitelist of allowed operators (optional)
        - black_op: list, blacklist of forbidden operators (optional)
        - final_types: list, expected output types for validation (optional)
        - target_opset: int, target ONNX opset version (optional)
        - verbose: int, verbosity level (default 0)

        Returns:
        - ModelProto: ONNX model representation
        """

    def onnx_graph(self, **kwargs):
        """
        Generate the ONNX graph representation of the model.

        Parameters:
        - kwargs: additional parameters for graph generation

        Returns:
        - GraphProto: ONNX graph representation
        """

# Pre-built ONNX-based transformers that can be used directly in sklearn pipelines.
class CastTransformer:
    """Type-casting transformer backed by the ONNX ``Cast`` operator.

    Converts input data to a specified target type; handy for keeping
    dtypes consistent in mixed-precision pipelines.
    """

    def __init__(self, dtype=None):
        """Create the transformer.

        Parameters:
        - dtype: numpy.dtype, target data type for casting
        """

    def fit(self, X, y=None):
        """No training is needed for casting; simply return self."""
        return self

    def transform(self, X):
        """Cast the input data to the configured target type."""
        pass
class ReplaceTransformer:
    """Value-replacement transformer built on ONNX operators.

    Substitutes configured values in the input data — e.g. for missing
    value sentinels or categorical mappings.
    """

    def __init__(self, replace_dict=None):
        """Create the transformer.

        Parameters:
        - replace_dict: dict, mapping of old values to new values
        """

    def fit(self, X, y=None):
        """Learn the replacement mappings; returns self."""
        return self

    def transform(self, X):
        """Substitute the configured values in the input data."""
        pass
class WOETransformer:
    """Weight of Evidence (WOE) encoder built on ONNX operators.

    Computes WOE encoding for categorical variables; widely used in
    credit scoring and risk-modeling applications.
    """

    def __init__(self, positive_class=1):
        """Create the transformer.

        Parameters:
        - positive_class: value treated as the positive class when
          computing the evidence weights.
        """

    def fit(self, X, y):
        """Learn the evidence weights from the training data; returns self."""
        return self

    def transform(self, X):
        """Encode categorical features with their WOE values."""
        pass

# ONNX-based regression models with type casting capabilities.
class CastRegressor:
    """Regressor wrapper that adds automatic dtype casting.

    Wraps any sklearn regressor and casts inputs/outputs so the model
    stays ONNX-compatible.
    """

    def __init__(self, regressor, dtype=None):
        """Create the wrapper.

        Parameters:
        - regressor: sklearn regressor instance to wrap
        - dtype: numpy.dtype, target data type for casting
        """

    def fit(self, X, y):
        """Fit the wrapped regressor with type casting; returns self."""
        return self

    def predict(self, X):
        """Predict, casting input and output to the configured dtype."""
        pass

# ONNX-compatible text processing transformers with conversion tracing.
class TraceableCountVectorizer:
    """CountVectorizer variant that records an ONNX conversion trace.

    Behaves like sklearn's CountVectorizer while logging details of the
    conversion process for debugging and optimization.
    """

    def __init__(self, **kwargs):
        """Create the vectorizer.

        Parameters:
        - kwargs: parameters forwarded to the underlying CountVectorizer
        """

    def fit(self, X, y=None):
        """Fit the vectorizer, recording trace information; returns self."""
        return self

    def transform(self, X):
        """Transform text documents with tracing enabled."""
        pass

    def get_conversion_trace(self):
        """Return the recorded conversion trace details."""
        pass
class TraceableTfidfVectorizer:
    """
    Enhanced TfidfVectorizer with ONNX conversion tracing capabilities.

    Extends sklearn's TfidfVectorizer with detailed logging and tracing
    of the conversion process for debugging and optimization.
    """

    def __init__(self, **kwargs):
        """
        Initialize the traceable TF-IDF vectorizer.

        Parameters:
        - kwargs: parameters passed to the underlying TfidfVectorizer
        """

    def fit(self, X, y=None):
        """Fit the vectorizer with conversion tracing."""
        return self

    def transform(self, X):
        """Transform text with tracing support."""
        pass

    def get_conversion_trace(self):
        """Get detailed conversion trace information."""
        pass

from skl2onnx.algebra import OnnxOperator
# Example: compose ONNX operators directly to build Y = X @ W + b.
# NOTE(review): relies on `OnnxOperator` imported just above — confirm scope.
from skl2onnx.common.data_types import FloatTensorType
import numpy as np

# Sample input and the tensor type used in the model signature.
X = np.random.randn(10, 5).astype(np.float32)
input_type = FloatTensorType([None, 5])

# Weights and bias for a simple linear transformation: Y = X @ W + b
W = np.random.randn(5, 3).astype(np.float32)
b = np.random.randn(3).astype(np.float32)

# Define ONNX operators; 'X' is a symbolic input name, W/b are constants.
matmul_op = OnnxOperator('MatMul', 'X', W, name='linear_transform')
add_op = OnnxOperator('Add', matmul_op, b, name='add_bias')

# Generate the complete ONNX model from the final operator.
onnx_model = add_op.to_onnx(
    inputs=[('X', input_type)],
    outputs=[('Y', FloatTensorType([None, 3]))],
    target_opset=18
)

from skl2onnx import wrap_as_onnx_mixin
from sklearn.linear_model import LinearRegression
from sklearn.datasets import make_regression

# Example: enhance a fitted sklearn model with ONNX capabilities.
X, y = make_regression(n_samples=100, n_features=10, random_state=42)
model = LinearRegression()
model.fit(X, y)

# Wrap the fitted model so it gains the OnnxOperatorMixin methods.
enhanced_model = wrap_as_onnx_mixin(model, target_opset=18)

# The enhanced model now exposes `to_onnx` and `onnx_graph`.
onnx_model = enhanced_model.to_onnx(X, name="enhanced_linear_regression")
onnx_graph = enhanced_model.onnx_graph()

from skl2onnx.sklapi import CastTransformer, ReplaceTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
import numpy as np

# Example: use ONNX-aware transformers inside a standard sklearn pipeline.
pipeline = Pipeline([
    ('cast_input', CastTransformer(dtype=np.float32)),
    ('replace_missing', ReplaceTransformer({-999: 0.0})),
    ('scaler', StandardScaler()),
    ('regressor', RandomForestRegressor(n_estimators=10))
])

# Training data with -999 used as a missing-value sentinel.
X_train = np.random.randn(100, 5)
X_train[X_train < -2] = -999  # Add missing value indicators
y_train = np.random.randn(100)
pipeline.fit(X_train, y_train)

# Convert the whole fitted pipeline to ONNX.
from skl2onnx import to_onnx
onnx_pipeline = to_onnx(pipeline, X_train.astype(np.float32))

from skl2onnx.sklapi import WOETransformer
import pandas as pd

# Example: Weight of Evidence encoding of a categorical column.
data = pd.DataFrame({
    'category': ['A', 'B', 'C', 'A', 'B', 'C', 'A', 'B'],
    'target': [1, 0, 1, 1, 0, 0, 1, 1]
})

# Fit the encoder on the category column against the binary target.
woe_transformer = WOETransformer(positive_class=1)
woe_transformer.fit(data[['category']], data['target'])
woe_encoded = woe_transformer.transform(data[['category']])
print("WOE encoded features:", woe_encoded)

from skl2onnx.sklapi import TraceableCountVectorizer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression

# Example: text-classification pipeline whose vectorizer records a
# conversion trace for ONNX debugging.
text_pipeline = Pipeline([
    ('vectorizer', TraceableCountVectorizer(max_features=1000, stop_words='english')),
    ('classifier', LogisticRegression())
])

# Tiny toy corpus with binary labels.
texts = [
    "This is a positive example",
    "This is a negative example",
    "Another positive text sample",
    "Another negative text sample"
]
labels = [1, 0, 1, 0]
text_pipeline.fit(texts, labels)

# Inspect the trace collected by the traceable vectorizer.
vectorizer = text_pipeline.named_steps['vectorizer']
trace_info = vectorizer.get_conversion_trace()
print("Conversion trace information:", trace_info)

# Convert the fitted text pipeline to ONNX.
from skl2onnx import to_onnx
onnx_text_model = to_onnx(text_pipeline, texts)

from skl2onnx.algebra import OnnxOperator
import numpy as np

# Example: build sigmoid(X @ W + b) as an ONNX graph, then attach the
# weight/bias initializers by hand.
X_shape = [None, 10]
W_shape = [10, 5]

# Define the computation graph; 'X', 'W' and 'b' are symbolic tensor names.
matmul = OnnxOperator('MatMul', 'X', 'W')
add_bias = OnnxOperator('Add', matmul, 'b')
sigmoid = OnnxOperator('Sigmoid', add_bias, output_names=['Y'])

# Concrete parameter values that will become graph initializers.
W_init = np.random.randn(*W_shape).astype(np.float32)
b_init = np.random.randn(5).astype(np.float32)

# NOTE(review): `FloatTensorType` relies on the earlier
# `from skl2onnx.common.data_types import FloatTensorType` import — confirm.
onnx_model = sigmoid.to_onnx(
    inputs=[('X', FloatTensorType(X_shape))],
    outputs=[('Y', FloatTensorType([None, 5]))],
    target_opset=18
)

# Attach the initializer tensors directly to the generated graph.
from onnx import helper, TensorProto
W_tensor = helper.make_tensor('W', TensorProto.FLOAT, W_shape, W_init.flatten())
b_tensor = helper.make_tensor('b', TensorProto.FLOAT, [5], b_init)
onnx_model.graph.initializer.extend([W_tensor, b_tensor])

from skl2onnx.sklapi import CastRegressor
from sklearn.ensemble import RandomForestRegressor
import numpy as np

# Example: wrap a regressor so inputs/outputs are cast automatically.
base_regressor = RandomForestRegressor(n_estimators=20, random_state=42)
cast_regressor = CastRegressor(base_regressor, dtype=np.float32)

# Train with double-precision data; the wrapper handles casting.
X_train = np.random.randn(100, 8).astype(np.float64)  # Double precision input
y_train = np.random.randn(100).astype(np.float64)
cast_regressor.fit(X_train, y_train)

# Predictions come back in the configured dtype.
X_test = np.random.randn(20, 8).astype(np.float64)
predictions = cast_regressor.predict(X_test)
print(f"Prediction dtype: {predictions.dtype}")  # Will be float32

# NOTE(review): `to_onnx` relies on the earlier `from skl2onnx import to_onnx`
# import being in scope at this point — confirm.
onnx_cast_model = to_onnx(cast_regressor, X_test.astype(np.float32))

# Create conditional logic: output = X if condition else Y
# Example: element-wise conditional built from Greater + Where.
condition_op = OnnxOperator('Greater', 'X', 0.5)
where_op = OnnxOperator('Where', condition_op, 'X', 'Y', output_names=['result'])

# Generate a model with two inputs and one conditional output.
conditional_model = where_op.to_onnx(
    inputs=[('X', FloatTensorType([None, 1])), ('Y', FloatTensorType([None, 1]))],
    outputs=[('result', FloatTensorType([None, 1]))],
    target_opset=18
)

# Create reduction operations: mean along axis
# Example: mean reduction along axis 1, keeping the reduced dimension.
# NOTE(review): since ONNX opset 18, ReduceMean takes `axes` as an input
# rather than an attribute — confirm skl2onnx translates this correctly.
reduce_mean_op = OnnxOperator('ReduceMean', 'X', axes=[1], keepdims=1,
                              output_names=['mean_result'])
reduction_model = reduce_mean_op.to_onnx(
    inputs=[('X', FloatTensorType([None, 10]))],
    outputs=[('mean_result', FloatTensorType([None, 1]))],
    target_opset=18
)

# wrap_as_onnx_mixin for ONNX capabilities
# Install with Tessl CLI:
npx tessl i tessl/pypi-skl2onnx