Convert scikit-learn models to ONNX format for cross-platform inference and deployment
—
Comprehensive type system for ONNX conversion that maps Python/NumPy data types to ONNX types with automatic inference and shape validation. The type system ensures accurate data representation and compatibility between scikit-learn models and ONNX runtime environments.
Foundation classes for the type system hierarchy that provide common functionality and structure for all data types.
class DataType:
"""
Base class for all data types in the conversion system.
Provides common interface for type operations and validation.
"""
class TensorType(DataType):
"""
Base class for tensor data types.
Represents multi-dimensional arrays with shape and element type information.
"""
Types for complex data structures that contain multiple values or nested data.
class SequenceType(DataType):
"""
Represents sequence data containing ordered collections of elements.
Parameters:
- element_type: DataType, type of elements in the sequence
"""
class DictionaryType(DataType):
"""
Represents dictionary/map data with key-value pairs.
Parameters:
- key_type: DataType, type of dictionary keys
- value_type: DataType, type of dictionary values
"""
Simple data types representing single values without dimensions.
class FloatType(DataType):
"""32-bit floating point scalar type."""
class Int64Type(DataType):
"""64-bit signed integer scalar type."""
class StringType(DataType):
"""String scalar type."""
Multi-dimensional array types supporting various numeric and string data representations.
class Int8TensorType(TensorType):
"""
8-bit signed integer tensor type.
Parameters:
- shape: list, tensor dimensions (None for dynamic dimensions)
"""
class Int16TensorType(TensorType):
"""
16-bit signed integer tensor type.
Parameters:
- shape: list, tensor dimensions (None for dynamic dimensions)
"""
class Int32TensorType(TensorType):
"""
32-bit signed integer tensor type.
Parameters:
- shape: list, tensor dimensions (None for dynamic dimensions)
"""
class Int64TensorType(TensorType):
"""
64-bit signed integer tensor type.
Parameters:
- shape: list, tensor dimensions (None for dynamic dimensions)
"""
class UInt8TensorType(TensorType):
"""
8-bit unsigned integer tensor type.
Parameters:
- shape: list, tensor dimensions (None for dynamic dimensions)
"""
class UInt16TensorType(TensorType):
"""
16-bit unsigned integer tensor type.
Parameters:
- shape: list, tensor dimensions (None for dynamic dimensions)
"""
class UInt32TensorType(TensorType):
"""
32-bit unsigned integer tensor type.
Parameters:
- shape: list, tensor dimensions (None for dynamic dimensions)
"""
class UInt64TensorType(TensorType):
"""
64-bit unsigned integer tensor type.
Parameters:
- shape: list, tensor dimensions (None for dynamic dimensions)
"""
class Float16TensorType(TensorType):
"""
16-bit floating point tensor type (half precision).
Parameters:
- shape: list, tensor dimensions (None for dynamic dimensions)
"""
class FloatTensorType(TensorType):
"""
32-bit floating point tensor type (single precision).
Parameters:
- shape: list, tensor dimensions (None for dynamic dimensions)
"""
class DoubleTensorType(TensorType):
"""
64-bit floating point tensor type (double precision).
Parameters:
- shape: list, tensor dimensions (None for dynamic dimensions)
"""
class BooleanTensorType(TensorType):
"""
Boolean tensor type.
Parameters:
- shape: list, tensor dimensions (None for dynamic dimensions)
"""
class StringTensorType(TensorType):
"""
String tensor type.
Parameters:
- shape: list, tensor dimensions (None for dynamic dimensions)
"""
class Complex64TensorType(TensorType):
"""
64-bit complex number tensor type.
Parameters:
- shape: list, tensor dimensions (None for dynamic dimensions)
"""
class Complex128TensorType(TensorType):
"""
128-bit complex number tensor type.
Parameters:
- shape: list, tensor dimensions (None for dynamic dimensions)
"""
Automatic type detection and conversion utilities that analyze Python/NumPy objects to determine appropriate ONNX types.
def guess_data_type(data_type):
"""
Infer ONNX data type from Python/NumPy type.
Parameters:
- data_type: Python type, NumPy dtype, or data sample
Returns:
- DataType: Appropriate ONNX data type
"""
def guess_numpy_type(data_type):
"""
Convert data type to NumPy equivalent.
Parameters:
- data_type: DataType instance
Returns:
- numpy.dtype: Equivalent NumPy data type
"""
def guess_proto_type(data_type):
"""
Convert data type to ONNX protobuf type.
Parameters:
- data_type: DataType instance
Returns:
- int: ONNX protobuf type identifier
"""
def guess_tensor_type(data_type):
"""
Convert scalar type to tensor type.
Parameters:
- data_type: DataType instance
Returns:
- TensorType: Corresponding tensor type
"""
def copy_type(data_type):
"""
Create a copy of existing data type.
Parameters:
- data_type: DataType instance to copy
Returns:
- DataType: Copy of the input type
"""
def guess_initial_types(X, initial_types=None):
"""
Automatically infer initial types from input data.
Parameters:
- X: array-like, input data sample
- initial_types: list, existing type specifications (optional)
Returns:
- list: List of (name, type) tuples for model inputs
"""
from skl2onnx.common.data_types import (
FloatTensorType, Int64TensorType, StringTensorType, BooleanTensorType
)
# Create tensor types with explicit shapes
float_input = FloatTensorType([None, 10]) # Variable batch size, 10 features
int_labels = Int64TensorType([None]) # Variable length label vector
string_features = StringTensorType([None, 5]) # Variable batch, 5 string features
bool_mask = BooleanTensorType([None, 10]) # Boolean mask tensor
# Dynamic shapes (None for variable dimensions)
dynamic_input = FloatTensorType([None, None]) # Fully dynamic 2D tensor
batch_dynamic = FloatTensorType([None, 100]) # Variable batch, fixed features
# Fixed shapes
fixed_input = FloatTensorType([32, 64]) # Fixed 32x64 tensor
image_input = FloatTensorType([1, 3, 224, 224]) # Single RGB image
import numpy as np
from skl2onnx.common.data_types import guess_data_type, guess_initial_types
# Infer type from NumPy array
X = np.random.randn(100, 20).astype(np.float32)
inferred_type = guess_data_type(X.dtype)
print(inferred_type) # FloatTensorType
# Automatically create initial types from data
initial_types = guess_initial_types(X)
print(initial_types) # [('X', FloatTensorType([None, 20]))]
from skl2onnx.common.data_types import (
guess_numpy_type, guess_proto_type, copy_type
)
# Create a tensor type
tensor_type = FloatTensorType([None, 10])
# Convert to NumPy equivalent
numpy_dtype = guess_numpy_type(tensor_type)
print(numpy_dtype) # float32
# Get ONNX protobuf type
proto_type = guess_proto_type(tensor_type)
print(proto_type) # ONNX TensorProto type ID
# Create a copy
type_copy = copy_type(tensor_type)
from skl2onnx.common.data_types import SequenceType, DictionaryType
# Sequence of float tensors
sequence_type = SequenceType(FloatTensorType([None, 5]))
# Dictionary with string keys and float values
dict_type = DictionaryType(StringType(), FloatTensorType([None]))
# Multiple inputs with different types
initial_types = [
('numerical_features', FloatTensorType([None, 20])),
('categorical_features', Int64TensorType([None, 5])),
('text_features', StringTensorType([None, 1]))
]
# Different precision levels
half_precision = Float16TensorType([None, 10]) # Memory efficient
single_precision = FloatTensorType([None, 10]) # Standard precision
double_precision = DoubleTensorType([None, 10]) # High precision
# Integer precision levels
small_ints = Int8TensorType([None]) # -128 to 127
large_ints = Int64TensorType([None]) # Full 64-bit range
Note: use None for variable/dynamic dimensions.
Install with Tessl CLI
npx tessl i tessl/pypi-skl2onnx