tessl/pypi-onnx

Open Neural Network Exchange for AI model interoperability and machine learning frameworks

—

Pending

Overview

Eval results

Files

NumPy Integration

Name: tessl/pypi-onnx
Author: tessl

Bidirectional conversion between ONNX tensors and NumPy arrays, supporting all ONNX data types including specialized formats like bfloat16 and float8 variants. This module enables seamless integration between ONNX models and NumPy-based data processing workflows.

Capabilities

Array Conversion

Convert between ONNX TensorProto and NumPy arrays with full type support.

def to_array(tensor: TensorProto, base_dir: str = "") -> np.ndarray:
    """
    Convert an ONNX TensorProto into a NumPy array.

    Parameters:
    - tensor: the TensorProto object to convert.
    - base_dir: directory used to locate the tensor's data when that data
      is stored externally rather than inside the TensorProto; defaults
      to the empty string.

    Returns:
    numpy.ndarray: NumPy array with appropriate dtype and shape

    Raises:
    ValueError: If tensor data is invalid or unsupported
    """

def from_array(arr: np.ndarray, name: Optional[str] = None) -> TensorProto:
    """
    Convert a NumPy array into an ONNX TensorProto.

    Parameters:
    - arr: the NumPy array to convert.
    - name: optional name to give the resulting tensor; defaults to None.

    Returns:
    TensorProto: ONNX tensor representation

    Raises:
    ValueError: If array dtype is not supported by ONNX
    """

Container Conversion

Convert between ONNX container types and Python collections.

def to_list(sequence):
    """
    Convert an ONNX SequenceProto to a Python list.

    This is the inverse direction of from_list.

    Parameters:
    - sequence: SequenceProto to convert

    Returns:
    list: Python list containing the converted sequence elements

    Raises:
    ValueError: If sequence contains unsupported element types
    """

def from_list(lst, dtype=None, name=None):
    """
    Convert a Python list to an ONNX SequenceProto.

    Parameters:
    - lst: Python list to convert
    - dtype: Optional element data type specification
    - name: Optional name for the sequence

    NOTE(review): upstream onnx.numpy_helper appears to declare `name`
    before `dtype`. Pass both by keyword (as in from_list(lst, name=...))
    and confirm the order against the installed onnx version.

    Returns:
    SequenceProto: ONNX sequence representation

    Raises:
    ValueError: If list elements cannot be converted to ONNX types
    """

def to_dict(map_proto):
    """
    Convert an ONNX MapProto to a Python dictionary.

    This is the inverse direction of from_dict.

    Parameters:
    - map_proto: MapProto to convert

    Returns:
    dict: Python dictionary with converted key-value pairs

    Raises:
    ValueError: If map contains unsupported key or value types
    """

def from_dict(dict_, name=None):
    """
    Convert a Python dictionary to an ONNX MapProto.

    Parameters:
    - dict_: Python dictionary to convert (the trailing underscore avoids
      shadowing the built-in `dict`)
    - name: Optional name for the map

    Returns:
    MapProto: ONNX map representation

    Raises:
    ValueError: If dictionary keys or values cannot be converted
    """

def to_optional(optional):
    """
    Convert an ONNX OptionalProto to the Python value it holds.

    Parameters:
    - optional: OptionalProto to convert

    Returns:
    Any or None: Converted value, or None if the optional is empty.
    Callers should check for None before using the result.

    Raises:
    ValueError: If optional contains unsupported element type
    """

def from_optional(value, name=None):
    """
    Convert a Python value to an ONNX OptionalProto.

    Parameters:
    - value: Python value to convert (None for empty optional)
    - name: Optional name for the optional

    NOTE(review): upstream onnx.numpy_helper appears to name the first
    parameter differently and to accept an additional dtype argument.
    Pass the value positionally and confirm against the installed onnx
    version.

    Returns:
    OptionalProto: ONNX optional representation

    Raises:
    ValueError: If value cannot be converted to ONNX type
    """

Specialized Float Type Conversion

Convert specialized floating-point encodings (bfloat16 and the float8 variants) to float32, and rebuild complex numbers from flat sequences of (real, imaginary) pairs.

def bfloat16_to_float32(bfloat16_data, dims=None):
    """
    Convert bfloat16 data to float32.

    Parameters:
    - bfloat16_data: NumPy array holding the bfloat16 bit patterns as
      uint16 values
    - dims: Optional target dimensions for reshaping the result; defaults
      to None

    Returns:
    numpy.ndarray: Float32 array with converted values
    """

def float8e4m3_to_float32(float8_data, fn=True, uz=False):
    """
    Convert float8 E4M3 data to float32.

    Parameters:
    - float8_data: NumPy array holding the float8 E4M3 bit patterns as
      uint8 values
    - fn: Whether the encoding is finite-only (True) or includes
      infinities
    - uz: Whether to use the unsigned-zero representation (presumably the
      variant without a negative zero; confirm against the ONNX float8
      specification)

    NOTE(review): upstream onnx.numpy_helper appears to accept a `dims`
    argument before `fn`. Pass `fn` and `uz` by keyword and confirm
    against the installed onnx version.

    Returns:
    numpy.ndarray: Float32 array with converted values
    """

def float8e5m2_to_float32(float8_data, fn=True, uz=False):
    """
    Convert float8 E5M2 data to float32.

    Parameters:
    - float8_data: NumPy array holding the float8 E5M2 bit patterns as
      uint8 values
    - fn: Whether the encoding is finite-only (True) or includes
      infinities
    - uz: Whether to use the unsigned-zero representation (presumably the
      variant without a negative zero; confirm against the ONNX float8
      specification)

    NOTE(review): upstream onnx.numpy_helper appears to default `fn` to
    False for E5M2 and to accept a `dims` argument before `fn`. Pass
    `fn` and `uz` explicitly by keyword and confirm against the
    installed onnx version.

    Returns:
    numpy.ndarray: Float32 array with converted values
    """

def combine_pairs_to_complex(fa):
    """
    Combine consecutive pairs of float values into complex numbers.

    Each pair is read as (real, imaginary), so [1.0, 2.0, 3.0, 4.0]
    represents (1+2j) and (3+4j).

    Parameters:
    - fa: Sequence of float values (length must be even)

    Returns:
    list: List of complex numbers, one per input pair
    """

Utility Functions

Additional utilities for tensor data manipulation and conversion.

def convert_endian(tensor):
    """
    Convert the endianness of a tensor's data in place for cross-platform
    compatibility.

    Parameters:
    - tensor: TensorProto to modify; the object passed in is changed

    Returns:
    None: Modifies tensor in place, so keep a copy first if the original
    byte order is still needed
    """

def create_random_int(shape, low=0, high=100, output_type=TensorProto.INT32, seed=None):
    """
    Create a tensor with random integer values.

    Parameters:
    - shape: Tensor shape (list of integers)
    - low: Minimum value (inclusive); defaults to 0
    - high: Maximum value (exclusive); defaults to 100
    - output_type: ONNX data type for output; defaults to
      TensorProto.INT32
    - seed: Random seed for reproducibility; defaults to None

    NOTE(review): upstream onnx.numpy_helper appears to declare this
    helper as create_random_int(input_shape, dtype, seed=1) returning a
    NumPy array rather than a TensorProto. Confirm the signature and
    return type against the installed onnx version before relying on
    `low`, `high` or `output_type`.

    Returns:
    TensorProto: Tensor with random integer data
    """

Usage Examples

Basic Array Conversion

import onnx
from onnx import numpy_helper, TensorProto
import numpy as np

# Build a 2x3 float32 array and wrap it in an ONNX tensor named "my_tensor"
np_array = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], dtype=np.float32)
onnx_tensor = numpy_helper.from_array(np_array, name="my_tensor")

# dims holds the tensor shape; data_type holds the ONNX element-type code
# (comparable to constants such as TensorProto.INT64), not a NumPy dtype
print(f"ONNX tensor shape: {onnx_tensor.dims}")
print(f"ONNX tensor type: {onnx_tensor.data_type}")

# Convert the ONNX tensor back to a NumPy array and confirm the round trip
# preserved shape, dtype and values
converted_array = numpy_helper.to_array(onnx_tensor)
print(f"Converted array shape: {converted_array.shape}")
print(f"Converted array dtype: {converted_array.dtype}")
print(f"Data preserved: {np.array_equal(np_array, converted_array)}")

Working with Different Data Types

import onnx
from onnx import numpy_helper, TensorProto
import numpy as np

# Create one array per NumPy dtype being demonstrated
int_array = np.array([1, 2, 3, 4, 5], dtype=np.int64)
float_array = np.array([1.5, 2.5, 3.5], dtype=np.float64)
bool_array = np.array([True, False, True], dtype=bool)

# Convert to ONNX tensors; the second positional argument is the tensor name
int_tensor = numpy_helper.from_array(int_array, "integers")
float_tensor = numpy_helper.from_array(float_array, "floats")
bool_tensor = numpy_helper.from_array(bool_array, "booleans")

# Show the element-type code chosen for each tensor next to the TensorProto
# constant it is expected to equal (np.float64 corresponds to DOUBLE)
print(f"Int tensor type: {int_tensor.data_type} (should be {TensorProto.INT64})")
print(f"Float tensor type: {float_tensor.data_type} (should be {TensorProto.DOUBLE})")
print(f"Bool tensor type: {bool_tensor.data_type} (should be {TensorProto.BOOL})")

# Convert back and verify that each round trip reproduces the original values
print(f"Int conversion: {np.array_equal(int_array, numpy_helper.to_array(int_tensor))}")
print(f"Float conversion: {np.array_equal(float_array, numpy_helper.to_array(float_tensor))}")
print(f"Bool conversion: {np.array_equal(bool_array, numpy_helper.to_array(bool_tensor))}")

Container Type Conversion

import onnx
from onnx import numpy_helper
import numpy as np

# --- Sequences: a Python list of same-dtype arrays <-> SequenceProto ---
python_list = [
    np.array([1, 2, 3], dtype=np.float32),
    np.array([4, 5, 6], dtype=np.float32),
    np.array([7, 8, 9], dtype=np.float32)
]

# Convert to ONNX sequence; the element tensors are read back from the
# sequence's tensor_values field
onnx_sequence = numpy_helper.from_list(python_list, name="tensor_sequence")
print(f"Sequence has {len(onnx_sequence.tensor_values)} tensors")

# Convert back to Python list
converted_list = numpy_helper.to_list(onnx_sequence)
print(f"Converted list has {len(converted_list)} arrays")

# --- Maps: a Python dict with string keys <-> MapProto ---
python_dict = {
    "feature_1": np.array([0.1, 0.2, 0.3], dtype=np.float32),
    "feature_2": np.array([0.4, 0.5, 0.6], dtype=np.float32)
}

# Convert to ONNX map; string keys are read back from the string_keys field
onnx_map = numpy_helper.from_dict(python_dict, name="feature_map")
print(f"Map has {len(onnx_map.string_keys)} keys")

# Convert back to Python dict
converted_dict = numpy_helper.to_dict(onnx_map)
print(f"Converted dict keys: {list(converted_dict.keys())}")

# --- Optionals: a value or None <-> OptionalProto ---
optional_value = np.array([1, 2, 3], dtype=np.int32)
onnx_optional = numpy_helper.from_optional(optional_value, name="maybe_tensor")

# Convert back; .shape is safe here only because a value was stored above
converted_optional = numpy_helper.to_optional(onnx_optional)
print(f"Optional value shape: {converted_optional.shape}")

# Empty optional: None goes in and None is expected to come back out
empty_optional = numpy_helper.from_optional(None, name="empty_optional")
converted_empty = numpy_helper.to_optional(empty_optional)
print(f"Empty optional: {converted_empty}")  # Should be None

Specialized Float Type Handling

import onnx
from onnx import numpy_helper
import numpy as np

# Simulate bfloat16 data (normally this would come from a model).
# A bfloat16 value is held here as a uint16 containing the UPPER 16 bits of
# the corresponding float32 bit pattern; the lower 16 bits are discarded.
float32_data = np.array([1.0, 2.5, 3.14159, -1.5], dtype=np.float32)

# Build the bfloat16 representation by hand: reinterpret each float32 as a
# uint32 and shift right by 16 to keep only its upper half. This truncates
# rather than rounds. In practice, you'd get this from an ONNX tensor.
bfloat16_as_uint16 = (float32_data.view(np.uint32) >> 16).astype(np.uint16)

# Convert bfloat16 back to float32. Because the low 16 bits were dropped
# above, values that need them (such as 3.14159) are not expected to be
# recovered exactly, so the two printed arrays may differ slightly.
recovered_float32 = numpy_helper.bfloat16_to_float32(bfloat16_as_uint16)
print(f"Original: {float32_data}")
print(f"Recovered: {recovered_float32}")

# Work with complex numbers: the flat list is read as (real, imaginary) pairs
complex_data = [1.0, 2.0, 3.0, 4.0]  # Represents (1+2j) and (3+4j)
complex_numbers = numpy_helper.combine_pairs_to_complex(complex_data)
print(f"Complex numbers: {complex_numbers}")

Integration with Model Processing

import onnx
from onnx import numpy_helper
import numpy as np

def process_model_tensors(model_path, output_path):
    """Normalize every float initializer of an ONNX model and save the result.

    Loads the model at ``model_path``, rewrites each float32/float64
    initializer to zero mean and (approximately) unit variance, and writes
    the modified model to ``output_path``. Initializers of any other dtype
    are reported but left untouched.

    Parameters:
    - model_path: path of the ONNX model file to read
    - output_path: path the processed model is written to

    Returns:
    None: the processed model is saved to disk and progress is printed
    """

    # Load model
    model = onnx.load_model(model_path)

    # Process initializer tensors
    for i, tensor in enumerate(model.graph.initializer):
        print(f"Processing tensor: {tensor.name}")

        # Convert to NumPy array
        np_array = numpy_helper.to_array(tensor)
        print(f"  Original shape: {np_array.shape}, dtype: {np_array.dtype}")

        # Perform NumPy operations (example: normalize weights)
        if np_array.dtype in [np.float32, np.float64]:
            # Normalize to zero mean, unit variance; the epsilon keeps the
            # division finite for constant tensors whose std is 0.
            normalized = (np_array - np_array.mean()) / (np_array.std() + 1e-8)

            # Pin the result to the initializer's original dtype. Scalar
            # arithmetic can otherwise promote a 0-d float32 tensor to
            # float64 (NumPy 1.x promotion rules), which would silently turn
            # a FLOAT initializer into DOUBLE and break the graph's types.
            normalized = np.asarray(normalized, dtype=np_array.dtype)

            # Convert back to ONNX tensor, keeping the original name
            new_tensor = numpy_helper.from_array(normalized, tensor.name)

            # Replace in model
            model.graph.initializer[i].CopyFrom(new_tensor)
            print(f"  Normalized tensor: mean={normalized.mean():.6f}, std={normalized.std():.6f}")

    # Save processed model
    onnx.save_model(model, output_path)
    print(f"Processed model saved to: {output_path}")

# Example usage (commented out)
# process_model_tensors("input_model.onnx", "normalized_model.onnx")

Error Handling and Data Validation

import onnx
from onnx import numpy_helper, TensorProto
import numpy as np

def safe_tensor_conversion(np_array, tensor_name):
    """Safely convert NumPy array to ONNX tensor with error handling.

    Prints warnings for dtypes outside the known-supported set and for
    float arrays containing NaN or infinite values, converts the array,
    and verifies that converting back yields the same values.

    Parameters:
    - np_array: NumPy array to convert
    - tensor_name: name to give the resulting ONNX tensor

    Returns:
    TensorProto or None: the converted tensor, or None if the conversion
    raised or the round trip changed the values
    """

    # NumPy dtypes that correspond directly to an ONNX tensor element type.
    # The small integer and complex types are included so that valid arrays
    # such as uint8 image data do not trigger a spurious warning.
    supported_dtypes = [
        np.float16, np.float32, np.float64,
        np.int8, np.int16, np.int32, np.int64,
        np.uint8, np.uint16, np.uint32, np.uint64,
        np.bool_, np.complex64, np.complex128,
    ]

    try:
        # Check for supported dtypes (warning only; conversion is still tried)
        if np_array.dtype not in supported_dtypes:
            print(f"Warning: dtype {np_array.dtype} may not be fully supported")

        # Check for NaN or infinite values in float arrays
        if np.issubdtype(np_array.dtype, np.floating):
            if np.any(np.isnan(np_array)):
                print("Warning: Array contains NaN values")
            if np.any(np.isinf(np_array)):
                print("Warning: Array contains infinite values")

        # Convert to ONNX tensor
        onnx_tensor = numpy_helper.from_array(np_array, tensor_name)

        # Verify round-trip conversion
        recovered_array = numpy_helper.to_array(onnx_tensor)

        # equal_nan=True so that NaNs preserved by the round trip count as equal
        if not np.allclose(np_array, recovered_array, equal_nan=True):
            print("Warning: Round-trip conversion changed values")
            return None

        print(f"Successfully converted tensor '{tensor_name}'")
        return onnx_tensor

    except Exception as e:
        # Deliberate catch-all: this helper is a best-effort boundary that
        # reports any failure and signals it to the caller with None.
        print(f"Error converting tensor '{tensor_name}': {e}")
        return None

# Test with various arrays: each entry is (array, tensor name). The NaN and
# inf entries exist to exercise the corresponding warnings.
test_arrays = [
    (np.array([1, 2, 3], dtype=np.int32), "integers"),
    (np.array([1.0, 2.0, np.nan], dtype=np.float32), "with_nan"),
    (np.array([1.0, 2.0, np.inf], dtype=np.float32), "with_inf"),
    (np.array([True, False, True], dtype=bool), "booleans"),
]

# Run each case; the outcome is reported through the helper's printed
# messages, and the returned tensor (or None on failure) is not used further.
for arr, name in test_arrays:
    tensor = safe_tensor_conversion(arr, name)

Install with Tessl CLI