tessl/pypi-kserve

tessl install tessl/pypi-kserve@0.16.1

The KServe Python SDK provides standardized interfaces for building and deploying machine learning model serving infrastructure on Kubernetes.

docs/reference/protocol-types.md

Protocol Data Types

Data types for structured inference requests and responses following the KServe inference protocol v2 specification.

Capabilities

InferInput

Input tensor descriptor for inference requests.

class InferInput:
    """
    Input tensor for inference request.

    Args:
        name (str): Input name
        shape (list): Input shape as list of integers
        datatype (str): Data type (e.g., "FP32", "INT64", "BYTES")
        data (any, optional): Input data
        parameters (dict, optional): Additional parameters
    """
    def __init__(
        self,
        name: str,
        shape: List[int],
        datatype: str,
        data: Any = None,
        parameters: Optional[Dict] = None
    ): ...

    # Properties
    @property
    def name(self) -> str:
        """Input name"""

    @property
    def shape(self) -> List[int]:
        """Input shape"""

    @property
    def datatype(self) -> str:
        """Data type"""

    @property
    def data(self) -> Any:
        """Input data"""

    @property
    def parameters(self) -> Dict:
        """Additional parameters"""

    def set_data_from_numpy(self, input_tensor: np.ndarray, binary_data: bool = False) -> None:
        """
        Set input data from NumPy array.

        Args:
            input_tensor (np.ndarray): NumPy array with input data
            binary_data (bool): Use binary data encoding (default: False)
        """

    def as_numpy(self) -> np.ndarray:
        """
        Convert input data to NumPy array.

        Returns:
            np.ndarray: Input data as NumPy array
        """

    def as_string(self) -> List[str]:
        """
        Convert BYTES data to list of strings.

        Returns:
            list: List of strings decoded from bytes data

        Raises:
            InvalidInput: If datatype is not BYTES
        """

    def to_dict(self) -> Dict:
        """
        Convert input to dictionary representation.

        Returns:
            dict: Dictionary with name, shape, datatype, data, and parameters
        """

RequestedOutput

Descriptor for outputs requested in an inference request.

class RequestedOutput:
    """
    Output requested as part of inference request.

    Args:
        name (str): Output name
        parameters (dict, optional): Additional parameters
    """
    def __init__(self, name: str, parameters: Optional[Dict] = None): ...

    # Properties
    @property
    def name(self) -> str:
        """Output name"""

    @property
    def parameters(self) -> Optional[Dict]:
        """Additional parameters"""

    @property
    def binary_data(self) -> Optional[bool]:
        """
        Check if binary data format is requested.

        Returns:
            bool or None: True if binary format is requested, False if not, None if unset
        """

    def set_data_format(self, binary_data: bool) -> None:
        """
        Set the data format for this output.

        Args:
            binary_data (bool): True for binary format, False for JSON format
        """

    @property
    def classification(self) -> Optional[int]:
        """
        Get classification parameter.

        Returns:
            int or None: Classification count if set
        """

InferOutput

Output tensor descriptor for inference responses.

class InferOutput:
    """
    Output tensor from inference response.

    Args:
        name (str): Output name
        shape (list): Output shape as list of integers
        datatype (str): Data type (e.g., "FP32", "INT64", "BYTES")
        data (any, optional): Output data
        parameters (dict, optional): Additional parameters
    """
    def __init__(
        self,
        name: str,
        shape: List[int],
        datatype: str,
        data: Optional[Union[List, np.ndarray, InferTensorContents]] = None,
        parameters: Optional[Union[Dict, MessageMap[str, InferParameter]]] = None
    ): ...

    # Properties
    @property
    def name(self) -> str:
        """Output name"""

    @property
    def shape(self) -> List[int]:
        """Output shape"""

    @property
    def datatype(self) -> str:
        """Data type"""

    @property
    def data(self) -> Any:
        """Output data"""

    @property
    def parameters(self) -> Dict:
        """Additional parameters"""

    def set_data_from_numpy(self, output_tensor: np.ndarray, binary_data: bool = True) -> None:
        """
        Set output data from NumPy array.

        Args:
            output_tensor (np.ndarray): NumPy array with output data
            binary_data (bool): Use binary data encoding (default: True)

        Raises:
            InferenceError: If tensor shape/dtype doesn't match or conversion fails
        """

    def as_numpy(self) -> np.ndarray:
        """
        Convert output data to NumPy array.

        Returns:
            np.ndarray: Output data as NumPy array

        Raises:
            InvalidInput: If datatype is not recognized
        """

    def to_dict(self) -> Dict:
        """
        Convert output to dictionary representation.

        Returns:
            dict: Dictionary with name, shape, datatype, data, and parameters
        """

InferRequest

Inference request wrapper containing inputs and metadata.

class InferRequest:
    """
    Inference request.

    Args:
        model_name (str): Name of the model
        infer_inputs (list): List of InferInput objects
        request_id (str, optional): Request ID for tracking
        raw_inputs (list, optional): Binary data for inputs
        from_grpc (bool): Whether request is from gRPC (default: False)
        parameters (dict, optional): Additional request parameters
        request_outputs (list, optional): List of RequestedOutput objects
        model_version (str, optional): Model version
    """
    def __init__(
        self,
        model_name: str,
        infer_inputs: List[InferInput],
        request_id: Optional[str] = None,
        raw_inputs = None,
        from_grpc: Optional[bool] = False,
        parameters: Optional[Union[Dict, MessageMap[str, InferParameter]]] = None,
        request_outputs: Optional[List[RequestedOutput]] = None,
        model_version: Optional[str] = None
    ): ...

    # Properties
    @property
    def model_name(self) -> str:
        """Model name"""

    @property
    def model_version(self) -> str:
        """Model version"""

    @property
    def request_id(self) -> str:
        """Request ID"""

    @property
    def inputs(self) -> List[InferInput]:
        """List of input tensors"""

    @property
    def parameters(self) -> Dict:
        """Request parameters"""

    @property
    def from_grpc(self) -> bool:
        """Whether from gRPC"""

    def as_dataframe(self) -> pd.DataFrame:
        """
        Convert inputs to pandas DataFrame.

        Returns:
            pd.DataFrame: Input data as DataFrame

        Raises:
            InvalidInput: If inputs cannot be converted to DataFrame
        """

    def get_input_by_name(self, name: str) -> Optional[InferInput]:
        """
        Find input by name.

        Args:
            name (str): Input name to search for

        Returns:
            InferInput or None: Input with specified name, or None if not found
        """

    @classmethod
    def from_grpc(cls, request: ModelInferRequest) -> "InferRequest":
        """
        Construct InferRequest from gRPC ModelInferRequest.

        Args:
            request (ModelInferRequest): gRPC request object

        Returns:
            InferRequest: Constructed request object
        """

    @classmethod
    def from_bytes(cls, req_bytes: bytes, json_length: int, model_name: str) -> "InferRequest":
        """
        Construct InferRequest from raw bytes.

        Args:
            req_bytes (bytes): Raw request bytes
            json_length (int): Length of JSON part
            model_name (str): Model name

        Returns:
            InferRequest: Constructed request object
        """

    @classmethod
    def from_inference_request(cls, request: InferenceRequest, model_name: str) -> "InferRequest":
        """
        Construct from InferenceRequest datamodel.

        Args:
            request (InferenceRequest): InferenceRequest object
            model_name (str): Model name

        Returns:
            InferRequest: Constructed request object
        """

    def to_rest(self) -> Tuple[Union[bytes, Dict], Optional[int]]:
        """
        Convert to REST format (JSON or binary).

        Returns:
            tuple: (request_data, json_length) where request_data is bytes or dict,
                   and json_length is the JSON portion length (None if dict)
        """

    def to_grpc(self) -> ModelInferRequest:
        """
        Convert to gRPC ModelInferRequest format.

        Returns:
            ModelInferRequest: gRPC format request
        """

    @property
    def use_binary_outputs(self) -> bool:
        """
        Check if binary outputs should be used.

        Returns:
            bool: True if outputs should be in binary format
        """

InferResponse

Inference response wrapper containing outputs and metadata.

class InferResponse:
    """
    Inference response.

    Args:
        response_id (str): Response ID
        model_name (str): Name of the model
        infer_outputs (list): List of InferOutput objects
        model_version (str, optional): Model version
        raw_outputs (list, optional): Raw binary data for outputs
        from_grpc (bool): Whether response is from gRPC (default: False)
        parameters (dict, optional): Additional response parameters
        use_binary_outputs (bool): Use binary format for outputs in REST (default: False)
        requested_outputs (list, optional): List of RequestedOutput objects
    """
    def __init__(
        self,
        response_id: str,
        model_name: str,
        infer_outputs: List[InferOutput],
        model_version: Optional[str] = None,
        raw_outputs = None,
        from_grpc: Optional[bool] = False,
        parameters: Optional[Union[Dict, MessageMap[str, InferParameter]]] = None,
        use_binary_outputs: Optional[bool] = False,
        requested_outputs: Optional[List[RequestedOutput]] = None
    ): ...

    # Properties
    @property
    def model_name(self) -> str:
        """Model name"""

    @property
    def model_version(self) -> str:
        """Model version"""

    @property
    def response_id(self) -> str:
        """Response ID"""

    @property
    def outputs(self) -> List[InferOutput]:
        """List of output tensors"""

    @property
    def parameters(self) -> Dict:
        """Response parameters"""

    @property
    def from_grpc(self) -> bool:
        """Whether from gRPC"""

    def as_dataframe(self) -> pd.DataFrame:
        """
        Convert outputs to pandas DataFrame.

        Returns:
            pd.DataFrame: Output data as DataFrame

        Raises:
            InvalidInput: If outputs cannot be converted to DataFrame
        """

    def get_output_by_name(self, name: str) -> Optional[InferOutput]:
        """
        Find output by name.

        Args:
            name (str): Output name to search for

        Returns:
            InferOutput or None: Output with specified name, or None if not found
        """

    @classmethod
    def from_grpc(cls, response: ModelInferResponse) -> "InferResponse":
        """
        Construct InferResponse from gRPC ModelInferResponse.

        Args:
            response (ModelInferResponse): gRPC response object

        Returns:
            InferResponse: Constructed response object
        """

    @classmethod
    def from_rest(cls, response: Dict) -> "InferResponse":
        """
        Construct InferResponse from REST dict.

        Args:
            response (dict): REST response dictionary

        Returns:
            InferResponse: Constructed response object
        """

    @classmethod
    def from_bytes(cls, res_bytes: bytes, json_length: int) -> "InferResponse":
        """
        Construct InferResponse from raw bytes.

        Args:
            res_bytes (bytes): Raw response bytes
            json_length (int): Length of JSON part

        Returns:
            InferResponse: Constructed response object
        """

    def to_rest(self) -> Tuple[Union[bytes, Dict], Optional[int]]:
        """
        Convert to REST format (JSON or binary).

        Returns:
            tuple: (response_data, json_length) where response_data is bytes or dict,
                   and json_length is the JSON portion length (None if dict)
        """

    def to_grpc(self) -> ModelInferResponse:
        """
        Convert to gRPC ModelInferResponse format.

        Returns:
            ModelInferResponse: gRPC format response
        """

Data Types

KServe supports the following data types:

| Data Type | Description | NumPy Equivalent |
| --------- | ----------- | ---------------- |
| BOOL | Boolean | np.bool_ |
| UINT8 | 8-bit unsigned integer | np.uint8 |
| UINT16 | 16-bit unsigned integer | np.uint16 |
| UINT32 | 32-bit unsigned integer | np.uint32 |
| UINT64 | 64-bit unsigned integer | np.uint64 |
| INT8 | 8-bit signed integer | np.int8 |
| INT16 | 16-bit signed integer | np.int16 |
| INT32 | 32-bit signed integer | np.int32 |
| INT64 | 64-bit signed integer | np.int64 |
| FP16 | 16-bit floating point | np.float16 |
| FP32 | 32-bit floating point | np.float32 |
| FP64 | 64-bit floating point | np.float64 |
| BYTES | Variable-length bytes | np.object_ (bytes) |
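
This mapping can be exercised directly with set_data_from_numpy and as_numpy. A minimal check of the FP16 row, assuming kserve and numpy are installed:

import numpy as np
from kserve import InferInput

# FP16 corresponds to np.float16 per the table above
half = np.array([1.5, 2.5], dtype=np.float16)
tensor = InferInput(name="input-0", shape=list(half.shape), datatype="FP16")
tensor.set_data_from_numpy(half)
assert tensor.as_numpy().dtype == np.float16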

Usage Examples

Basic Input/Output

from kserve import InferInput, InferOutput

# Create input tensor
input_data = InferInput(
    name="input-0",
    shape=[2, 3],
    datatype="FP32",
    data=[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]
)

print(f"Input name: {input_data.name}")
print(f"Input shape: {input_data.shape}")
print(f"Input datatype: {input_data.datatype}")
print(f"Input data: {input_data.data}")

# Create output tensor
output_data = InferOutput(
    name="output-0",
    shape=[2, 1],
    datatype="FP32",
    data=[[0.8], [0.2]]
)
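
Both classes also expose the to_dict method documented above, which is convenient for inspecting the wire representation; continuing the snippet above:

# Dictionary form with name, shape, datatype, data, and parameters
print(input_data.to_dict())
print(output_data.to_dict())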

Using NumPy Arrays

from kserve import InferInput, InferOutput
import numpy as np

# Create NumPy array
data = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32)

# Create input from NumPy
input_tensor = InferInput(
    name="input-0",
    shape=list(data.shape),
    datatype="FP32"
)
input_tensor.set_data_from_numpy(data)

print(f"Input shape: {input_tensor.shape}")
print(f"Input data: {input_tensor.data}")

# Convert back to NumPy
numpy_data = input_tensor.as_numpy()
print(f"NumPy array shape: {numpy_data.shape}")
print(f"NumPy array: {numpy_data}")

Binary Data Encoding

import numpy as np
from kserve import InferInput

# Large NumPy array
data = np.random.rand(1000, 1000).astype(np.float32)

# Use binary encoding for efficiency
input_tensor = InferInput(
    name="input-0",
    shape=list(data.shape),
    datatype="FP32"
)
input_tensor.set_data_from_numpy(data, binary_data=True)
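
Per the to_rest docstring above, a request whose inputs use binary encoding serializes to bytes rather than a dict; a short sketch continuing the snippet above:

from kserve import InferRequest

request = InferRequest(model_name="my-model", infer_inputs=[input_tensor])
body, json_length = request.to_rest()
# body is bytes when binary encoding is used; json_length is the size of the JSON prefix
print(type(body), json_length)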

String/Bytes Input

from kserve import InferInput

# Text data
texts = ["Hello world", "Machine learning", "KServe inference"]

# Create input with string data
input_text = InferInput(
    name="text-input",
    shape=[len(texts)],
    datatype="BYTES",
    data=texts
)
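
The as_string method documented above decodes BYTES data back into a list of Python strings (and raises InvalidInput for other datatypes); a sketch, assuming the text round-trips as UTF-8:

# Decode the BYTES tensor back to strings
decoded = input_text.as_string()
print(decoded)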

Creating Inference Request

from kserve import InferRequest, InferInput
import numpy as np

# Create inputs
input1 = InferInput(
    name="input-0",
    shape=[1, 4],
    datatype="FP32",
    data=[[5.1, 3.5, 1.4, 0.2]]
)

input2 = InferInput(
    name="input-1",
    shape=[1, 3],
    datatype="INT32",
    data=[[1, 2, 3]]
)

# Create request
request = InferRequest(
    model_name="my-model",
    infer_inputs=[input1, input2],
    request_id="123",
    parameters={"batch_size": 1}
)

print(f"Model: {request.model_name}")
print(f"Request ID: {request.request_id}")
print(f"Number of inputs: {len(request.inputs)}")

Creating Inference Response

from kserve import InferResponse, InferOutput

# Create outputs
output1 = InferOutput(
    name="output-0",
    shape=[1, 3],
    datatype="FP32",
    data=[[0.8, 0.15, 0.05]]
)

output2 = InferOutput(
    name="output-1",
    shape=[1],
    datatype="INT64",
    data=[0]
)

# Create response
response = InferResponse(
    model_name="my-model",
    infer_outputs=[output1, output2],
    response_id="123",
    model_version="1"
)

print(f"Model: {response.model_name}")
print(f"Version: {response.model_version}")
print(f"Response ID: {response.response_id}")
print(f"Number of outputs: {len(response.outputs)}")

Multiple Outputs

from kserve import InferResponse, InferOutput

# Multiple output tensors
outputs = [
    InferOutput(name="predictions", shape=[1, 10], datatype="FP32", data=[[0.1, 0.2, ...]]),
    InferOutput(name="probabilities", shape=[1, 10], datatype="FP32", data=[[0.05, 0.1, ...]]),
    InferOutput(name="classes", shape=[1], datatype="INT64", data=[5])
]

response = InferResponse(
    response_id="123",
    model_name="multi-output-model",
    infer_outputs=outputs
)

# Access outputs
for output in response.outputs:
    print(f"{output.name}: {output.data}")

Request Parameters

from kserve import InferRequest, InferInput

# Input with parameters
input_data = InferInput(
    name="input-0",
    shape=[1, 224, 224, 3],
    datatype="FP32",
    data=[...],  # full image tensor elided
    parameters={
        "content_type": "image/jpeg",
        "preprocessing": "normalize"
    }
)

# Request with parameters
request = InferRequest(
    model_name="resnet-50",
    infer_inputs=[input_data],
    parameters={
        "batch_size": 1,
        "priority": "high",
        "timeout": 30
    }
)
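
Requesting Specific Outputs

RequestedOutput (documented above) lets a client name the outputs it wants and control their encoding via set_data_format. A sketch; the top-level import of RequestedOutput is an assumption and may need adjusting for your kserve version:

from kserve import InferInput, InferRequest
from kserve import RequestedOutput  # import path is an assumption

input_data = InferInput(
    name="input-0",
    shape=[1, 4],
    datatype="FP32",
    data=[[1.0, 2.0, 3.0, 4.0]]
)

# Ask only for the "probabilities" output, in binary format
requested = RequestedOutput(name="probabilities")
requested.set_data_format(binary_data=True)

request = InferRequest(
    model_name="my-model",
    infer_inputs=[input_data],
    request_outputs=[requested]
)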

Converting Between Formats

from kserve import InferRequest, InferInput

# Create request
input_data = InferInput(
    name="input-0",
    shape=[1, 4],
    datatype="FP32",
    data=[[1.0, 2.0, 3.0, 4.0]]
)

request = InferRequest(
    model_name="my-model",
    infer_inputs=[input_data]
)

# Convert to REST format; to_rest returns (body, json_length)
rest_body, json_length = request.to_rest()
print(rest_body)
# With no binary inputs, rest_body is a dict and json_length is None:
# {
#     "model_name": "my-model",
#     "inputs": [
#         {
#             "name": "input-0",
#             "shape": [1, 4],
#             "datatype": "FP32",
#             "data": [[1.0, 2.0, 3.0, 4.0]]
#         }
#     ]
# }

# Convert to gRPC format
grpc_request = request.to_grpc()

Handling Different Data Types

from kserve import InferInput
import numpy as np

# Float32 input
float_input = InferInput(
    name="float-input",
    shape=[2, 2],
    datatype="FP32"
)
float_input.set_data_from_numpy(np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32))

# Int64 input
int_input = InferInput(
    name="int-input",
    shape=[3],
    datatype="INT64"
)
int_input.set_data_from_numpy(np.array([1, 2, 3], dtype=np.int64))

# Boolean input
bool_input = InferInput(
    name="bool-input",
    shape=[4],
    datatype="BOOL"
)
bool_input.set_data_from_numpy(np.array([True, False, True, False], dtype=np.bool_))

# Bytes input
bytes_input = InferInput(
    name="bytes-input",
    shape=[2],
    datatype="BYTES",
    data=[b"hello", b"world"]
)
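
Viewing Inputs as a DataFrame

InferRequest.as_dataframe (documented above) builds a pandas DataFrame from the request inputs and raises InvalidInput when they cannot be converted; a sketch assuming equal-length 1-D inputs map to columns:

from kserve import InferRequest, InferInput

cols = [
    InferInput(name="age", shape=[3], datatype="INT64", data=[25, 32, 47]),
    InferInput(name="income", shape=[3], datatype="FP32", data=[50000.0, 64000.0, 81000.0])
]
request = InferRequest(model_name="tabular-model", infer_inputs=cols)

df = request.as_dataframe()  # may raise InvalidInput
print(df)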

Working with Model Predict Method

from kserve import Model, InferRequest, InferResponse, InferInput, InferOutput
import numpy as np

class MyModel(Model):
    def predict(self, payload, headers=None):
        # payload is an InferRequest
        if isinstance(payload, InferRequest):
            # Extract input data
            input_tensor = payload.inputs[0]
            input_data = input_tensor.as_numpy()

            # Run inference (self.model is assumed to be loaded elsewhere, e.g. in load())
            predictions = self.model.predict(input_data)

            # Create output
            output = InferOutput(
                name="predictions",
                shape=list(predictions.shape),
                datatype="FP32"
            )
            output_tensor = np.array(predictions, dtype=np.float32)
            output.set_data_from_numpy(output_tensor)

            # Return InferResponse (response_id is required; echo the request id)
            return InferResponse(
                response_id=payload.request_id or "",
                model_name=self.name,
                infer_outputs=[output]
            )
        else:
            # Handle dict format
            instances = payload["instances"]
            predictions = self.model.predict(instances)
            return {"predictions": predictions.tolist()}

Type Conversion Utilities

import numpy as np
from kserve import InferInput, InferOutput

# Automatic dtype inference
def create_input_from_numpy(name: str, array: np.ndarray) -> InferInput:
    """Create InferInput from NumPy array with automatic type detection"""
    dtype_map = {
        np.float32: "FP32",
        np.float64: "FP64",
        np.int32: "INT32",
        np.int64: "INT64",
        np.uint8: "UINT8",
        np.bool_: "BOOL"
    }

    datatype = dtype_map.get(array.dtype.type, "FP32")  # fall back to FP32 for unmapped dtypes

    input_tensor = InferInput(
        name=name,
        shape=list(array.shape),
        datatype=datatype
    )
    input_tensor.set_data_from_numpy(array)

    return input_tensor

# Usage
data = np.array([[1, 2, 3]], dtype=np.float32)
input_tensor = create_input_from_numpy("input-0", data)

Batch Inference

from kserve import InferRequest, InferInput
import numpy as np

# Batch of 8 images (224x224x3)
batch_images = np.random.rand(8, 224, 224, 3).astype(np.float32)

# Create input for batch
input_batch = InferInput(
    name="images",
    shape=list(batch_images.shape),
    datatype="FP32"
)
input_batch.set_data_from_numpy(batch_images)

# Create request
request = InferRequest(
    model_name="image-classifier",
    infer_inputs=[input_batch],
    parameters={"batch_size": 8}
)

Response Parameters

from kserve import InferResponse, InferOutput

# Response with metadata
output = InferOutput(
    name="predictions",
    shape=[1, 10],
    datatype="FP32",
    data=[[0.1, 0.2, 0.3, 0.15, 0.05, 0.01, 0.02, 0.03, 0.04, 0.05]],
    parameters={
        "confidence": 0.95,
        "latency_ms": 15.2
    }
)

response = InferResponse(
    model_name="classifier",
    infer_outputs=[output],
    parameters={
        "model_version": "1.0.0",
        "inference_time_ms": 15.2,
        "preprocessing_time_ms": 2.3
    }
)
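
Per the to_rest docstring above, responses serialize the same way as requests; with no binary outputs the body is a dict and json_length is None:

# Serialize the response for REST transport
body, json_length = response.to_rest()
print(json_length)  # expected to be None here, since use_binary_outputs was not set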