```bash
tessl install tessl/pypi-kserve@0.16.1
```

KServe is a comprehensive Python SDK that provides standardized interfaces for building and deploying machine learning model serving infrastructure on Kubernetes.
Data types for structured inference requests and responses following the KServe inference protocol v2 specification.
Input tensor descriptor for inference requests.

```python
class InferInput:
"""
Input tensor for inference request.
Args:
name (str): Input name
shape (list): Input shape as list of integers
datatype (str): Data type (e.g., "FP32", "INT64", "BYTES")
data (any, optional): Input data
parameters (dict, optional): Additional parameters
"""
def __init__(
self,
name: str,
shape: List[int],
datatype: str,
data: Any = None,
parameters: Optional[Dict] = None
): ...
# Properties
@property
def name(self) -> str:
"""Input name"""
@property
def shape(self) -> List[int]:
"""Input shape"""
@property
def datatype(self) -> str:
"""Data type"""
@property
def data(self) -> Any:
"""Input data"""
@property
def parameters(self) -> Dict:
"""Additional parameters"""
def set_data_from_numpy(self, input_tensor: np.ndarray, binary_data: bool = False) -> None:
"""
Set input data from NumPy array.
Args:
input_tensor (np.ndarray): NumPy array with input data
binary_data (bool): Use binary data encoding (default: False)
"""
def as_numpy(self) -> np.ndarray:
"""
Convert input data to NumPy array.
Returns:
np.ndarray: Input data as NumPy array
"""
def as_string(self) -> List[str]:
"""
Convert BYTES data to list of strings.
Returns:
list: List of strings decoded from bytes data
Raises:
InvalidInput: If datatype is not BYTES
"""
def to_dict(self) -> Dict:
"""
Convert input to dictionary representation.
Returns:
dict: Dictionary with name, shape, datatype, data, and parameters
"""Descriptor for outputs requested in an inference request.
class RequestedOutput:
"""
Output requested as part of inference request.
Args:
name (str): Output name
parameters (dict, optional): Additional parameters
"""
def __init__(self, name: str, parameters: Optional[Dict] = None): ...
# Properties
@property
def name(self) -> str:
"""Output name"""
@property
def parameters(self) -> Optional[Dict]:
"""Additional parameters"""
@property
def binary_data(self) -> Optional[bool]:
"""
Check if binary data format is requested.
Returns:
bool or None: True if binary format requested, False if not, or None if the format has not been set
"""
def set_data_format(self, binary_data: bool) -> None:
"""
Set the data format for this output.
Args:
binary_data (bool): True for binary format, False for JSON format
"""
@property
def classification(self) -> Optional[int]:
"""
Get classification parameter.
Returns:
int or None: Classification count if set, otherwise None
"""Output tensor descriptor for inference responses.
class InferOutput:
"""
Output tensor from inference response.
Args:
name (str): Output name
shape (list): Output shape as list of integers
datatype (str): Data type (e.g., "FP32", "INT64", "BYTES")
data (any, optional): Output data
parameters (dict, optional): Additional parameters
"""
def __init__(
self,
name: str,
shape: List[int],
datatype: str,
data: Optional[Union[List, np.ndarray, InferTensorContents]] = None,
parameters: Optional[Union[Dict, MessageMap[str, InferParameter]]] = None
): ...
# Properties
@property
def name(self) -> str:
"""Output name"""
@property
def shape(self) -> List[int]:
"""Output shape"""
@property
def datatype(self) -> str:
"""Data type"""
@property
def data(self) -> Any:
"""Output data"""
@property
def parameters(self) -> Dict:
"""Additional parameters"""
def set_data_from_numpy(self, output_tensor: np.ndarray, binary_data: bool = True) -> None:
"""
Set output data from NumPy array.
Args:
output_tensor (np.ndarray): NumPy array with output data
binary_data (bool): Use binary data encoding (default: True)
Raises:
InferenceError: If tensor shape/dtype doesn't match or conversion fails
"""
def as_numpy(self) -> np.ndarray:
"""
Convert output data to NumPy array.
Returns:
np.ndarray: Output data as NumPy array
Raises:
InvalidInput: If datatype is not recognized
"""
def to_dict(self) -> Dict:
"""
Convert output to dictionary representation.
Returns:
dict: Dictionary with name, shape, datatype, data, and parameters
"""Inference request wrapper containing inputs and metadata.
class InferRequest:
"""
Inference request.
Args:
model_name (str): Name of the model
infer_inputs (list): List of InferInput objects
request_id (str, optional): Request ID for tracking
raw_inputs (list, optional): Binary data for inputs
from_grpc (bool): Whether request is from gRPC (default: False)
parameters (dict, optional): Additional request parameters
request_outputs (list, optional): List of RequestedOutput objects
model_version (str, optional): Model version
"""
def __init__(
self,
model_name: str,
infer_inputs: List[InferInput],
request_id: Optional[str] = None,
raw_inputs=None,
from_grpc: Optional[bool] = False,
parameters: Optional[Union[Dict, MessageMap[str, InferParameter]]] = None,
request_outputs: Optional[List[RequestedOutput]] = None,
model_version: Optional[str] = None
): ...
# Properties
@property
def model_name(self) -> str:
"""Model name"""
@property
def model_version(self) -> str:
"""Model version"""
@property
def request_id(self) -> str:
"""Request ID"""
@property
def inputs(self) -> List[InferInput]:
"""List of input tensors"""
@property
def parameters(self) -> Dict:
"""Request parameters"""
@property
def from_grpc(self) -> bool:
"""Whether from gRPC"""
def as_dataframe(self) -> pd.DataFrame:
"""
Convert inputs to pandas DataFrame.
Returns:
pd.DataFrame: Input data as DataFrame
Raises:
InvalidInput: If inputs cannot be converted to DataFrame
"""
def get_input_by_name(self, name: str) -> Optional[InferInput]:
"""
Find input by name.
Args:
name (str): Input name to search for
Returns:
InferInput or None: Input with specified name, or None if not found
"""
@classmethod
def from_grpc(cls, request: ModelInferRequest) -> "InferRequest":
"""
Construct InferRequest from gRPC ModelInferRequest.
Args:
request (ModelInferRequest): gRPC request object
Returns:
InferRequest: Constructed request object
"""
@classmethod
def from_bytes(cls, req_bytes: bytes, json_length: int, model_name: str) -> "InferRequest":
"""
Construct InferRequest from raw bytes.
Args:
req_bytes (bytes): Raw request bytes
json_length (int): Length of JSON part
model_name (str): Model name
Returns:
InferRequest: Constructed request object
"""
@classmethod
def from_inference_request(cls, request: InferenceRequest, model_name: str) -> "InferRequest":
"""
Construct from InferenceRequest datamodel.
Args:
request (InferenceRequest): InferenceRequest object
model_name (str): Model name
Returns:
InferRequest: Constructed request object
"""
def to_rest(self) -> Tuple[Union[bytes, Dict], Optional[int]]:
"""
Convert to REST format (JSON or binary).
Returns:
tuple: (request_data, json_length) where request_data is bytes or dict,
and json_length is the JSON portion length (None if dict)
"""
def to_grpc(self) -> ModelInferRequest:
"""
Convert to gRPC ModelInferRequest format.
Returns:
ModelInferRequest: gRPC format request
"""
@property
def use_binary_outputs(self) -> bool:
"""
Check if binary outputs should be used.
Returns:
bool: True if outputs should be in binary format
"""Inference response wrapper containing outputs and metadata.
class InferResponse:
"""
Inference response.
Args:
response_id (str): Response ID
model_name (str): Name of the model
infer_outputs (list): List of InferOutput objects
model_version (str, optional): Model version
raw_outputs (list, optional): Raw binary data for outputs
from_grpc (bool): Whether response is from gRPC (default: False)
parameters (dict, optional): Additional response parameters
use_binary_outputs (bool): Use binary format for outputs in REST (default: False)
requested_outputs (list, optional): List of RequestedOutput objects
"""
def __init__(
self,
response_id: str,
model_name: str,
infer_outputs: List[InferOutput],
model_version: Optional[str] = None,
raw_outputs=None,
from_grpc: Optional[bool] = False,
parameters: Optional[Union[Dict, MessageMap[str, InferParameter]]] = None,
use_binary_outputs: Optional[bool] = False,
requested_outputs: Optional[List[RequestedOutput]] = None
): ...
# Properties
@property
def model_name(self) -> str:
"""Model name"""
@property
def model_version(self) -> str:
"""Model version"""
@property
def response_id(self) -> str:
"""Response ID"""
@property
def outputs(self) -> List[InferOutput]:
"""List of output tensors"""
@property
def parameters(self) -> Dict:
"""Response parameters"""
@property
def from_grpc(self) -> bool:
"""Whether from gRPC"""
def as_dataframe(self) -> pd.DataFrame:
"""
Convert outputs to pandas DataFrame.
Returns:
pd.DataFrame: Output data as DataFrame
Raises:
InvalidInput: If outputs cannot be converted to DataFrame
"""
def get_output_by_name(self, name: str) -> Optional[InferOutput]:
"""
Find output by name.
Args:
name (str): Output name to search for
Returns:
InferOutput or None: Output with specified name, or None if not found
"""
@classmethod
def from_grpc(cls, response: ModelInferResponse) -> "InferResponse":
"""
Construct InferResponse from gRPC ModelInferResponse.
Args:
response (ModelInferResponse): gRPC response object
Returns:
InferResponse: Constructed response object
"""
@classmethod
def from_rest(cls, response: Dict) -> "InferResponse":
"""
Construct InferResponse from REST dict.
Args:
response (dict): REST response dictionary
Returns:
InferResponse: Constructed response object
"""
@classmethod
def from_bytes(cls, res_bytes: bytes, json_length: int) -> "InferResponse":
"""
Construct InferResponse from raw bytes.
Args:
res_bytes (bytes): Raw response bytes
json_length (int): Length of JSON part
Returns:
InferResponse: Constructed response object
"""
def to_rest(self) -> Tuple[Union[bytes, Dict], Optional[int]]:
"""
Convert to REST format (JSON or binary).
Returns:
tuple: (response_data, json_length) where response_data is bytes or dict,
and json_length is the JSON portion length (None if dict)
"""
def to_grpc(self) -> ModelInferResponse:
"""
Convert to gRPC ModelInferResponse format.
Returns:
ModelInferResponse: gRPC format response
"""KServe supports the following data types:
| Data Type | Description | NumPy Equivalent |
|---|---|---|
| BOOL | Boolean | np.bool_ |
| UINT8 | 8-bit unsigned integer | np.uint8 |
| UINT16 | 16-bit unsigned integer | np.uint16 |
| UINT32 | 32-bit unsigned integer | np.uint32 |
| UINT64 | 64-bit unsigned integer | np.uint64 |
| INT8 | 8-bit signed integer | np.int8 |
| INT16 | 16-bit signed integer | np.int16 |
| INT32 | 32-bit signed integer | np.int32 |
| INT64 | 64-bit signed integer | np.int64 |
| FP16 | 16-bit floating point | np.float16 |
| FP32 | 32-bit floating point | np.float32 |
| FP64 | 64-bit floating point | np.float64 |
| BYTES | Variable-length bytes | np.object_ (bytes) |

Creating input and output tensors:

```python
from kserve import InferInput, InferOutput
# Create input tensor
input_data = InferInput(
name="input-0",
shape=[2, 3],
datatype="FP32",
data=[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]
)
print(f"Input name: {input_data.name}")
print(f"Input shape: {input_data.shape}")
print(f"Input datatype: {input_data.datatype}")
print(f"Input data: {input_data.data}")
# Create output tensor
output_data = InferOutput(
name="output-0",
shape=[2, 1],
datatype="FP32",
data=[[0.8], [0.2]]
)
```

Setting tensor data from NumPy and converting back:

```python
from kserve import InferInput, InferOutput
import numpy as np
# Create NumPy array
data = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32)
# Create input from NumPy
input_tensor = InferInput(
name="input-0",
shape=list(data.shape),
datatype="FP32"
)
input_tensor.set_data_from_numpy(data)
print(f"Input shape: {input_tensor.shape}")
print(f"Input data: {input_tensor.data}")
# Convert back to NumPy
numpy_data = input_tensor.as_numpy()
print(f"NumPy array shape: {numpy_data.shape}")
print(f"NumPy array: {numpy_data}")import numpy as np
from kserve import InferInput
# Large NumPy array
data = np.random.rand(1000, 1000).astype(np.float32)
# Use binary encoding for efficiency
input_tensor = InferInput(
name="input-0",
shape=list(data.shape),
datatype="FP32"
)
input_tensor.set_data_from_numpy(data, binary_data=True)
```

String data uses the BYTES datatype:

```python
from kserve import InferInput
# Text data
texts = ["Hello world", "Machine learning", "KServe inference"]
# Create input with string data
input_text = InferInput(
name="text-input",
shape=[len(texts)],
datatype="BYTES",
data=texts
)
```

Building an inference request:

```python
from kserve import InferRequest, InferInput
import numpy as np
# Create inputs
input1 = InferInput(
name="input-0",
shape=[1, 4],
datatype="FP32",
data=[[5.1, 3.5, 1.4, 0.2]]
)
input2 = InferInput(
name="input-1",
shape=[1, 3],
datatype="INT32",
data=[[1, 2, 3]]
)
# Create request
request = InferRequest(
model_name="my-model",
infer_inputs=[input1, input2],
request_id="123",
parameters={"batch_size": 1}
)
print(f"Model: {request.model_name}")
print(f"Request ID: {request.request_id}")
print(f"Number of inputs: {len(request.inputs)}")from kserve import InferResponse, InferOutput
# Create outputs
output1 = InferOutput(
name="output-0",
shape=[1, 3],
datatype="FP32",
data=[[0.8, 0.15, 0.05]]
)
output2 = InferOutput(
name="output-1",
shape=[1],
datatype="INT64",
data=[0]
)
# Create response
response = InferResponse(
model_name="my-model",
infer_outputs=[output1, output2],
response_id="123",
model_version="1"
)
print(f"Model: {response.model_name}")
print(f"Version: {response.model_version}")
print(f"Response ID: {response.response_id}")
print(f"Number of outputs: {len(response.outputs)}")from kserve import InferResponse, InferOutput
# Multiple output tensors
outputs = [
    InferOutput(name="predictions", shape=[1, 10], datatype="FP32",
                data=[[0.1, 0.2, 0.05, 0.05, 0.1, 0.15, 0.05, 0.1, 0.1, 0.1]]),
    InferOutput(name="probabilities", shape=[1, 10], datatype="FP32",
                data=[[0.05, 0.1, 0.2, 0.05, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1]]),
    InferOutput(name="classes", shape=[1], datatype="INT64", data=[5])
]
response = InferResponse(
    response_id="1",
    model_name="multi-output-model",
    infer_outputs=outputs
)
# Access outputs
for output in response.outputs:
print(f"{output.name}: {output.data}")from kserve import InferRequest, InferInput
# Input with parameters
input_data = InferInput(
name="input-0",
shape=[1, 224, 224, 3],
datatype="FP32",
data=[...],  # image tensor values elided
parameters={
"content_type": "image/jpeg",
"preprocessing": "normalize"
}
)
# Request with parameters
request = InferRequest(
model_name="resnet-50",
infer_inputs=[input_data],
parameters={
"batch_size": 1,
"priority": "high",
"timeout": 30
}
)
```

Converting a request to REST and gRPC wire formats:

```python
from kserve import InferRequest, InferInput
# Create request
input_data = InferInput(
name="input-0",
shape=[1, 4],
datatype="FP32",
data=[[1.0, 2.0, 3.0, 4.0]]
)
request = InferRequest(
model_name="my-model",
infer_inputs=[input_data]
)
# Convert to REST format
rest_data, json_length = request.to_rest()  # json_length is None when the payload is a dict
print(rest_data)
# Output:
# {
# "model_name": "my-model",
# "inputs": [
# {
# "name": "input-0",
# "shape": [1, 4],
# "datatype": "FP32",
# "data": [[1.0, 2.0, 3.0, 4.0]]
# }
# ]
# }
# Convert to gRPC format
grpc_request = request.to_grpc()
```
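
The response side converts the same way. A minimal sketch based on the signatures above (note that InferResponse takes response_id as its first argument):

```python
from kserve import InferResponse, InferOutput

response = InferResponse(
    response_id="1",
    model_name="my-model",
    infer_outputs=[InferOutput(name="out", shape=[1], datatype="FP32", data=[0.9])]
)
rest_data, json_length = response.to_rest()  # dict and None unless binary outputs are used
grpc_response = response.to_grpc()           # ModelInferResponse
```

Working with different data types:

```python
from kserve import InferInput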
import numpy as np
# Float32 input
float_input = InferInput(
name="float-input",
shape=[2, 2],
datatype="FP32"
)
float_input.set_data_from_numpy(np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32))
# Int64 input
int_input = InferInput(
name="int-input",
shape=[3],
datatype="INT64"
)
int_input.set_data_from_numpy(np.array([1, 2, 3], dtype=np.int64))
# Boolean input
bool_input = InferInput(
name="bool-input",
shape=[4],
datatype="BOOL"
)
bool_input.set_data_from_numpy(np.array([True, False, True, False], dtype=np.bool_))
# Bytes input
bytes_input = InferInput(
name="bytes-input",
shape=[2],
datatype="BYTES",
data=[b"hello", b"world"]
)
```

Using these types inside a custom model's predict method:

```python
from kserve import Model, InferRequest, InferResponse, InferInput, InferOutput
import numpy as np
class MyModel(Model):
def predict(self, payload, headers=None):
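        # Assumes self.model was loaded earlier, e.g. in this model's load() hook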
# payload is an InferRequest
if isinstance(payload, InferRequest):
# Extract input data
input_tensor = payload.inputs[0]
input_data = input_tensor.as_numpy()
# Run inference
predictions = self.model.predict(input_data)
# Create output
output = InferOutput(
name="predictions",
shape=list(predictions.shape),
datatype="FP32"
)
output_tensor = np.array(predictions, dtype=np.float32)
output.set_data_from_numpy(output_tensor)
# Return InferResponse
return InferResponse(
    response_id=payload.request_id,
    model_name=self.name,
    infer_outputs=[output]
)
else:
# Handle dict format
instances = payload["instances"]
predictions = self.model.predict(instances)
return {"predictions": predictions.tolist()}import numpy as np
from kserve import InferInput, InferOutput
# Automatic dtype inference
def create_input_from_numpy(name: str, array: np.ndarray) -> InferInput:
"""Create InferInput from NumPy array with automatic type detection"""
dtype_map = {
np.float32: "FP32",
np.float64: "FP64",
np.int32: "INT32",
np.int64: "INT64",
np.uint8: "UINT8",
np.bool_: "BOOL"
}
datatype = dtype_map.get(array.dtype.type, "FP32")
input_tensor = InferInput(
name=name,
shape=list(array.shape),
datatype=datatype
)
input_tensor.set_data_from_numpy(array)
return input_tensor
# Usage
data = np.array([[1, 2, 3]], dtype=np.float32)
input_tensor = create_input_from_numpy("input-0", data)
```

Sending a batch of inputs in a single request:

```python
from kserve import InferRequest, InferInput
import numpy as np
# Batch of 8 images (224x224x3)
batch_images = np.random.rand(8, 224, 224, 3).astype(np.float32)
# Create input for batch
input_batch = InferInput(
name="images",
shape=list(batch_images.shape),
datatype="FP32"
)
input_batch.set_data_from_numpy(batch_images)
# Create request
request = InferRequest(
model_name="image-classifier",
infer_inputs=[input_batch],
parameters={"batch_size": 8}
)
```

Attaching metadata to outputs and responses via parameters:

```python
from kserve import InferResponse, InferOutput
# Response with metadata
output = InferOutput(
name="predictions",
shape=[1, 10],
datatype="FP32",
data=[[0.1, 0.2, 0.3, 0.15, 0.05, 0.01, 0.02, 0.03, 0.04, 0.05]],
parameters={
"confidence": 0.95,
"latency_ms": 15.2
}
)
response = InferResponse(
    response_id="1",
    model_name="classifier",
infer_outputs=[output],
parameters={
"model_version": "1.0.0",
"inference_time_ms": 15.2,
"preprocessing_time_ms": 2.3
}
)
```