Measure accuracy, performance, latency, and reliability.
from agno.eval import AccuracyEval, AccuracyEvaluation, AccuracyResult
class AccuracyEval:
    """Signature stub for the accuracy evaluation runner.

    Only the public surface is shown here; both method bodies are
    placeholders. The importable class comes from ``agno.eval``.
    """

    def __init__(self, evaluations: List[AccuracyEvaluation], **kwargs): ...

    def run(self) -> AccuracyResult:
        """Run accuracy evaluation.

        Takes no arguments and is annotated as returning an
        ``AccuracyResult``.
        """
@dataclass
class AccuracyEvaluation:
    """One accuracy test case: an input, its expected output, and an agent."""

    # Prompt text for the case, e.g. "What is 2+2?".
    input: str
    # Reference answer for the case; typed ``Any``, so not limited to strings.
    expected_output: Any
    # Presumably the agent whose answer is checked - TODO confirm against agno docs.
    agent: Agent


# The original line fused this import onto the ``agent`` field above; it
# belongs to the performance-evaluation snippet that follows.
from agno.eval import PerformanceEval, PerformanceResult
class PerformanceEval:
    """Signature stub for the performance evaluation runner.

    Only the public surface is shown; the method bodies are placeholders.
    """

    def __init__(
        self,
        agent: Agent,
        num_runs: int = 10,
        **kwargs
    ):
        """Configure the evaluation.

        Args:
            agent: The ``Agent`` instance the evaluation is built around.
            num_runs: Run count; defaults to 10. Presumably the number of
                times the agent is executed - TODO confirm.
            **kwargs: Additional options; not specified in this stub.
        """
        ...

    def run(self, input: str) -> PerformanceResult:
        """Run performance evaluation.

        Args:
            input: Input string for the evaluation.

        Returns:
            Annotated as ``PerformanceResult``.
        """


# The original line fused this import onto the ``run`` docstring above; it
# belongs to the reliability-evaluation snippet that follows.
from agno.eval import ReliabilityEval, ReliabilityResult
class ReliabilityEval:
    """Signature stub for the reliability evaluation runner.

    Only the public surface is shown; the method bodies are placeholders.
    """

    def __init__(
        self,
        agent: Agent,
        num_runs: int = 10,
        **kwargs
    ):
        """Configure the evaluation.

        Args:
            agent: The ``Agent`` instance the evaluation is built around.
            num_runs: Run count; defaults to 10. Presumably the number of
                times the agent is executed - TODO confirm.
            **kwargs: Additional options; not specified in this stub.
        """
        ...

    def run(self, input: str) -> ReliabilityResult:
        """Run reliability evaluation.

        Args:
            input: Input string for the evaluation.

        Returns:
            Annotated as ``ReliabilityResult``.
        """


# The original line fused this import onto the ``run`` docstring above; it
# belongs to the usage example that follows.
from agno.eval import AccuracyEval, AccuracyEvaluation
# Usage example: score one agent on a small set of question/answer pairs.
# ``agent`` is not defined in this snippet and must already exist.
qa_pairs = [
    ("What is 2+2?", "4"),
    ("What is the capital of France?", "Paris"),
]

# NOTE: ``eval`` shadows the Python builtin of the same name; the name is kept
# unchanged so any later reference to it keeps working.
eval = AccuracyEval(
    evaluations=[
        AccuracyEvaluation(input=question, expected_output=answer, agent=agent)
        for question, answer in qa_pairs
    ]
)

result = eval.run()
print(f"Accuracy: {result.accuracy}%")