or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

docs

agent.md · agentos.md · eval.md · exceptions.md · guardrails.md · index.md · knowledge.md · media.md · memory.md · models.md · sessions.md · storage.md · team.md · tools.md · workflow.md
tile.json

docs/eval.md

Evaluation API

Measure accuracy, performance, latency, and reliability.

Capabilities

Accuracy Evaluation

from agno.eval import AccuracyEval, AccuracyEvaluation, AccuracyResult

class AccuracyEval:
    """Evaluate agent outputs against expected answers.

    Collects a batch of AccuracyEvaluation cases and scores them
    together via run().
    """

    def __init__(
        self,
        # One test case per AccuracyEvaluation (input, expected_output, agent).
        evaluations: List[AccuracyEvaluation],
        # Extra configuration forwarded to the evaluator — options not shown
        # in this stub; see the library source for supported keys.
        **kwargs
    ): ...
    
    def run(self) -> AccuracyResult:
        """Run all configured evaluations and return the aggregate result.

        Returns:
            AccuracyResult: aggregate outcome; exposes at least an
            ``accuracy`` percentage (see the usage example below).
        """

@dataclass
class AccuracyEvaluation:
    """A single accuracy test case: prompt, expected answer, and the agent to run."""

    # Prompt sent to the agent.
    input: str
    # Expected answer; typed Any, so non-string expectations are allowed.
    # NOTE(review): comparison semantics (exact vs. semantic match) are not
    # visible in this stub — confirm against the library implementation.
    expected_output: Any
    # The agent under evaluation.
    agent: Agent

Performance Evaluation

from agno.eval import PerformanceEval, PerformanceResult

class PerformanceEval:
    """Measure an agent's performance (e.g. latency) over repeated runs."""

    def __init__(
        self,
        # The agent under evaluation.
        agent: Agent,
        # Number of repetitions per run() call; defaults to 10 samples.
        num_runs: int = 10,
        # Extra configuration forwarded to the evaluator.
        **kwargs
    ): ...
    
    def run(self, input: str) -> PerformanceResult:
        """Run the agent ``num_runs`` times on ``input`` and return timing results.

        Args:
            input: Prompt sent to the agent on every repetition.

        Returns:
            PerformanceResult: aggregated performance metrics.
        """

Reliability Evaluation

from agno.eval import ReliabilityEval, ReliabilityResult

class ReliabilityEval:
    """Measure an agent's reliability (consistency of behavior) over repeated runs."""

    def __init__(
        self,
        # The agent under evaluation.
        agent: Agent,
        # Number of repetitions per run() call; defaults to 10 samples.
        num_runs: int = 10,
        # Extra configuration forwarded to the evaluator.
        **kwargs
    ): ...
    
    def run(self, input: str) -> ReliabilityResult:
        """Run the agent ``num_runs`` times on ``input`` and return reliability results.

        Args:
            input: Prompt sent to the agent on every repetition.

        Returns:
            ReliabilityResult: aggregated reliability metrics.
        """

Usage Examples

Accuracy Eval

from agno.eval import AccuracyEval, AccuracyEvaluation

# Build a two-case accuracy suite and run it.
# Named `accuracy_eval` (not `eval`) so we don't shadow the Python builtin
# `eval`, which the original example did.
# NOTE: `agent` must be an agno Agent instance defined earlier.
accuracy_eval = AccuracyEval(
    evaluations=[
        AccuracyEvaluation(
            input="What is 2+2?",
            expected_output="4",
            agent=agent
        ),
        AccuracyEvaluation(
            input="What is the capital of France?",
            expected_output="Paris",
            agent=agent
        )
    ]
)

# result.accuracy is the percentage of cases that matched expectations.
result = accuracy_eval.run()
print(f"Accuracy: {result.accuracy}%")