# Tracing

Component-level observability for evaluating nested LLM components using the `@observe` decorator and trace management. Enable tracing to evaluate individual components within your LLM application.

## Imports

```python
from deepeval.tracing import (
    observe,
    trace,
    trace_manager,
    update_current_span,
    update_current_trace,
    update_retriever_span,
    update_llm_span,
    evaluate_trace,
    evaluate_span,
    evaluate_thread
)
```

## Capabilities

### Observe Decorator

Decorator for observing function execution and applying metrics to components.

```python { .api }
def observe(
    metrics: Optional[List[BaseMetric]] = None,
    name: Optional[str] = None,
    type: Optional[str] = None
):
    """
    Decorator for observing function execution.

    Parameters:
    - metrics (List[BaseMetric], optional): Metrics to apply to this component
    - name (str, optional): Name for the span
    - type (str, optional): Type of component (e.g., "llm", "retriever", "tool")

    Usage:
    - Decorate any function to create a traced span
    - Use update_current_span() within function to add test case data
    - Metrics are evaluated automatically on the component
    """
```

Usage example:

```python
from deepeval.tracing import observe, update_current_span
from deepeval.metrics import AnswerRelevancyMetric, FaithfulnessMetric
from deepeval.test_case import LLMTestCase

# Define metrics for components
answer_relevancy = AnswerRelevancyMetric(threshold=0.7)
faithfulness = FaithfulnessMetric(threshold=0.8)

@observe(metrics=[answer_relevancy, faithfulness])
def llm_component(query: str, context: list):
    """LLM component that generates answer from context."""
    # Your LLM call
    answer = call_llm(query, context)

    # Update span with test case data
    update_current_span(
        test_case=LLMTestCase(
            input=query,
            actual_output=answer,
            retrieval_context=context
        )
    )

    return answer

@observe(name="retrieval", type="retriever")
def retriever_component(query: str):
    """Retrieval component."""
    results = vector_search(query)

    update_retriever_span(
        embedder="text-embedding-ada-002",
        top_k=10,
        chunk_size=512
    )

    return results

@observe(name="rag_pipeline")
def rag_pipeline(user_query: str):
    """Full RAG pipeline with traced components."""
    # Each component is traced
    context = retriever_component(user_query)
    answer = llm_component(user_query, context)

    return answer

# Execute and automatically evaluate components
result = rag_pipeline("What is quantum computing?")
```

### Update Span Functions

Functions to update span data during execution.

```python { .api }
def update_current_span(
    test_case: Optional[LLMTestCase] = None,
    **kwargs
):
    """
    Updates the current span with additional data.

    Parameters:
    - test_case (LLMTestCase, optional): Test case data for the span
    - **kwargs: Additional span attributes (metadata, tags, etc.)
    """

def update_current_trace(
    **kwargs
):
    """
    Updates the current trace with additional data.

    Parameters:
    - **kwargs: Trace-level attributes
    """

def update_retriever_span(
    embedder: Optional[str] = None,
    top_k: Optional[int] = None,
    chunk_size: Optional[int] = None
):
    """
    Updates retriever-specific span data.

    Parameters:
    - embedder (str, optional): Name of the embedding model used
    - top_k (int, optional): Number of top results retrieved
    - chunk_size (int, optional): Size of chunks used in retrieval
    """

def update_llm_span(
    model: Optional[str] = None,
    input_token_count: Optional[float] = None,
    output_token_count: Optional[float] = None,
    cost_per_input_token: Optional[float] = None,
    cost_per_output_token: Optional[float] = None,
    token_intervals: Optional[Dict[float, str]] = None,
    prompt: Optional[Prompt] = None
):
    """
    Updates LLM-specific span data.

    Parameters:
    - model (str, optional): Model name
    - input_token_count (float, optional): Number of input tokens
    - output_token_count (float, optional): Number of output tokens
    - cost_per_input_token (float, optional): Cost per input token
    - cost_per_output_token (float, optional): Cost per output token
    - token_intervals (Dict[float, str], optional): Token timing intervals
    - prompt (Prompt, optional): Prompt object used
    """
```

### Trace Context Manager

Context manager for creating trace scopes.

```python { .api }
def trace(name: Optional[str] = None):
    """
    Context manager for tracing execution.

    Parameters:
    - name (str, optional): Name for the trace
    """
```

Usage:

```python
from deepeval.tracing import trace, observe

@observe
def process_document(doc):
    # Processing logic
    return result

def main():
    with trace(name="document_processing"):
        for doc in documents:
            process_document(doc)
```

### Offline Evaluation

Evaluate traces after execution.

```python { .api }
def evaluate_trace(
    trace_uuid: str,
    metric_collection: str
):
    """
    Evaluates a specific trace using a Confident AI metric collection.

    Parameters:
    - trace_uuid (str): UUID of the trace to evaluate
    - metric_collection (str): Name of the metric collection on Confident AI
    """

def evaluate_span(
    span_uuid: str,
    metric_collection: str
):
    """
    Evaluates a specific span using a Confident AI metric collection.

    Parameters:
    - span_uuid (str): UUID of the span to evaluate
    - metric_collection (str): Name of the metric collection on Confident AI
    """

def evaluate_thread(
    thread_id: str,
    metric_collection: str,
    overwrite_metrics: bool = False
):
    """
    Evaluates a traced thread using a Confident AI metric collection.

    Parameters:
    - thread_id (str): ID of the thread to evaluate
    - metric_collection (str): Name of the metric collection on Confident AI
    - overwrite_metrics (bool): Whether to overwrite existing metrics (default: False)
    """
```

## Usage Examples

### Component-Level Evaluation

```python
from deepeval import evaluate
from deepeval.tracing import observe, update_current_span
from deepeval.metrics import FaithfulnessMetric, AnswerRelevancyMetric
from deepeval.test_case import LLMTestCase
from deepeval.dataset import Golden

# Define component metrics
faithfulness = FaithfulnessMetric(threshold=0.8)
relevancy = AnswerRelevancyMetric(threshold=0.7)

@observe(metrics=[faithfulness, relevancy])
def answer_generator(question: str, context: list):
    """Generate answer from context."""
    answer = llm_generate(question, context)

    # Provide test case data for evaluation
    update_current_span(
        test_case=LLMTestCase(
            input=question,
            actual_output=answer,
            retrieval_context=context
        )
    )

    return answer

@observe(name="rag_app")
def rag_application(question: str):
    """Main RAG application."""
    context = retrieve_context(question)
    answer = answer_generator(question, context)
    return answer

# Evaluate using observed callback
goldens = [
    Golden(input="What is Python?"),
    Golden(input="What is JavaScript?")
]

result = evaluate(
    observed_callback=rag_application,
    goldens=goldens
)
```

### Multi-Component Pipeline

```python
from deepeval.tracing import observe, update_current_span
from deepeval.metrics import ToolCorrectnessMetric
from deepeval.test_case import LLMTestCase, ToolCall

tool_metric = ToolCorrectnessMetric(threshold=0.8)

@observe(name="tool_selector")
def select_tools(query: str):
    """Select appropriate tools."""
    tools = analyze_and_select_tools(query)
    return tools

@observe(metrics=[tool_metric])
def tool_executor(query: str, tools: list):
    """Execute tools."""
    results = []
    tool_calls = []

    for tool in tools:
        result = execute_tool(tool, query)
        results.append(result)
        tool_calls.append(ToolCall(
            name=tool.name,
            input_parameters=tool.params,
            output=result
        ))

    update_current_span(
        test_case=LLMTestCase(
            input=query,
            actual_output=str(results),
            tools_called=tool_calls
        )
    )

    return results

@observe(name="agent")
def agent_pipeline(query: str):
    """Full agent pipeline."""
    tools = select_tools(query)
    results = tool_executor(query, tools)
    final_answer = synthesize_answer(results)
    return final_answer

# Execute with tracing
answer = agent_pipeline("Book a flight to NYC")
```

### Accessing Current Golden

```python
from deepeval.tracing import observe, update_current_span
from deepeval.dataset import Golden, get_current_golden
from deepeval.test_case import LLMTestCase

@observe
def my_component(input_text: str):
    """Component that accesses current golden."""
    # Get current golden from context
    golden = get_current_golden()

    # Process input
    output = process(input_text)

    # Use golden data in test case
    update_current_span(
        test_case=LLMTestCase(
            input=input_text,
            actual_output=output,
            expected_output=golden.expected_output if golden else None,
            retrieval_context=golden.retrieval_context if golden else None
        )
    )

    return output

# Evaluate with goldens
from deepeval import evaluate

goldens = [Golden(input="test", expected_output="result")]
result = evaluate(observed_callback=my_component, goldens=goldens)
```

### Trace Management

```python
from deepeval.tracing import trace_manager

# Get all traces
traces = trace_manager.get_traces()

# Get specific trace
trace = trace_manager.get_trace(trace_id="abc123")

# Get spans for a trace
spans = trace_manager.get_spans(trace_id="abc123")

# Clear traces
trace_manager.clear()
```

### Integration with Confident AI

Traces are automatically synced to Confident AI when logged in:

```bash
deepeval login
```

```python
from deepeval.tracing import observe

@observe
def my_function(input):
    # This trace will be synced to Confident AI
    return process(input)

my_function("test")
# View traces at app.confident-ai.com
```