or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

agentic-metrics.md, benchmarks.md, content-quality-metrics.md, conversational-metrics.md, core-evaluation.md, custom-metrics.md, dataset.md, index.md, integrations.md, models.md, multimodal-metrics.md, rag-metrics.md, synthesizer.md, test-cases.md, tracing.md

docs/integrations.md

0

# Integrations

1

2

Native integrations with popular LLM frameworks for automatic tracing and evaluation. DeepEval integrates with LangChain, LlamaIndex, CrewAI, and PydanticAI to provide seamless evaluation capabilities.

3

4

## Imports

5

6

```python

7

# LangChain

8

from deepeval.integrations.langchain import CallbackHandler, tool

9

10

# LlamaIndex

11

from deepeval.integrations.llama_index import instrument_llama_index

12

13

# CrewAI

14

from deepeval.integrations.crewai import (

15

instrument_crewai,

16

Crew,

17

Agent,

18

LLM,

19

tool

20

)

21

22

# PydanticAI

23

from deepeval.integrations.pydantic_ai import (

24

instrument_pydantic_ai,

25

ConfidentInstrumentationSettings,

26

Agent

27

)

28

```

29

30

## Capabilities

31

32

### LangChain Integration

33

34

Integrate DeepEval with LangChain applications using callback handlers.

35

36

```python { .api }

37

class CallbackHandler:

38

"""

39

LangChain callback handler for DeepEval tracing.

40

41

Usage:

42

- Add to LangChain chain/agent callbacks

43

- Automatically traces LangChain executions

44

- Syncs with Confident AI

45

"""

46

47

def tool(func):

48

"""

49

Decorator for marking LangChain tools for tracing.

50

51

Parameters:

52

- func: Tool function to decorate

53

54

Returns:

55

- Decorated tool with tracing

56

"""

57

```

58

59

Usage example:

60

61

```python

62

from langchain.chains import RetrievalQA

63

from langchain_openai import ChatOpenAI

64

from deepeval.integrations.langchain import CallbackHandler

65

66

# Create callback handler

67

callback = CallbackHandler()

68

69

# Use with LangChain

70

llm = ChatOpenAI(temperature=0)

71

qa_chain = RetrievalQA.from_chain_type(

72

llm=llm,

73

retriever=retriever,

74

callbacks=[callback]

75

)

76

77

# Executions are automatically traced

78

result = qa_chain.run("What is quantum computing?")

79

```

80

81

LangChain tool tracing:

82

83

```python

84

from langchain.tools import tool as langchain_tool

85

from deepeval.integrations.langchain import tool as deepeval_tool

86

87

@deepeval_tool

88

@langchain_tool

89

def search_database(query: str) -> str:

90

"""Search the product database."""

91

results = db.search(query)

92

return results

93

94

# Tool calls are automatically traced

95

```

96

97

### LlamaIndex Integration

98

99

Instrument LlamaIndex applications for automatic tracing.

100

101

```python { .api }

102

def instrument_llama_index():

103

"""

104

Instruments LlamaIndex for automatic tracing.

105

106

Usage:

107

- Call once at the start of your application

108

- All LlamaIndex operations are automatically traced

109

- Traces include retrieval, LLM calls, and synthesis

110

"""

111

```

112

113

Usage example:

114

115

```python

116

from llama_index.core import VectorStoreIndex, SimpleDirectoryReader

117

from deepeval.integrations.llama_index import instrument_llama_index

118

119

# Instrument LlamaIndex

120

instrument_llama_index()

121

122

# All operations are now traced

123

documents = SimpleDirectoryReader('./data').load_data()

124

index = VectorStoreIndex.from_documents(documents)

125

query_engine = index.as_query_engine()

126

127

# Queries are automatically traced

128

response = query_engine.query("What is the main topic?")

129

```

130

131

Evaluate LlamaIndex with metrics:

132

133

```python

134

from deepeval.integrations.llama_index import instrument_llama_index

135

from deepeval.tracing import observe

136

from deepeval.metrics import AnswerRelevancyMetric

137

from deepeval.test_case import LLMTestCase

138

from deepeval import evaluate

139

140

instrument_llama_index()

141

142

answer_relevancy = AnswerRelevancyMetric(threshold=0.7)

143

144

@observe(metrics=[answer_relevancy])

145

def query_llama_index(question: str):

146

"""Query LlamaIndex with tracing."""

147

response = query_engine.query(question)

148

149

from deepeval.tracing import update_current_span

150

update_current_span(

151

test_case=LLMTestCase(

152

input=question,

153

actual_output=str(response)

154

)

155

)

156

157

return response

158

159

# Evaluate

160

from deepeval.dataset import Golden

161

goldens = [Golden(input="What is AI?")]

162

result = evaluate(observed_callback=query_llama_index, goldens=goldens)

163

```

164

165

### CrewAI Integration

166

167

Instrument CrewAI agents and crews for tracing.

168

169

```python { .api }

170

def instrument_crewai():

171

"""

172

Instruments CrewAI for automatic tracing.

173

174

Usage:

175

- Call once at the start of your application

176

- All CrewAI agent operations are traced

177

- Includes task execution, tool usage, and collaboration

178

"""

179

180

class Crew:

181

"""

182

DeepEval-wrapped Crew class.

183

184

Usage:

185

- Use instead of crewai.Crew for automatic tracing

186

- Same API as CrewAI Crew

187

"""

188

189

class Agent:

190

"""

191

DeepEval-wrapped Agent class.

192

193

Usage:

194

- Use instead of crewai.Agent for automatic tracing

195

- Same API as CrewAI Agent

196

"""

197

198

class LLM:

199

"""

200

DeepEval-wrapped LLM class for CrewAI.

201

"""

202

203

def tool(func):

204

"""

205

Decorator for marking CrewAI tools.

206

"""

207

```

208

209

Usage example:

210

211

```python

212

from deepeval.integrations.crewai import instrument_crewai, Crew, Agent, tool

213

214

# Instrument CrewAI

215

instrument_crewai()

216

217

# Define tools with tracing

218

@tool

219

def search_web(query: str) -> str:

220

"""Search the web for information."""

221

return perform_search(query)

222

223

# Create agents (automatically traced)

224

from crewai import Task

225

226

researcher = Agent(

227

role="Researcher",

228

goal="Research information",

229

backstory="Expert researcher",

230

tools=[search_web]

231

)

232

233

writer = Agent(

234

role="Writer",

235

goal="Write content",

236

backstory="Expert writer"

237

)

238

239

# Create tasks

240

research_task = Task(

241

description="Research quantum computing",

242

agent=researcher

243

)

244

245

write_task = Task(

246

description="Write an article about quantum computing",

247

agent=writer

248

)

249

250

# Create and run crew (automatically traced)

251

crew = Crew(

252

agents=[researcher, writer],

253

tasks=[research_task, write_task]

254

)

255

256

result = crew.kickoff()

257

```

258

259

Alternative using wrapped classes:

260

261

```python

262

from deepeval.integrations.crewai import Crew, Agent, LLM, tool

263

264

@tool

265

def calculator(expression: str) -> float:

266

"""Calculate mathematical expressions."""

267

    return eval(expression)  # WARNING: eval is unsafe on untrusted input; use ast.literal_eval or a math parser in production

268

269

# Use wrapped classes

270

agent = Agent(

271

role="Math Expert",

272

goal="Solve math problems",

273

tools=[calculator],

274

llm=LLM(model="gpt-4")

275

)

276

277

crew = Crew(agents=[agent], tasks=[...])

278

result = crew.kickoff()

279

# All operations are traced

280

```

281

282

### PydanticAI Integration

283

284

Instrument PydanticAI agents for tracing.

285

286

```python { .api }

287

def instrument_pydantic_ai(

288

settings: Optional[ConfidentInstrumentationSettings] = None

289

):

290

"""

291

Instruments PydanticAI for automatic tracing.

292

293

Parameters:

294

- settings (ConfidentInstrumentationSettings, optional): Instrumentation configuration

295

"""

296

297

class ConfidentInstrumentationSettings:

298

"""

299

Configuration for PydanticAI instrumentation.

300

301

Parameters:

302

- trace_runs (bool): Trace agent runs (default: True)

303

- trace_tools (bool): Trace tool calls (default: True)

304

- trace_prompts (bool): Trace prompts (default: True)

305

"""

306

307

class Agent:

308

"""

309

DeepEval-wrapped PydanticAI Agent class.

310

311

Usage:

312

- Use instead of pydantic_ai.Agent for automatic tracing

313

- Same API as PydanticAI Agent

314

"""

315

```

316

317

Usage example:

318

319

```python

320

from deepeval.integrations.pydantic_ai import (

321

instrument_pydantic_ai,

322

ConfidentInstrumentationSettings

323

)

324

325

# Instrument with settings

326

instrument_pydantic_ai(

327

settings=ConfidentInstrumentationSettings(

328

trace_runs=True,

329

trace_tools=True,

330

trace_prompts=True

331

)

332

)

333

334

# Use PydanticAI as normal

335

from pydantic_ai import Agent

336

337

agent = Agent(

338

"openai:gpt-4",

339

system_prompt="You are a helpful assistant"

340

)

341

342

# Agent runs are automatically traced

343

result = agent.run_sync("What is quantum computing?")

344

```

345

346

Using wrapped Agent:

347

348

```python

349

from deepeval.integrations.pydantic_ai import Agent

350

351

agent = Agent(

352

"openai:gpt-4",

353

system_prompt="You are a helpful assistant"

354

)

355

356

# Automatically traced

357

result = agent.run_sync("Explain machine learning")

358

```

359

360

## Evaluation with Integrations

361

362

### LangChain + Metrics

363

364

```python

365

from langchain.chains import RetrievalQA

366

from deepeval.integrations.langchain import CallbackHandler

367

from deepeval.metrics import FaithfulnessMetric, AnswerRelevancyMetric

368

from deepeval import evaluate

369

from deepeval.test_case import LLMTestCase

370

from deepeval.dataset import Golden

371

372

callback = CallbackHandler()

373

374

qa_chain = RetrievalQA.from_chain_type(

375

llm=llm,

376

retriever=retriever,

377

callbacks=[callback]

378

)

379

380

# Wrap for evaluation

381

from deepeval.tracing import observe, update_current_span

382

383

@observe(metrics=[

384

FaithfulnessMetric(threshold=0.8),

385

AnswerRelevancyMetric(threshold=0.7)

386

])

387

def query_with_langchain(question: str):

388

result = qa_chain.run(question)

389

390

update_current_span(

391

test_case=LLMTestCase(

392

input=question,

393

actual_output=result

394

)

395

)

396

397

return result

398

399

# Evaluate

400

goldens = [Golden(input="What is AI?")]

401

result = evaluate(observed_callback=query_with_langchain, goldens=goldens)

402

```

403

404

### CrewAI + Metrics

405

406

```python

407

from deepeval.integrations.crewai import instrument_crewai, Crew

408

from deepeval.metrics import TaskCompletionMetric, ToolCorrectnessMetric

409

from deepeval.tracing import observe, update_current_span

410

from deepeval.test_case import LLMTestCase

411

from deepeval import evaluate

412

413

instrument_crewai()

414

415

task_completion = TaskCompletionMetric(threshold=0.8)

416

417

@observe(metrics=[task_completion])

418

def run_crew_task(task_description: str):

419

"""Run CrewAI task with evaluation."""

420

crew = Crew(agents=[...], tasks=[...])

421

result = crew.kickoff()

422

423

update_current_span(

424

test_case=LLMTestCase(

425

input=task_description,

426

actual_output=str(result)

427

)

428

)

429

430

return result

431

432

# Evaluate crew performance

433

from deepeval.dataset import Golden

434

goldens = [Golden(input="Research and write about AI")]

435

result = evaluate(observed_callback=run_crew_task, goldens=goldens)

436

```

437

438

## Integration Best Practices

439

440

1. **Instrument Early**: Call instrumentation functions at the start of your application

441

2. **Use Callbacks**: For LangChain, always use CallbackHandler

442

3. **Combine with Metrics**: Use `@observe` decorator with metrics for component evaluation

443

4. **Monitor Traces**: Log in to Confident AI to view detailed traces

444

5. **Evaluate Regularly**: Run evaluations during development and in CI/CD

445