or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

cli.md · embeddings.md · index.md · langchain-integration.md · model-operations.md · utilities.md · web-ui.md

langchain-integration.mddocs/

0

# LangChain Integration

1

2

`PyllamacppLLM` is a LangChain-compatible wrapper class that lets pyllamacpp models be used in LangChain workflows and chains. It exposes the same interface as other LangChain LLM implementations, so it can serve as a drop-in replacement in existing LangChain applications.

3

4

## Capabilities

5

6

### LangChain LLM Wrapper

7

8

A full-featured LangChain LLM implementation that wraps pyllamacpp's Model class, providing compatibility with LangChain's ecosystem of tools, chains, and agents.

9

10

```python { .api }

11

class PyllamacppLLM(LLM):

12

"""

13

LangChain-compatible wrapper for Pyllamacpp models.

14

15

Inherits from langchain.llms.base.LLM and provides

16

full compatibility with LangChain workflows.

17

"""

18

19

# Required model path

20

model: str

21

"""Path to the GGML model file."""

22

23

# Context and model parameters

24

n_ctx: int = 512

25

"""Token context window size."""

26

27

seed: int = 0

28

"""Random seed for generation. If -1, uses random seed."""

29

30

f16_kv: bool = False

31

"""Use half-precision for key/value cache."""

32

33

logits_all: bool = False

34

"""Return logits for all tokens, not just the last token."""

35

36

vocab_only: bool = False

37

"""Only load the vocabulary, no weights."""

38

39

use_mlock: bool = False

40

"""Force system to keep model in RAM."""

41

42

embedding: bool = False

43

"""Use embedding mode only."""

44

45

# Generation parameters

46

n_threads: int = 4

47

"""Number of CPU threads to use."""

48

49

n_predict: int = 50

50

"""Maximum number of tokens to generate."""

51

52

temp: float = 0.8

53

"""Temperature for sampling (higher = more random)."""

54

55

top_p: float = 0.95

56

"""Top-p nucleus sampling parameter."""

57

58

top_k: int = 40

59

"""Top-k sampling parameter."""

60

61

echo: bool = False

62

"""Whether to echo the input prompt in output."""

63

64

stop: List[str] = []

65

"""List of strings to stop generation when encountered."""

66

67

repeat_last_n: int = 64

68

"""Last n tokens to consider for repetition penalty."""

69

70

repeat_penalty: float = 1.3

71

"""Penalty factor for repeated tokens."""

72

73

n_batch: int = 1

74

"""Batch size for prompt processing."""

75

76

streaming: bool = False

77

"""Whether to stream results (not yet implemented)."""

78

```

79

80

### Basic Usage

81

82

```python

83

from pyllamacpp.langchain_llm import PyllamacppLLM

84

from langchain.prompts import PromptTemplate

85

from langchain.chains import LLMChain

86

87

# Initialize the LLM

88

llm = PyllamacppLLM(

89

model="/path/to/model.ggml",

90

temp=0.7,

91

n_predict=100,

92

top_p=0.9,

93

top_k=40

94

)

95

96

# Use with LangChain PromptTemplate

97

template = """

98

Question: {question}

99

100

Answer: Let me think about this step by step.

101

"""

102

103

prompt = PromptTemplate(template=template, input_variables=["question"])

104

llm_chain = LLMChain(prompt=prompt, llm=llm)

105

106

# Generate response

107

question = "What are the benefits of renewable energy?"

108

answer = llm_chain.run(question)

109

print(answer)

110

```

111

112

### Advanced LangChain Integration

113

114

```python

115

from pyllamacpp.langchain_llm import PyllamacppLLM

116

from langchain.chains import ConversationChain

117

from langchain.memory import ConversationBufferMemory

118

from langchain.prompts import PromptTemplate

119

120

# Configure LLM with advanced parameters

121

llm = PyllamacppLLM(

122

model="/path/to/model.ggml",

123

n_ctx=2048,

124

temp=0.8,

125

top_p=0.95,

126

top_k=40,

127

repeat_penalty=1.1,

128

n_predict=200,

129

stop=["Human:", "AI:"]

130

)

131

132

# Create conversation chain with memory

133

memory = ConversationBufferMemory()

134

conversation = ConversationChain(

135

llm=llm,

136

memory=memory,

137

verbose=True

138

)

139

140

# Multi-turn conversation

141

response1 = conversation.predict(input="Tell me about machine learning")

142

print(response1)

143

144

response2 = conversation.predict(input="What are some practical applications?")

145

print(response2)

146

```

147

148

### Custom Prompt Templates

149

150

```python

151

from pyllamacpp.langchain_llm import PyllamacppLLM

152

from langchain.prompts import PromptTemplate

153

from langchain.chains import LLMChain

154

155

llm = PyllamacppLLM(

156

model="/path/to/model.ggml",

157

temp=0.75,

158

n_predict=150

159

)

160

161

# Instruction-following template

162

instruction_template = """

163

Below is an instruction that describes a task. Write a response that appropriately completes the request.

164

165

### Instruction:

166

{instruction}

167

168

### Response:

169

"""

170

171

instruction_prompt = PromptTemplate(

172

template=instruction_template,

173

input_variables=["instruction"]

174

)

175

176

instruction_chain = LLMChain(prompt=instruction_prompt, llm=llm)

177

178

# Use the chain

179

result = instruction_chain.run(instruction="Explain how neural networks work")

180

print(result)

181

```

182

183

### RAG (Retrieval-Augmented Generation) Example

184

185

```python

186

from pyllamacpp.langchain_llm import PyllamacppLLM

187

from langchain.chains import RetrievalQA

188

from langchain.vectorstores import FAISS

189

from langchain.embeddings import HuggingFaceEmbeddings

190

from langchain.text_splitter import CharacterTextSplitter

191

from langchain.document_loaders import TextLoader

192

193

# Load and process documents

194

loader = TextLoader("document.txt")

195

documents = loader.load()

196

text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)

197

docs = text_splitter.split_documents(documents)

198

199

# Create vector store

200

embeddings = HuggingFaceEmbeddings()

201

db = FAISS.from_documents(docs, embeddings)

202

203

# Configure LLM

204

llm = PyllamacppLLM(

205

model="/path/to/model.ggml",

206

temp=0.3,

207

n_predict=200,

208

top_p=0.9

209

)

210

211

# Create RAG chain

212

qa_chain = RetrievalQA.from_chain_type(

213

llm=llm,

214

chain_type="stuff",

215

retriever=db.as_retriever()

216

)

217

218

# Ask questions about the documents

219

question = "What are the main topics discussed in the document?"

220

answer = qa_chain.run(question)

221

print(answer)

222

```

223

224

### Parameter Configuration

225

226

All parameters (model/context parameters, generation parameters, and LangChain-specific parameters) can be configured during initialization:

227

228

```python

229

# Model initialization parameters (passed to Model class)

230

llm = PyllamacppLLM(

231

model="/path/to/model.ggml",

232

# Context parameters

233

n_ctx=2048,

234

seed=42,

235

f16_kv=True,

236

logits_all=False,

237

vocab_only=False,

238

use_mlock=True,

239

embedding=False,

240

241

# Generation parameters

242

n_threads=8,

243

n_predict=200,

244

temp=0.8,

245

top_p=0.95,

246

top_k=40,

247

repeat_last_n=64,

248

repeat_penalty=1.1,

249

n_batch=8,

250

251

# LangChain-specific parameters

252

echo=False,

253

stop=["Human:", "Assistant:", "\n\n"],

254

streaming=False

255

)

256

```

257

258

### Error Handling

259

260

```python

261

from pyllamacpp.langchain_llm import PyllamacppLLM

262

263

try:

264

llm = PyllamacppLLM(model="/path/to/nonexistent/model.ggml")

265

except ValueError as e:

266

print(f"Model loading failed: {e}")

267

# Fallback to different model or error handling

268

269

try:

270

response = llm("Generate a very long response...")

271

except Exception as e:

272

print(f"Generation failed: {e}")

273

# Handle generation errors

274

```

275

276

## Integration with LangChain Ecosystem

277

278

The PyllamacppLLM class integrates seamlessly with the broader LangChain ecosystem:

279

280

- **Chains**: Use with SequentialChain, TransformChain, etc.

281

- **Agents**: Compatible with LangChain agents and tools

282

- **Memory**: Works with all LangChain memory implementations

283

- **Callbacks**: Supports LangChain callback system

284

- **Async**: Not yet supported; async operations are planned for a future release

285

286

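Example with agents (the `calculator` tool below uses `eval` only for brevity; because the agent passes model-generated text to the tool, replace it with a safe expression parser before using this pattern outside a demo):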

287

288

```python

289

from langchain.agents import initialize_agent, Tool

290

from langchain.agents import AgentType

291

292

# Define tools

293

def calculator(expression):

294

return str(eval(expression))

295

296

tools = [

297

Tool(

298

name="Calculator",

299

func=calculator,

300

description="Useful for mathematical calculations"

301

)

302

]

303

304

# Initialize agent with PyllamacppLLM

305

agent = initialize_agent(

306

tools=tools,

307

llm=llm,

308

agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,

309

verbose=True

310

)

311

312

# Use the agent

313

result = agent.run("What is 15 * 23 + 45?")

314

print(result)

315

```