0
# LangChain Integration
1
2
A LangChain-compatible wrapper class that enables seamless integration with LangChain workflows and chains. The PyllamacppLLM class provides the same interface as other LangChain LLM implementations, so it can be used as a drop-in replacement in existing LangChain applications.
3
4
## Capabilities
5
6
### LangChain LLM Wrapper
7
8
A full-featured LangChain LLM implementation that wraps pyllamacpp's Model class, providing compatibility with LangChain's ecosystem of tools, chains, and agents.
9
10
```python { .api }
11
class PyllamacppLLM(LLM):
    """
    LangChain-compatible wrapper for Pyllamacpp models.

    Inherits from langchain.llms.base.LLM and provides
    full compatibility with LangChain workflows.
    """

    # Required model path
    model: str
    """Path to the GGML model file."""

    # Context and model parameters
    n_ctx: int = 512
    """Token context window size."""

    seed: int = 0
    """Random seed for generation. If -1, uses random seed."""

    f16_kv: bool = False
    """Use half-precision for key/value cache."""

    logits_all: bool = False
    """Return logits for all tokens, not just the last token."""

    vocab_only: bool = False
    """Only load the vocabulary, no weights."""

    use_mlock: bool = False
    """Force system to keep model in RAM."""

    embedding: bool = False
    """Use embedding mode only."""

    # Generation parameters
    n_threads: int = 4
    """Number of CPU threads to use."""

    n_predict: int = 50
    """Maximum number of tokens to generate."""

    temp: float = 0.8
    """Temperature for sampling (higher = more random)."""

    top_p: float = 0.95
    """Top-p nucleus sampling parameter."""

    top_k: int = 40
    """Top-k sampling parameter."""

    echo: bool = False
    """Whether to echo the input prompt in output."""

    stop: List[str] = []
    """List of strings to stop generation when encountered."""

    repeat_last_n: int = 64
    """Last n tokens to consider for repetition penalty."""

    repeat_penalty: float = 1.3
    """Penalty factor for repeated tokens."""

    n_batch: int = 1
    """Batch size for prompt processing."""

    streaming: bool = False
    """Whether to stream results (not yet implemented)."""
78
```
79
80
### Basic Usage
81
82
```python
83
from pyllamacpp.langchain_llm import PyllamacppLLM
84
from langchain.prompts import PromptTemplate
85
from langchain.chains import LLMChain
86
87
# Initialize the LLM
88
llm = PyllamacppLLM(
89
model="/path/to/model.ggml",
90
temp=0.7,
91
n_predict=100,
92
top_p=0.9,
93
top_k=40
94
)
95
96
# Use with LangChain PromptTemplate
97
template = """
98
Question: {question}
99
100
Answer: Let me think about this step by step.
101
"""
102
103
prompt = PromptTemplate(template=template, input_variables=["question"])
104
llm_chain = LLMChain(prompt=prompt, llm=llm)
105
106
# Generate response
107
question = "What are the benefits of renewable energy?"
108
answer = llm_chain.run(question)
109
print(answer)
110
```
111
112
### Advanced LangChain Integration
113
114
```python
115
from pyllamacpp.langchain_llm import PyllamacppLLM
116
from langchain.chains import ConversationChain
117
from langchain.memory import ConversationBufferMemory
118
from langchain.prompts import PromptTemplate
119
120
# Configure LLM with advanced parameters
121
llm = PyllamacppLLM(
122
model="/path/to/model.ggml",
123
n_ctx=2048,
124
temp=0.8,
125
top_p=0.95,
126
top_k=40,
127
repeat_penalty=1.1,
128
n_predict=200,
129
stop=["Human:", "AI:"]
130
)
131
132
# Create conversation chain with memory
133
memory = ConversationBufferMemory()
134
conversation = ConversationChain(
135
llm=llm,
136
memory=memory,
137
verbose=True
138
)
139
140
# Multi-turn conversation
141
response1 = conversation.predict(input="Tell me about machine learning")
142
print(response1)
143
144
response2 = conversation.predict(input="What are some practical applications?")
145
print(response2)
146
```
147
148
### Custom Prompt Templates
149
150
```python
151
from pyllamacpp.langchain_llm import PyllamacppLLM
152
from langchain.prompts import PromptTemplate
153
from langchain.chains import LLMChain
154
155
llm = PyllamacppLLM(
156
model="/path/to/model.ggml",
157
temp=0.75,
158
n_predict=150
159
)
160
161
# Instruction-following template
162
instruction_template = """
163
Below is an instruction that describes a task. Write a response that appropriately completes the request.
164
165
### Instruction:
166
{instruction}
167
168
### Response:
169
"""
170
171
instruction_prompt = PromptTemplate(
172
template=instruction_template,
173
input_variables=["instruction"]
174
)
175
176
instruction_chain = LLMChain(prompt=instruction_prompt, llm=llm)
177
178
# Use the chain
179
result = instruction_chain.run(instruction="Explain how neural networks work")
180
print(result)
181
```
182
183
### RAG (Retrieval-Augmented Generation) Example
184
185
```python
186
from pyllamacpp.langchain_llm import PyllamacppLLM
187
from langchain.chains import RetrievalQA
188
from langchain.vectorstores import FAISS
189
from langchain.embeddings import HuggingFaceEmbeddings
190
from langchain.text_splitter import CharacterTextSplitter
191
from langchain.document_loaders import TextLoader
192
193
# Load and process documents
194
loader = TextLoader("document.txt")
195
documents = loader.load()
196
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
197
docs = text_splitter.split_documents(documents)
198
199
# Create vector store
200
embeddings = HuggingFaceEmbeddings()
201
db = FAISS.from_documents(docs, embeddings)
202
203
# Configure LLM
204
llm = PyllamacppLLM(
205
model="/path/to/model.ggml",
206
temp=0.3,
207
n_predict=200,
208
top_p=0.9
209
)
210
211
# Create RAG chain
212
qa_chain = RetrievalQA.from_chain_type(
213
llm=llm,
214
chain_type="stuff",
215
retriever=db.as_retriever()
216
)
217
218
# Ask questions about the documents
219
question = "What are the main topics discussed in the document?"
220
answer = qa_chain.run(question)
221
print(answer)
222
```
223
224
### Parameter Configuration
225
226
All parameters (model initialization, generation, and LangChain-specific) can be configured during initialization:
227
228
```python
229
# Model initialization parameters (passed to Model class)
230
llm = PyllamacppLLM(
231
model="/path/to/model.ggml",
232
# Context parameters
233
n_ctx=2048,
234
seed=42,
235
f16_kv=True,
236
logits_all=False,
237
vocab_only=False,
238
use_mlock=True,
239
embedding=False,
240
241
# Generation parameters
242
n_threads=8,
243
n_predict=200,
244
temp=0.8,
245
top_p=0.95,
246
top_k=40,
247
repeat_last_n=64,
248
repeat_penalty=1.1,
249
n_batch=8,
250
251
# LangChain-specific parameters
252
echo=False,
253
stop=["Human:", "Assistant:", "\n\n"],
254
streaming=False
255
)
256
```
257
258
### Error Handling
259
260
```python
261
from pyllamacpp.langchain_llm import PyllamacppLLM
262
263
try:
    llm = PyllamacppLLM(model="/path/to/nonexistent/model.ggml")
except ValueError as e:
    print(f"Model loading failed: {e}")
    # Fallback to different model or error handling
else:
    # Only attempt generation once the model has actually loaded. If loading
    # failed, `llm` is unbound, and calling it would raise a NameError that
    # the broad handler below would misreport as a generation failure.
    try:
        response = llm("Generate a very long response...")
    except Exception as e:
        print(f"Generation failed: {e}")
        # Handle generation errors
274
```
275
276
## Integration with LangChain Ecosystem
277
278
The PyllamacppLLM class integrates seamlessly with the broader LangChain ecosystem:
279
280
- **Chains**: Use with SequentialChain, TransformChain, etc.
281
- **Agents**: Compatible with LangChain agents and tools
282
- **Memory**: Works with all LangChain memory implementations
283
- **Callbacks**: Supports LangChain callback system
284
- **Async**: Async operations are not supported yet (planned for the future)
285
286
Example with agents (`llm` is a `PyllamacppLLM` instance created as in the examples above):
287
288
```python
289
from langchain.agents import initialize_agent, Tool
290
from langchain.agents import AgentType
291
292
# Define tools
293
def calculator(expression):
    """Evaluate a basic arithmetic expression and return the result as a string.

    The expression is produced by the LLM agent, so it is treated as
    untrusted input: instead of ``eval``, the text is parsed with ``ast`` and
    only numeric literals and arithmetic operators are evaluated. Names,
    calls, attribute access and every other construct are rejected.

    Args:
        expression: Arithmetic expression such as ``"15 * 23 + 45"``.

    Returns:
        The computed value converted with ``str``.

    Raises:
        ValueError: If the expression contains anything other than numbers
            and the supported operators.
        SyntaxError: If the expression is not syntactically valid.
    """
    import ast
    import operator

    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Mod: operator.mod,
        ast.Pow: operator.pow,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}

    def evaluate(node):
        # Exact type check on purpose: it excludes bool, str, bytes and
        # complex literals, which are all ast.Constant nodes as well.
        if isinstance(node, ast.Constant) and type(node.value) in (int, float):
            return node.value
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            apply = binary_ops[type(node.op)]
            return apply(evaluate(node.left), evaluate(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](evaluate(node.operand))
        raise ValueError(f"Unsupported expression: {expression!r}")

    # eval() tolerated leading whitespace; strip it so ast.parse does too.
    tree = ast.parse(expression.strip(), mode="eval")
    return str(evaluate(tree.body))
295
296
tools = [
297
Tool(
298
name="Calculator",
299
func=calculator,
300
description="Useful for mathematical calculations"
301
)
302
]
303
304
# Initialize agent with PyllamacppLLM
305
agent = initialize_agent(
306
tools=tools,
307
llm=llm,
308
agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
309
verbose=True
310
)
311
312
# Use the agent
313
result = agent.run("What is 15 * 23 + 45?")
314
print(result)
315
```