Tessl Tile for pypi/pyllamacpp@2.4.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

cli.md embeddings.md index.md langchain-integration.md model-operations.md utilities.md web-ui.md

docs/langchain-integration.md

0
# LangChain Integration
1

2
`PyllamacppLLM` is a LangChain-compatible wrapper class that enables seamless integration with LangChain workflows and chains. It provides the same interface as other LangChain LLM implementations, so it can be used as a drop-in replacement in existing LangChain applications.
3

4
## Capabilities
5

6
### LangChain LLM Wrapper
7

8
A full-featured LangChain LLM implementation that wraps pyllamacpp's Model class, providing compatibility with LangChain's ecosystem of tools, chains, and agents.
9

10
```python { .api }
11
class PyllamacppLLM(LLM):
12
    """
13
    LangChain-compatible wrapper for Pyllamacpp models.
14
    
15
    Inherits from langchain.llms.base.LLM and provides
16
    full compatibility with LangChain workflows.
17
    """
18
    
19
    # Required model path
20
    model: str
21
    """Path to the GGML model file."""
22
    
23
    # Context and model parameters
24
    n_ctx: int = 512
25
    """Token context window size."""
26
    
27
    seed: int = 0
28
    """Random seed for generation. If -1, uses random seed."""
29
    
30
    f16_kv: bool = False
31
    """Use half-precision for key/value cache."""
32
    
33
    logits_all: bool = False
34
    """Return logits for all tokens, not just the last token."""
35
    
36
    vocab_only: bool = False
37
    """Only load the vocabulary, no weights."""
38
    
39
    use_mlock: bool = False
40
    """Force system to keep model in RAM."""
41
    
42
    embedding: bool = False
43
    """Use embedding mode only."""
44
    
45
    # Generation parameters
46
    n_threads: int = 4
47
    """Number of CPU threads to use."""
48
    
49
    n_predict: int = 50
50
    """Maximum number of tokens to generate."""
51
    
52
    temp: float = 0.8
53
    """Temperature for sampling (higher = more random)."""
54
    
55
    top_p: float = 0.95
56
    """Top-p nucleus sampling parameter."""
57
    
58
    top_k: int = 40
59
    """Top-k sampling parameter."""
60
    
61
    echo: bool = False
62
    """Whether to echo the input prompt in output."""
63
    
64
    stop: List[str] = []
65
    """List of strings to stop generation when encountered."""
66
    
67
    repeat_last_n: int = 64
68
    """Last n tokens to consider for repetition penalty."""
69
    
70
    repeat_penalty: float = 1.3
71
    """Penalty factor for repeated tokens."""
72
    
73
    n_batch: int = 1
74
    """Batch size for prompt processing."""
75
    
76
    streaming: bool = False
77
    """Whether to stream results (not yet implemented)."""
78
```
79

80
### Basic Usage
81

82
```python
83
from pyllamacpp.langchain_llm import PyllamacppLLM
84
from langchain.prompts import PromptTemplate
85
from langchain.chains import LLMChain
86

87
# Initialize the LLM
88
llm = PyllamacppLLM(
89
    model="/path/to/model.ggml",
90
    temp=0.7,
91
    n_predict=100,
92
    top_p=0.9,
93
    top_k=40
94
)
95

96
# Use with LangChain PromptTemplate
97
template = """
98
Question: {question}
99

100
Answer: Let me think about this step by step.
101
"""
102

103
prompt = PromptTemplate(template=template, input_variables=["question"])
104
llm_chain = LLMChain(prompt=prompt, llm=llm)
105

106
# Generate response
107
question = "What are the benefits of renewable energy?"
108
answer = llm_chain.run(question)
109
print(answer)
110
```
111

112
### Advanced LangChain Integration
113

114
```python
115
from pyllamacpp.langchain_llm import PyllamacppLLM
116
from langchain.chains import ConversationChain
117
from langchain.memory import ConversationBufferMemory
118
from langchain.prompts import PromptTemplate
119

120
# Configure LLM with advanced parameters
121
llm = PyllamacppLLM(
122
    model="/path/to/model.ggml",
123
    n_ctx=2048,
124
    temp=0.8,
125
    top_p=0.95,
126
    top_k=40,
127
    repeat_penalty=1.1,
128
    n_predict=200,
129
    stop=["Human:", "AI:"]
130
)
131

132
# Create conversation chain with memory
133
memory = ConversationBufferMemory()
134
conversation = ConversationChain(
135
    llm=llm,
136
    memory=memory,
137
    verbose=True
138
)
139

140
# Multi-turn conversation
141
response1 = conversation.predict(input="Tell me about machine learning")
142
print(response1)
143

144
response2 = conversation.predict(input="What are some practical applications?")
145
print(response2)
146
```
147

148
### Custom Prompt Templates
149

150
```python
151
from pyllamacpp.langchain_llm import PyllamacppLLM
152
from langchain.prompts import PromptTemplate
153
from langchain.chains import LLMChain
154

155
llm = PyllamacppLLM(
156
    model="/path/to/model.ggml",
157
    temp=0.75,
158
    n_predict=150
159
)
160

161
# Instruction-following template
162
instruction_template = """
163
Below is an instruction that describes a task. Write a response that appropriately completes the request.
164

165
### Instruction:
166
{instruction}
167

168
### Response:
169
"""
170

171
instruction_prompt = PromptTemplate(
172
    template=instruction_template,
173
    input_variables=["instruction"]
174
)
175

176
instruction_chain = LLMChain(prompt=instruction_prompt, llm=llm)
177

178
# Use the chain
179
result = instruction_chain.run(instruction="Explain how neural networks work")
180
print(result)
181
```
182

183
### RAG (Retrieval-Augmented Generation) Example
184

185
```python
186
from pyllamacpp.langchain_llm import PyllamacppLLM
187
from langchain.chains import RetrievalQA
188
from langchain.vectorstores import FAISS
189
from langchain.embeddings import HuggingFaceEmbeddings
190
from langchain.text_splitter import CharacterTextSplitter
191
from langchain.document_loaders import TextLoader
192

193
# Load and process documents
194
loader = TextLoader("document.txt")
195
documents = loader.load()
196
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
197
docs = text_splitter.split_documents(documents)
198

199
# Create vector store
200
embeddings = HuggingFaceEmbeddings()
201
db = FAISS.from_documents(docs, embeddings)
202

203
# Configure LLM
204
llm = PyllamacppLLM(
205
    model="/path/to/model.ggml",
206
    temp=0.3,
207
    n_predict=200,
208
    top_p=0.9
209
)
210

211
# Create RAG chain
212
qa_chain = RetrievalQA.from_chain_type(
213
    llm=llm,
214
    chain_type="stuff",
215
    retriever=db.as_retriever()
216
)
217

218
# Ask questions about the documents
219
question = "What are the main topics discussed in the document?"
220
answer = qa_chain.run(question)
221
print(answer)
222
```
223

224
### Parameter Configuration
225

226
All parameters (model context settings, generation settings, and LangChain-specific options) can be configured during initialization:
227

228
```python
229
# Model initialization parameters (passed to Model class)
230
llm = PyllamacppLLM(
231
    model="/path/to/model.ggml",
232
    # Context parameters
233
    n_ctx=2048,
234
    seed=42,
235
    f16_kv=True,
236
    logits_all=False,
237
    vocab_only=False,
238
    use_mlock=True,
239
    embedding=False,
240
    
241
    # Generation parameters
242
    n_threads=8,
243
    n_predict=200,
244
    temp=0.8,
245
    top_p=0.95,
246
    top_k=40,
247
    repeat_last_n=64,
248
    repeat_penalty=1.1,
249
    n_batch=8,
250
    
251
    # LangChain-specific parameters
252
    echo=False,
253
    stop=["Human:", "Assistant:", "\n\n"],
254
    streaming=False
255
)
256
```
257

258
### Error Handling
259

260
```python
261
from pyllamacpp.langchain_llm import PyllamacppLLM
262

263
try:
264
    llm = PyllamacppLLM(model="/path/to/nonexistent/model.ggml")
265
except ValueError as e:
266
    print(f"Model loading failed: {e}")
267
    # Fall back to a different model or re-raise here; `llm` remains undefined if loading failed
268

269
try:
270
    response = llm("Generate a very long response...")
271
except Exception as e:
272
    print(f"Generation failed: {e}")
273
    # Handle generation errors
274
```
275

276
## Integration with LangChain Ecosystem
277

278
The PyllamacppLLM class integrates seamlessly with the broader LangChain ecosystem:
279

280
- **Chains**: Use with SequentialChain, TransformChain, etc.
281
- **Agents**: Compatible with LangChain agents and tools
282
- **Memory**: Works with all LangChain memory implementations
283
- **Callbacks**: Supports LangChain callback system
284
- **Async**: Not yet supported; support for async operations is planned for a future release
285

286
Example with agents (assumes `llm` is a `PyllamacppLLM` instance created as in the examples above):
287

288
```python
289
from langchain.agents import initialize_agent, Tool
290
from langchain.agents import AgentType
291

292
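# Define tools (WARNING: eval() executes arbitrary code on model-generated input; use a restricted expression parser outside of demos)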
293
def calculator(expression):
294
    return str(eval(expression))
295

296
tools = [
297
    Tool(
298
        name="Calculator",
299
        func=calculator,
300
        description="Useful for mathematical calculations"
301
    )
302
]
303

304
# Initialize agent with PyllamacppLLM
305
agent = initialize_agent(
306
    tools=tools,
307
    llm=llm,
308
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
309
    verbose=True
310
)
311

312
# Use the agent
313
result = agent.run("What is 15 * 23 + 45?")
314
print(result)
315
```

Version

Tile

Files

docs/langchain-integration.md

docs/langchain-integration.md