pypi-pydantic-ai

Description
Agent Framework / shim to use Pydantic with LLMs
Author
tessl
Last updated

How to use

npx @tessl/cli registry install tessl/pypi-pydantic-ai@0.8.0

docs/settings.md

1
# Settings and Configuration
2
3
Model settings, usage tracking, and configuration options for fine-tuning agent behavior, monitoring resource consumption, and setting usage limits.
4
5
## Capabilities
6
7
### Model Settings
8
9
Comprehensive model configuration options for controlling generation behavior.
10
11
```python { .api }
12
class ModelSettings(TypedDict, total=False):
13
"""
14
Configuration options for model behavior.
15
All fields are optional and can be used to override default settings.
16
"""
17
max_tokens: int
18
temperature: float
19
top_p: float
20
timeout: float | Timeout
21
parallel_tool_calls: bool
22
seed: int
23
presence_penalty: float
24
frequency_penalty: float
25
logit_bias: dict[str, int]
26
stop_sequences: list[str]
27
extra_headers: dict[str, str]
28
extra_body: object
29
30
def merge_model_settings(
31
*settings: ModelSettings | None
32
) -> ModelSettings:
33
"""
34
Merge multiple model settings configurations.
35
36
Parameters:
37
- settings: Variable number of ModelSettings to merge
38
39
Returns:
40
Merged ModelSettings with later settings overriding earlier ones
41
"""
42
```
43
44
### Usage Tracking
45
46
Comprehensive usage metrics and tracking for monitoring resource consumption.
47
48
```python { .api }
49
class RequestUsage:
50
"""
51
Usage metrics for a single model request.
52
"""
53
input_tokens: int | None
54
output_tokens: int | None
55
cache_creation_input_tokens: int | None
56
cache_read_input_tokens: int | None
57
audio_input_tokens: int | None
58
audio_output_tokens: int | None
59
audio_cache_creation_input_tokens: int | None
60
audio_cache_read_input_tokens: int | None
61
62
@property
63
def total_tokens(self) -> int | None:
64
"""Total tokens used in this request."""
65
66
def details(self) -> dict[str, int]:
67
"""Get detailed usage breakdown as dictionary."""
68
69
class RunUsage:
70
"""
71
Usage metrics for an entire agent run.
72
"""
73
request_count: int
74
input_tokens: int | None
75
output_tokens: int | None
76
cache_creation_input_tokens: int | None
77
cache_read_input_tokens: int | None
78
audio_input_tokens: int | None
79
audio_output_tokens: int | None
80
audio_cache_creation_input_tokens: int | None
81
audio_cache_read_input_tokens: int | None
82
83
@property
84
def total_tokens(self) -> int | None:
85
"""Total tokens used across all requests in run."""
86
87
def details(self) -> dict[str, int | None]:
88
"""Get detailed usage breakdown as dictionary."""
89
90
def __add__(self, other: RunUsage) -> RunUsage:
91
"""Add two RunUsage objects together."""
92
93
# Deprecated alias for backwards compatibility
94
Usage = RunUsage
95
```
96
97
### Usage Limits
98
99
Configuration for setting and enforcing usage limits.
100
101
```python { .api }
102
class UsageLimits:
103
"""
104
Configuration for usage limits and quotas.
105
"""
106
def __init__(
107
self,
108
*,
109
request_limit: int | None = None,
110
input_token_limit: int | None = None,
111
output_token_limit: int | None = None,
112
total_token_limit: int | None = None
113
):
114
"""
115
Set usage limits for agent runs.
116
117
Parameters:
118
- request_limit: Maximum number of requests allowed
119
- input_token_limit: Maximum input tokens allowed
120
- output_token_limit: Maximum output tokens allowed
121
- total_token_limit: Maximum total tokens allowed
122
"""
123
124
def check_before_request(self, current_usage: RunUsage) -> None:
125
"""
126
Check if a new request would exceed limits.
127
128
Parameters:
129
- current_usage: Current usage metrics
130
131
Raises:
132
UsageLimitExceeded: If limits would be exceeded
133
"""
134
135
def check_after_request(
136
self,
137
current_usage: RunUsage,
138
request_usage: RequestUsage
139
) -> None:
140
"""
141
Check if usage limits have been exceeded after a request.
142
143
Parameters:
144
- current_usage: Current total usage
145
- request_usage: Usage from the latest request
146
147
Raises:
148
UsageLimitExceeded: If limits have been exceeded
149
"""
150
```
151
152
### Timeout Configuration
153
154
Timeout handling for model requests.
155
156
```python { .api }
157
class Timeout:
158
"""
159
Timeout configuration for model requests.
160
"""
161
def __init__(
162
self,
163
*,
164
connect: float | None = None,
165
read: float | None = None,
166
write: float | None = None,
167
pool: float | None = None
168
):
169
"""
170
Configure request timeouts.
171
172
Parameters:
173
- connect: Connection timeout in seconds
174
- read: Read timeout in seconds
175
- write: Write timeout in seconds
176
- pool: Pool timeout in seconds
177
"""
178
```
179
180
### Instrumentation Settings
181
182
OpenTelemetry instrumentation configuration for monitoring and debugging.
183
184
```python { .api }
185
class InstrumentationSettings:
186
"""
187
OpenTelemetry instrumentation configuration.
188
"""
189
def __init__(
190
self,
191
*,
192
capture_request_body: bool = True,
193
capture_response_body: bool = True,
194
capture_tool_calls: bool = True,
195
capture_usage: bool = True,
196
capture_model_name: bool = True
197
):
198
"""
199
Configure OpenTelemetry instrumentation.
200
201
Parameters:
202
- capture_request_body: Whether to capture request bodies
203
- capture_response_body: Whether to capture response bodies
204
- capture_tool_calls: Whether to capture tool call details
205
- capture_usage: Whether to capture usage metrics
206
- capture_model_name: Whether to capture model names
207
"""
208
```
209
210
## Model Settings Details
211
212
### Core Generation Parameters
213
214
```python
215
# Temperature: Controls randomness (0.0 = deterministic, 2.0 = very random)
216
settings = ModelSettings(temperature=0.7)
217
218
# Max tokens: Maximum tokens to generate
219
settings = ModelSettings(max_tokens=1000)
220
221
# Top-p: Nucleus sampling parameter (0.1 = conservative, 1.0 = full vocabulary)
222
settings = ModelSettings(top_p=0.9)
223
224
# Seed: For reproducible outputs
225
settings = ModelSettings(seed=42)
226
```
227
228
### Advanced Parameters
229
230
```python
231
# Penalties: Control repetition (-2.0 to 2.0)
232
settings = ModelSettings(
233
presence_penalty=0.5, # Reduce likelihood of repeating topics
234
frequency_penalty=0.3 # Reduce likelihood of repeating tokens
235
)
236
237
# Stop sequences: Strings that stop generation
238
settings = ModelSettings(stop_sequences=["END", "\n\n---"])
239
240
# Logit bias: Adjust token probabilities
241
settings = ModelSettings(
242
logit_bias={
243
"50256": -100, # Strongly discourage specific token
244
"1234": 20 # Strongly encourage specific token
245
}
246
)
247
```
248
249
### Request Configuration
250
251
```python
252
# Timeout configuration
253
settings = ModelSettings(
254
timeout=Timeout(
255
connect=10.0,
256
read=30.0,
257
write=10.0
258
)
259
)
260
261
# Tool calling configuration
262
settings = ModelSettings(parallel_tool_calls=True)
263
264
# Custom headers and body
265
settings = ModelSettings(
266
extra_headers={"Custom-Header": "value"},
267
extra_body={"custom_param": "value"}
268
)
269
```
270
271
## Usage Examples
272
273
### Basic Model Settings
274
275
```python
276
from pydantic_ai import Agent, ModelSettings
277
278
# Agent with custom model settings
279
settings = ModelSettings(
280
temperature=0.2, # More deterministic
281
max_tokens=500, # Limit response length
282
top_p=0.9 # Slightly focused sampling
283
)
284
285
agent = Agent(
286
model='gpt-4',
287
system_prompt='You are a precise technical assistant.',
288
model_settings=settings
289
)
290
291
result = agent.run_sync('Explain quantum computing')
292
```
293
294
### Runtime Model Settings Override
295
296
```python
297
from pydantic_ai import Agent, ModelSettings
298
299
agent = Agent(model='gpt-4')
300
301
# Override settings for specific run
302
creative_settings = ModelSettings(
303
temperature=1.2, # More creative
304
top_p=0.95, # Broader vocabulary
305
max_tokens=1000
306
)
307
308
result = agent.run_sync(
309
'Write a creative story',
310
model_settings=creative_settings
311
)
312
```
313
314
### Usage Tracking
315
316
```python
317
from pydantic_ai import Agent
318
319
agent = Agent(model='gpt-4')
320
result = agent.run_sync('Hello, world!')
321
322
# Access usage information
323
usage = result.usage()  # usage() is a method on the run result
324
print(f"Requests made: {usage.request_count}")
325
print(f"Input tokens: {usage.input_tokens}")
326
print(f"Output tokens: {usage.output_tokens}")
327
print(f"Total tokens: {usage.total_tokens}")
328
329
# Get detailed breakdown
330
details = usage.details()
331
print(f"Usage details: {details}")
332
```
333
334
### Usage Limits
335
336
```python
337
from pydantic_ai import Agent, UsageLimits
338
from pydantic_ai.exceptions import UsageLimitExceeded
339
340
# Set usage limits
341
limits = UsageLimits(
342
request_limit=10,
343
total_token_limit=5000
344
)
345
346
agent = Agent(model='gpt-4')

try:
    # usage_limits is applied per run, as an argument to run_sync/run
    result = agent.run_sync(
        'Generate a very long response',
        usage_limits=limits
    )
    print(f"Tokens used: {result.usage().total_tokens}")
354
except UsageLimitExceeded as e:
355
print(f"Usage limit exceeded: {e}")
356
```
357
358
### Merging Model Settings
359
360
```python
361
from pydantic_ai import Agent, ModelSettings, merge_model_settings
362
363
# Base settings
364
base_settings = ModelSettings(
365
temperature=0.7,
366
max_tokens=1000
367
)
368
369
# Override specific settings
370
override_settings = ModelSettings(
371
temperature=0.2, # Override temperature
372
seed=42 # Add seed
373
)
374
375
# Merge settings
376
final_settings = merge_model_settings(base_settings, override_settings)
377
# Result: temperature=0.2, max_tokens=1000, seed=42
378
379
agent = Agent(
380
model='gpt-4',
381
model_settings=final_settings
382
)
383
```
384
385
### Custom Timeouts
386
387
```python
388
from pydantic_ai import Agent, ModelSettings, Timeout
389
390
# Custom timeout configuration
391
timeout_config = Timeout(
392
connect=5.0, # 5 seconds to connect
393
read=60.0, # 60 seconds to read response
394
write=10.0 # 10 seconds to write request
395
)
396
397
settings = ModelSettings(timeout=timeout_config)
398
399
agent = Agent(
400
model='gpt-4',
401
model_settings=settings
402
)
403
404
# This agent will use the custom timeout settings
405
result = agent.run_sync('Generate a detailed explanation')
406
```
407
408
### Instrumentation Configuration
409
410
```python
411
from pydantic_ai import Agent, InstrumentationSettings
412
413
# Configure instrumentation
414
instrumentation = InstrumentationSettings(
415
capture_request_body=True,
416
capture_response_body=True,
417
capture_tool_calls=True,
418
capture_usage=True
419
)
420
421
agent = Agent(
422
model='gpt-4',
423
instrument=instrumentation
424
)
425
426
# Agent will capture detailed telemetry data
427
result = agent.run_sync('Hello, world!')
428
```
429
430
### Production Configuration
431
432
```python
433
from pydantic_ai import Agent, ModelSettings, UsageLimits, Timeout
434
435
# Production-ready configuration
436
production_settings = ModelSettings(
437
temperature=0.3, # Consistent responses
438
max_tokens=2000, # Reasonable limit
439
timeout=Timeout(
440
connect=10.0,
441
read=120.0 # Allow longer responses
442
),
443
parallel_tool_calls=True,
444
extra_headers={
445
"User-Agent": "MyApp/1.0",
446
"X-Request-ID": "unique-id"
447
}
448
)
449
450
usage_limits = UsageLimits(
451
request_limit=100, # Max 100 requests per run
452
total_token_limit=50000 # Max 50k tokens per run
453
)
454
455
agent = Agent(
    model='gpt-4',
    model_settings=production_settings,
    system_prompt='You are a production assistant.',
    retries=3  # Retry on failures
)

# usage_limits is applied per run
result = agent.run_sync(
    'Process this user request',
    usage_limits=usage_limits
)
print(f"Total tokens: {result.usage().total_tokens}")
465
```