# Settings and Configuration

Model settings, usage tracking, and configuration options for fine-tuning agent behavior, monitoring resource consumption, and setting usage limits.

## Capabilities

### Model Settings

Comprehensive model configuration options for controlling generation behavior.

```python { .api }
class ModelSettings(TypedDict, total=False):
    """
    Configuration options for model behavior.
    All fields are optional and can be used to override default settings.
    """
    max_tokens: int
    temperature: float
    top_p: float
    timeout: float | Timeout
    parallel_tool_calls: bool
    seed: int
    presence_penalty: float
    frequency_penalty: float
    logit_bias: dict[str, int]
    stop_sequences: list[str]
    extra_headers: dict[str, str]
    extra_body: object

def merge_model_settings(
    *settings: ModelSettings | None
) -> ModelSettings:
    """
    Merge multiple model settings configurations.

    Parameters:
    - settings: Variable number of ModelSettings to merge

    Returns:
    Merged ModelSettings with later settings overriding earlier ones
    """
```

### Usage Tracking

Comprehensive usage metrics and tracking for monitoring resource consumption.

```python { .api }
class RequestUsage:
    """
    Usage metrics for a single model request.
    """
    input_tokens: int | None
    output_tokens: int | None
    cache_creation_input_tokens: int | None
    cache_read_input_tokens: int | None
    audio_input_tokens: int | None
    audio_output_tokens: int | None
    audio_cache_creation_input_tokens: int | None
    audio_cache_read_input_tokens: int | None

    @property
    def total_tokens(self) -> int | None:
        """Total tokens used in this request."""

    def details(self) -> dict[str, int]:
        """Get detailed usage breakdown as dictionary."""

class RunUsage:
    """
    Usage metrics for an entire agent run.
    """
    request_count: int
    input_tokens: int | None
    output_tokens: int | None
    cache_creation_input_tokens: int | None
    cache_read_input_tokens: int | None
    audio_input_tokens: int | None
    audio_output_tokens: int | None
    audio_cache_creation_input_tokens: int | None
    audio_cache_read_input_tokens: int | None

    @property
    def total_tokens(self) -> int | None:
        """Total tokens used across all requests in run."""

    def details(self) -> dict[str, int | None]:
        """Get detailed usage breakdown as dictionary."""

    def __add__(self, other: RunUsage) -> RunUsage:
        """Add two RunUsage objects together."""

# Deprecated alias for backwards compatibility
Usage = RunUsage
```

### Usage Limits

Configuration for setting and enforcing usage limits.

```python { .api }
class UsageLimits:
    """
    Configuration for usage limits and quotas.
    """
    def __init__(
        self,
        *,
        request_limit: int | None = None,
        input_token_limit: int | None = None,
        output_token_limit: int | None = None,
        total_token_limit: int | None = None
    ):
        """
        Set usage limits for agent runs.

        Parameters:
        - request_limit: Maximum number of requests allowed
        - input_token_limit: Maximum input tokens allowed
        - output_token_limit: Maximum output tokens allowed
        - total_token_limit: Maximum total tokens allowed
        """

    def check_before_request(self, current_usage: RunUsage) -> None:
        """
        Check if a new request would exceed limits.

        Parameters:
        - current_usage: Current usage metrics

        Raises:
        UsageLimitExceeded: If limits would be exceeded
        """

    def check_after_request(
        self,
        current_usage: RunUsage,
        request_usage: RequestUsage
    ) -> None:
        """
        Check if usage limits have been exceeded after a request.

        Parameters:
        - current_usage: Current total usage
        - request_usage: Usage from the latest request

        Raises:
        UsageLimitExceeded: If limits have been exceeded
        """
```

### Timeout Configuration

Timeout handling for model requests.

```python { .api }
class Timeout:
    """
    Timeout configuration for model requests.
    """
    def __init__(
        self,
        *,
        connect: float | None = None,
        read: float | None = None,
        write: float | None = None,
        pool: float | None = None
    ):
        """
        Configure request timeouts.

        Parameters:
        - connect: Connection timeout in seconds
        - read: Read timeout in seconds
        - write: Write timeout in seconds
        - pool: Pool timeout in seconds
        """
```

### Instrumentation Settings

OpenTelemetry instrumentation configuration for monitoring and debugging.

```python { .api }
class InstrumentationSettings:
    """
    OpenTelemetry instrumentation configuration.
    """
    def __init__(
        self,
        *,
        capture_request_body: bool = True,
        capture_response_body: bool = True,
        capture_tool_calls: bool = True,
        capture_usage: bool = True,
        capture_model_name: bool = True
    ):
        """
        Configure OpenTelemetry instrumentation.

        Parameters:
        - capture_request_body: Whether to capture request bodies
        - capture_response_body: Whether to capture response bodies
        - capture_tool_calls: Whether to capture tool call details
        - capture_usage: Whether to capture usage metrics
        - capture_model_name: Whether to capture model names
        """
```

## Model Settings Details

### Core Generation Parameters

```python
# Temperature: Controls randomness (0.0 = deterministic, 2.0 = very random)
settings = ModelSettings(temperature=0.7)

# Max tokens: Maximum tokens to generate
settings = ModelSettings(max_tokens=1000)

# Top-p: Nucleus sampling parameter (0.1 = conservative, 1.0 = full vocabulary)
settings = ModelSettings(top_p=0.9)

# Seed: For reproducible outputs
settings = ModelSettings(seed=42)
```

### Advanced Parameters

```python
# Penalties: Control repetition (-2.0 to 2.0)
settings = ModelSettings(
    presence_penalty=0.5,   # Reduce likelihood of repeating topics
    frequency_penalty=0.3   # Reduce likelihood of repeating tokens
)

# Stop sequences: Strings that stop generation
settings = ModelSettings(stop_sequences=["END", "\n\n---"])

# Logit bias: Adjust token probabilities
settings = ModelSettings(
    logit_bias={
        "50256": -100,  # Strongly discourage specific token
        "1234": 20      # Strongly encourage specific token
    }
)
```

### Request Configuration

```python
# Timeout configuration
settings = ModelSettings(
    timeout=Timeout(
        connect=10.0,
        read=30.0,
        write=10.0
    )
)

# Tool calling configuration
settings = ModelSettings(parallel_tool_calls=True)

# Custom headers and body
settings = ModelSettings(
    extra_headers={"Custom-Header": "value"},
    extra_body={"custom_param": "value"}
)
```

## Usage Examples

### Basic Model Settings

```python
from pydantic_ai import Agent, ModelSettings

# Agent with custom model settings
settings = ModelSettings(
    temperature=0.2,  # More deterministic
    max_tokens=500,   # Limit response length
    top_p=0.9         # Slightly focused sampling
)

agent = Agent(
    model='gpt-4',
    system_prompt='You are a precise technical assistant.',
    model_settings=settings
)

result = agent.run_sync('Explain quantum computing')
```

### Runtime Model Settings Override

```python
from pydantic_ai import Agent, ModelSettings

agent = Agent(model='gpt-4')

# Override settings for specific run
creative_settings = ModelSettings(
    temperature=1.2,  # More creative
    top_p=0.95,       # Broader vocabulary
    max_tokens=1000
)

result = agent.run_sync(
    'Write a creative story',
    model_settings=creative_settings
)
```

### Usage Tracking

```python
from pydantic_ai import Agent

agent = Agent(model='gpt-4')
result = agent.run_sync('Hello, world!')

# Access usage information
usage = result.usage
print(f"Requests made: {usage.request_count}")
print(f"Input tokens: {usage.input_tokens}")
print(f"Output tokens: {usage.output_tokens}")
print(f"Total tokens: {usage.total_tokens}")

# Get detailed breakdown
details = usage.details()
print(f"Usage details: {details}")
```

### Usage Limits

```python
from pydantic_ai import Agent, UsageLimits
from pydantic_ai.exceptions import UsageLimitExceeded

# Set usage limits
limits = UsageLimits(
    request_limit=10,
    total_token_limit=5000
)

agent = Agent(
    model='gpt-4',
    usage_limits=limits
)

try:
    result = agent.run_sync('Generate a very long response')
    print(f"Tokens used: {result.usage.total_tokens}")
except UsageLimitExceeded as e:
    print(f"Usage limit exceeded: {e}")
```

### Merging Model Settings

```python
from pydantic_ai import Agent, ModelSettings, merge_model_settings

# Base settings
base_settings = ModelSettings(
    temperature=0.7,
    max_tokens=1000
)

# Override specific settings
override_settings = ModelSettings(
    temperature=0.2,  # Override temperature
    seed=42           # Add seed
)

# Merge settings
final_settings = merge_model_settings(base_settings, override_settings)
# Result: temperature=0.2, max_tokens=1000, seed=42

agent = Agent(
    model='gpt-4',
    model_settings=final_settings
)
```

### Custom Timeouts

```python
from pydantic_ai import Agent, ModelSettings, Timeout

# Custom timeout configuration
timeout_config = Timeout(
    connect=5.0,   # 5 seconds to connect
    read=60.0,     # 60 seconds to read response
    write=10.0     # 10 seconds to write request
)

settings = ModelSettings(timeout=timeout_config)

agent = Agent(
    model='gpt-4',
    model_settings=settings
)

# This agent will use the custom timeout settings
result = agent.run_sync('Generate a detailed explanation')
```

### Instrumentation Configuration

```python
from pydantic_ai import Agent, InstrumentationSettings

# Configure instrumentation
instrumentation = InstrumentationSettings(
    capture_request_body=True,
    capture_response_body=True,
    capture_tool_calls=True,
    capture_usage=True
)

agent = Agent(
    model='gpt-4',
    instrumented=instrumentation
)

# Agent will capture detailed telemetry data
result = agent.run_sync('Hello, world!')
```

### Production Configuration

```python
from pydantic_ai import Agent, ModelSettings, UsageLimits, Timeout

# Production-ready configuration
production_settings = ModelSettings(
    temperature=0.3,  # Consistent responses
    max_tokens=2000,  # Reasonable limit
    timeout=Timeout(
        connect=10.0,
        read=120.0    # Allow longer responses
    ),
    parallel_tool_calls=True,
    extra_headers={
        "User-Agent": "MyApp/1.0",
        "X-Request-ID": "unique-id"
    }
)

usage_limits = UsageLimits(
    request_limit=100,       # Max 100 requests per run
    total_token_limit=50000  # Max 50k tokens per run
)

agent = Agent(
    model='gpt-4',
    model_settings=production_settings,
    usage_limits=usage_limits,
    system_prompt='You are a production assistant.',
    retries=3  # Retry on failures
)

result = agent.run_sync('Process this user request')
print(f"Cost: ${result.cost:.4f}" if result.cost else "Cost not available")
```