- Spec files
pypi-pydantic-ai
Describes: pkg:pypi/pydantic-ai@0.8.x
- Description
- Agent Framework / shim to use Pydantic with LLMs
- Author
- tessl
- Last updated
docs/settings.md
# Settings and Configuration

Model settings, usage tracking, and configuration options for fine-tuning agent behavior, monitoring resource consumption, and setting usage limits.

## Capabilities

### Model Settings

Comprehensive model configuration options for controlling generation behavior.

```python { .api }
class ModelSettings(TypedDict, total=False):
    """
    Configuration options for model behavior.
    All fields are optional and can be used to override default settings.
    """
    max_tokens: int
    temperature: float
    top_p: float
    timeout: float | Timeout
    parallel_tool_calls: bool
    seed: int
    presence_penalty: float
    frequency_penalty: float
    logit_bias: dict[str, int]
    stop_sequences: list[str]
    extra_headers: dict[str, str]
    extra_body: object

def merge_model_settings(
    *settings: ModelSettings | None
) -> ModelSettings:
    """
    Merge multiple model settings configurations.

    Parameters:
    - settings: Variable number of ModelSettings to merge

    Returns:
    Merged ModelSettings with later settings overriding earlier ones
    """
```

### Usage Tracking

Comprehensive usage metrics and tracking for monitoring resource consumption.

```python { .api }
class RequestUsage:
    """
    Usage metrics for a single model request.
    """
    input_tokens: int | None
    output_tokens: int | None
    cache_creation_input_tokens: int | None
    cache_read_input_tokens: int | None
    audio_input_tokens: int | None
    audio_output_tokens: int | None
    audio_cache_creation_input_tokens: int | None
    audio_cache_read_input_tokens: int | None

    @property
    def total_tokens(self) -> int | None:
        """Total tokens used in this request."""

    def details(self) -> dict[str, int]:
        """Get detailed usage breakdown as dictionary."""

class RunUsage:
    """
    Usage metrics for an entire agent run.
    """
    request_count: int
    input_tokens: int | None
    output_tokens: int | None
    cache_creation_input_tokens: int | None
    cache_read_input_tokens: int | None
    audio_input_tokens: int | None
    audio_output_tokens: int | None
    audio_cache_creation_input_tokens: int | None
    audio_cache_read_input_tokens: int | None

    @property
    def total_tokens(self) -> int | None:
        """Total tokens used across all requests in run."""

    def details(self) -> dict[str, int | None]:
        """Get detailed usage breakdown as dictionary."""

    def __add__(self, other: RunUsage) -> RunUsage:
        """Add two RunUsage objects together."""

# Deprecated alias for backwards compatibility
Usage = RunUsage
```

### Usage Limits

Configuration for setting and enforcing usage limits.

```python { .api }
class UsageLimits:
    """
    Configuration for usage limits and quotas.
    """
    def __init__(
        self,
        *,
        request_limit: int | None = None,
        input_token_limit: int | None = None,
        output_token_limit: int | None = None,
        total_token_limit: int | None = None
    ):
        """
        Set usage limits for agent runs.

        Parameters:
        - request_limit: Maximum number of requests allowed
        - input_token_limit: Maximum input tokens allowed
        - output_token_limit: Maximum output tokens allowed
        - total_token_limit: Maximum total tokens allowed
        """

    def check_before_request(self, current_usage: RunUsage) -> None:
        """
        Check if a new request would exceed limits.

        Parameters:
        - current_usage: Current usage metrics

        Raises:
        UsageLimitExceeded: If limits would be exceeded
        """

    def check_after_request(
        self,
        current_usage: RunUsage,
        request_usage: RequestUsage
    ) -> None:
        """
        Check if usage limits have been exceeded after a request.

        Parameters:
        - current_usage: Current total usage
        - request_usage: Usage from the latest request

        Raises:
        UsageLimitExceeded: If limits have been exceeded
        """
```

### Timeout Configuration

Timeout handling for model requests.

```python { .api }
class Timeout:
    """
    Timeout configuration for model requests.
    """
    def __init__(
        self,
        *,
        connect: float | None = None,
        read: float | None = None,
        write: float | None = None,
        pool: float | None = None
    ):
        """
        Configure request timeouts.

        Parameters:
        - connect: Connection timeout in seconds
        - read: Read timeout in seconds
        - write: Write timeout in seconds
        - pool: Pool timeout in seconds
        """
```

### Instrumentation Settings

OpenTelemetry instrumentation configuration for monitoring and debugging.

```python { .api }
class InstrumentationSettings:
    """
    OpenTelemetry instrumentation configuration.
    """
    def __init__(
        self,
        *,
        capture_request_body: bool = True,
        capture_response_body: bool = True,
        capture_tool_calls: bool = True,
        capture_usage: bool = True,
        capture_model_name: bool = True
    ):
        """
        Configure OpenTelemetry instrumentation.

        Parameters:
        - capture_request_body: Whether to capture request bodies
        - capture_response_body: Whether to capture response bodies
        - capture_tool_calls: Whether to capture tool call details
        - capture_usage: Whether to capture usage metrics
        - capture_model_name: Whether to capture model names
        """
```

## Model Settings Details

### Core Generation Parameters

```python
# Temperature: Controls randomness (0.0 = deterministic, 2.0 = very random)
settings = ModelSettings(temperature=0.7)

# Max tokens: Maximum tokens to generate
settings = ModelSettings(max_tokens=1000)

# Top-p: Nucleus sampling parameter (0.1 = conservative, 1.0 = full vocabulary)
settings = ModelSettings(top_p=0.9)

# Seed: For reproducible outputs
settings = ModelSettings(seed=42)
```

### Advanced Parameters

```python
# Penalties: Control repetition (-2.0 to 2.0)
settings = ModelSettings(
    presence_penalty=0.5,  # Reduce likelihood of repeating topics
    frequency_penalty=0.3  # Reduce likelihood of repeating tokens
)

# Stop sequences: Strings that stop generation
settings = ModelSettings(stop_sequences=["END", "\n\n---"])

# Logit bias: Adjust token probabilities
settings = ModelSettings(
    logit_bias={
        "50256": -100,  # Strongly discourage specific token
        "1234": 20      # Strongly encourage specific token
    }
)
```

### Request Configuration

```python
# Timeout configuration
settings = ModelSettings(
    timeout=Timeout(
        connect=10.0,
        read=30.0,
        write=10.0
    )
)

# Tool calling configuration
settings = ModelSettings(parallel_tool_calls=True)

# Custom headers and body
settings = ModelSettings(
    extra_headers={"Custom-Header": "value"},
    extra_body={"custom_param": "value"}
)
```

## Usage Examples

### Basic Model Settings

```python
from pydantic_ai import Agent, ModelSettings

# Agent with custom model settings
settings = ModelSettings(
    temperature=0.2,  # More deterministic
    max_tokens=500,   # Limit response length
    top_p=0.9         # Slightly focused sampling
)

agent = Agent(
    model='gpt-4',
    system_prompt='You are a precise technical assistant.',
    model_settings=settings
)

result = agent.run_sync('Explain quantum computing')
```

### Runtime Model Settings Override

```python
from pydantic_ai import Agent, ModelSettings

agent = Agent(model='gpt-4')

# Override settings for specific run
creative_settings = ModelSettings(
    temperature=1.2,  # More creative
    top_p=0.95,       # Broader vocabulary
    max_tokens=1000
)

result = agent.run_sync(
    'Write a creative story',
    model_settings=creative_settings
)
```

### Usage Tracking

```python
from pydantic_ai import Agent

agent = Agent(model='gpt-4')
result = agent.run_sync('Hello, world!')

# Access usage information
usage = result.usage
print(f"Requests made: {usage.request_count}")
print(f"Input tokens: {usage.input_tokens}")
print(f"Output tokens: {usage.output_tokens}")
print(f"Total tokens: {usage.total_tokens}")

# Get detailed breakdown
details = usage.details()
print(f"Usage details: {details}")
```

### Usage Limits

```python
from pydantic_ai import Agent, UsageLimits
from pydantic_ai.exceptions import UsageLimitExceeded

# Set usage limits
limits = UsageLimits(
    request_limit=10,
    total_token_limit=5000
)

agent = Agent(
    model='gpt-4',
    usage_limits=limits
)

try:
    result = agent.run_sync('Generate a very long response')
    print(f"Tokens used: {result.usage.total_tokens}")
except UsageLimitExceeded as e:
    print(f"Usage limit exceeded: {e}")
```

### Merging Model Settings

```python
from pydantic_ai import Agent, ModelSettings, merge_model_settings

# Base settings
base_settings = ModelSettings(
    temperature=0.7,
    max_tokens=1000
)

# Override specific settings
override_settings = ModelSettings(
    temperature=0.2,  # Override temperature
    seed=42           # Add seed
)

# Merge settings
final_settings = merge_model_settings(base_settings, override_settings)
# Result: temperature=0.2, max_tokens=1000, seed=42

agent = Agent(
    model='gpt-4',
    model_settings=final_settings
)
```

### Custom Timeouts

```python
from pydantic_ai import Agent, ModelSettings, Timeout

# Custom timeout configuration
timeout_config = Timeout(
    connect=5.0,  # 5 seconds to connect
    read=60.0,    # 60 seconds to read response
    write=10.0    # 10 seconds to write request
)

settings = ModelSettings(timeout=timeout_config)

agent = Agent(
    model='gpt-4',
    model_settings=settings
)

# This agent will use the custom timeout settings
result = agent.run_sync('Generate a detailed explanation')
```

### Instrumentation Configuration

```python
from pydantic_ai import Agent, InstrumentationSettings

# Configure instrumentation
instrumentation = InstrumentationSettings(
    capture_request_body=True,
    capture_response_body=True,
    capture_tool_calls=True,
    capture_usage=True
)

agent = Agent(
    model='gpt-4',
    instrumented=instrumentation
)

# Agent will capture detailed telemetry data
result = agent.run_sync('Hello, world!')
```

### Production Configuration

```python
from pydantic_ai import Agent, ModelSettings, UsageLimits, Timeout

# Production-ready configuration
production_settings = ModelSettings(
    temperature=0.3,  # Consistent responses
    max_tokens=2000,  # Reasonable limit
    timeout=Timeout(
        connect=10.0,
        read=120.0  # Allow longer responses
    ),
    parallel_tool_calls=True,
    extra_headers={
        "User-Agent": "MyApp/1.0",
        "X-Request-ID": "unique-id"
    }
)

usage_limits = UsageLimits(
    request_limit=100,       # Max 100 requests per run
    total_token_limit=50000  # Max 50k tokens per run
)

agent = Agent(
    model='gpt-4',
    model_settings=production_settings,
    usage_limits=usage_limits,
    system_prompt='You are a production assistant.',
    retries=3  # Retry on failures
)

result = agent.run_sync('Process this user request')
print(f"Cost: ${result.cost:.4f}" if result.cost else "Cost not available")
```