or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

agent.mdindex.mdmessages.mdmodels.mdoutput.mdsettings.mdstreaming.mdtools.md

# Settings and Configuration

Model settings, usage tracking, and configuration options for fine-tuning agent behavior, monitoring resource consumption, and setting usage limits.

## Capabilities

### Model Settings

Comprehensive model configuration options for controlling generation behavior.

```python { .api }
class ModelSettings(TypedDict, total=False):
    """
    Configuration options for model behavior.

    All fields are optional and can be used to override default settings.
    """
    max_tokens: int
    temperature: float
    top_p: float
    timeout: float | Timeout
    parallel_tool_calls: bool
    seed: int
    presence_penalty: float
    frequency_penalty: float
    logit_bias: dict[str, int]
    stop_sequences: list[str]
    extra_headers: dict[str, str]
    extra_body: object

def merge_model_settings(
    *settings: ModelSettings | None
) -> ModelSettings:
    """
    Merge multiple model settings configurations.

    Parameters:
    - settings: Variable number of ModelSettings to merge

    Returns:
    Merged ModelSettings with later settings overriding earlier ones
    """
```

### Usage Tracking

Comprehensive usage metrics and tracking for monitoring resource consumption.

```python { .api }
class RequestUsage:
    """
    Usage metrics for a single model request.
    """
    input_tokens: int | None
    output_tokens: int | None
    cache_creation_input_tokens: int | None
    cache_read_input_tokens: int | None
    audio_input_tokens: int | None
    audio_output_tokens: int | None
    audio_cache_creation_input_tokens: int | None
    audio_cache_read_input_tokens: int | None

    @property
    def total_tokens(self) -> int | None:
        """Total tokens used in this request."""

    def details(self) -> dict[str, int]:
        """Get detailed usage breakdown as dictionary."""

class RunUsage:
    """
    Usage metrics for an entire agent run.
    """
    request_count: int
    input_tokens: int | None
    output_tokens: int | None
    cache_creation_input_tokens: int | None
    cache_read_input_tokens: int | None
    audio_input_tokens: int | None
    audio_output_tokens: int | None
    audio_cache_creation_input_tokens: int | None
    audio_cache_read_input_tokens: int | None

    @property
    def total_tokens(self) -> int | None:
        """Total tokens used across all requests in run."""

    def details(self) -> dict[str, int | None]:
        """Get detailed usage breakdown as dictionary."""

    def __add__(self, other: RunUsage) -> RunUsage:
        """Add two RunUsage objects together."""

# Deprecated alias for backwards compatibility
Usage = RunUsage
```

### Usage Limits

Configuration for setting and enforcing usage limits.

```python { .api }
class UsageLimits:
    """
    Configuration for usage limits and quotas.
    """
    def __init__(
        self,
        *,
        request_limit: int | None = None,
        input_token_limit: int | None = None,
        output_token_limit: int | None = None,
        total_token_limit: int | None = None
    ):
        """
        Set usage limits for agent runs.

        Parameters:
        - request_limit: Maximum number of requests allowed
        - input_token_limit: Maximum input tokens allowed
        - output_token_limit: Maximum output tokens allowed
        - total_token_limit: Maximum total tokens allowed
        """

    def check_before_request(self, current_usage: RunUsage) -> None:
        """
        Check if a new request would exceed limits.

        Parameters:
        - current_usage: Current usage metrics

        Raises:
        UsageLimitExceeded: If limits would be exceeded
        """

    def check_after_request(
        self,
        current_usage: RunUsage,
        request_usage: RequestUsage
    ) -> None:
        """
        Check if usage limits have been exceeded after a request.

        Parameters:
        - current_usage: Current total usage
        - request_usage: Usage from the latest request

        Raises:
        UsageLimitExceeded: If limits have been exceeded
        """
```

### Timeout Configuration

Timeout handling for model requests.

```python { .api }
class Timeout:
    """
    Timeout configuration for model requests.
    """
    def __init__(
        self,
        *,
        connect: float | None = None,
        read: float | None = None,
        write: float | None = None,
        pool: float | None = None
    ):
        """
        Configure request timeouts.

        Parameters:
        - connect: Connection timeout in seconds
        - read: Read timeout in seconds
        - write: Write timeout in seconds
        - pool: Pool timeout in seconds
        """
```

### Instrumentation Settings

OpenTelemetry instrumentation configuration for monitoring and debugging.

```python { .api }
class InstrumentationSettings:
    """
    OpenTelemetry instrumentation configuration.
    """
    def __init__(
        self,
        *,
        capture_request_body: bool = True,
        capture_response_body: bool = True,
        capture_tool_calls: bool = True,
        capture_usage: bool = True,
        capture_model_name: bool = True
    ):
        """
        Configure OpenTelemetry instrumentation.

        Parameters:
        - capture_request_body: Whether to capture request bodies
        - capture_response_body: Whether to capture response bodies
        - capture_tool_calls: Whether to capture tool call details
        - capture_usage: Whether to capture usage metrics
        - capture_model_name: Whether to capture model names
        """
```

## Model Settings Details

### Core Generation Parameters

```python
# Temperature: Controls randomness (0.0 = deterministic, 2.0 = very random)
settings = ModelSettings(temperature=0.7)

# Max tokens: Maximum tokens to generate
settings = ModelSettings(max_tokens=1000)

# Top-p: Nucleus sampling parameter (0.1 = conservative, 1.0 = full vocabulary)
settings = ModelSettings(top_p=0.9)

# Seed: For reproducible outputs
settings = ModelSettings(seed=42)
```

### Advanced Parameters

```python
# Penalties: Control repetition (-2.0 to 2.0)
settings = ModelSettings(
    presence_penalty=0.5,   # Reduce likelihood of repeating topics
    frequency_penalty=0.3   # Reduce likelihood of repeating tokens
)

# Stop sequences: Strings that stop generation
settings = ModelSettings(stop_sequences=["END", "\n\n---"])

# Logit bias: Adjust token probabilities
settings = ModelSettings(
    logit_bias={
        "50256": -100,  # Strongly discourage specific token
        "1234": 20      # Strongly encourage specific token
    }
)
```

### Request Configuration

```python
# Timeout configuration
settings = ModelSettings(
    timeout=Timeout(
        connect=10.0,
        read=30.0,
        write=10.0
    )
)

# Tool calling configuration
settings = ModelSettings(parallel_tool_calls=True)

# Custom headers and body
settings = ModelSettings(
    extra_headers={"Custom-Header": "value"},
    extra_body={"custom_param": "value"}
)
```

## Usage Examples

### Basic Model Settings

```python
from pydantic_ai import Agent, ModelSettings

# Agent with custom model settings
settings = ModelSettings(
    temperature=0.2,   # More deterministic
    max_tokens=500,    # Limit response length
    top_p=0.9          # Slightly focused sampling
)

agent = Agent(
    model='gpt-4',
    system_prompt='You are a precise technical assistant.',
    model_settings=settings
)

result = agent.run_sync('Explain quantum computing')
```

### Runtime Model Settings Override

```python
from pydantic_ai import Agent, ModelSettings

agent = Agent(model='gpt-4')

# Override settings for specific run
creative_settings = ModelSettings(
    temperature=1.2,   # More creative
    top_p=0.95,        # Broader vocabulary
    max_tokens=1000
)

result = agent.run_sync(
    'Write a creative story',
    model_settings=creative_settings
)
```

### Usage Tracking

```python
from pydantic_ai import Agent

agent = Agent(model='gpt-4')
result = agent.run_sync('Hello, world!')

# Access usage information
usage = result.usage
print(f"Requests made: {usage.request_count}")
print(f"Input tokens: {usage.input_tokens}")
print(f"Output tokens: {usage.output_tokens}")
print(f"Total tokens: {usage.total_tokens}")

# Get detailed breakdown
details = usage.details()
print(f"Usage details: {details}")
```

### Usage Limits

```python
from pydantic_ai import Agent, UsageLimits
from pydantic_ai.exceptions import UsageLimitExceeded

# Set usage limits
limits = UsageLimits(
    request_limit=10,
    total_token_limit=5000
)

agent = Agent(
    model='gpt-4',
    usage_limits=limits
)

try:
    result = agent.run_sync('Generate a very long response')
    print(f"Tokens used: {result.usage.total_tokens}")
except UsageLimitExceeded as e:
    print(f"Usage limit exceeded: {e}")
```

### Merging Model Settings

```python
from pydantic_ai import Agent, ModelSettings, merge_model_settings

# Base settings
base_settings = ModelSettings(
    temperature=0.7,
    max_tokens=1000
)

# Override specific settings
override_settings = ModelSettings(
    temperature=0.2,   # Override temperature
    seed=42            # Add seed
)

# Merge settings
final_settings = merge_model_settings(base_settings, override_settings)
# Result: temperature=0.2, max_tokens=1000, seed=42

agent = Agent(
    model='gpt-4',
    model_settings=final_settings
)
```

### Custom Timeouts

```python
from pydantic_ai import Agent, ModelSettings, Timeout

# Custom timeout configuration
timeout_config = Timeout(
    connect=5.0,   # 5 seconds to connect
    read=60.0,     # 60 seconds to read response
    write=10.0     # 10 seconds to write request
)

settings = ModelSettings(timeout=timeout_config)

agent = Agent(
    model='gpt-4',
    model_settings=settings
)

# This agent will use the custom timeout settings
result = agent.run_sync('Generate a detailed explanation')
```

### Instrumentation Configuration

```python
from pydantic_ai import Agent, InstrumentationSettings

# Configure instrumentation
instrumentation = InstrumentationSettings(
    capture_request_body=True,
    capture_response_body=True,
    capture_tool_calls=True,
    capture_usage=True
)

agent = Agent(
    model='gpt-4',
    instrumented=instrumentation
)

# Agent will capture detailed telemetry data
result = agent.run_sync('Hello, world!')
```

### Production Configuration

```python
from pydantic_ai import Agent, ModelSettings, UsageLimits, Timeout

# Production-ready configuration
production_settings = ModelSettings(
    temperature=0.3,    # Consistent responses
    max_tokens=2000,    # Reasonable limit
    timeout=Timeout(
        connect=10.0,
        read=120.0      # Allow longer responses
    ),
    parallel_tool_calls=True,
    extra_headers={
        "User-Agent": "MyApp/1.0",
        "X-Request-ID": "unique-id"
    }
)

usage_limits = UsageLimits(
    request_limit=100,         # Max 100 requests per run
    total_token_limit=50000    # Max 50k tokens per run
)

agent = Agent(
    model='gpt-4',
    model_settings=production_settings,
    usage_limits=usage_limits,
    system_prompt='You are a production assistant.',
    retries=3   # Retry on failures
)

result = agent.run_sync('Process this user request')
print(f"Cost: ${result.cost:.4f}" if result.cost else "Cost not available")
```