or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

batch-processing.md · client-usage.md · dsl-components.md · index.md · modes-and-configuration.md · providers.md · schema-generation.md · validation.md

docs/schema-generation.md

0

# Schema Generation

1

2

The instructor package provides comprehensive schema generation utilities for converting Pydantic models to provider-specific formats. These utilities enable seamless integration with different LLM providers while maintaining type safety.

3

4

## Provider-Specific Schema Functions

5

6

### OpenAI Schema Generation

7

8

Generate OpenAI-compatible function schemas from Pydantic models.

9

10

```python { .api }

11

def generate_openai_schema(

12

model: Type[BaseModel],

13

name: Optional[str] = None,

14

description: Optional[str] = None,

15

**kwargs: Any

16

) -> Dict[str, Any]:

17

"""

18

Generate OpenAI function schema from Pydantic model.

19

20

Args:

21

model: Pydantic model class to convert

22

name: Optional custom function name

23

description: Optional custom function description

24

**kwargs: Additional schema configuration options

25

26

Returns:

27

OpenAI function schema dictionary

28

"""

29

```

30

31

#### Usage Examples

32

33

```python { .api }

34

from instructor import generate_openai_schema

35

from pydantic import BaseModel, Field

36

from typing import List, Optional

37

38

class UserProfile(BaseModel):

39

"""User profile information."""

40

name: str = Field(..., description="Full name of the user")

41

age: int = Field(..., ge=0, le=150, description="Age in years")

42

email: str = Field(..., description="Email address")

43

interests: List[str] = Field(default=[], description="List of interests")

44

is_premium: bool = Field(default=False, description="Premium membership status")

45

46

# Generate OpenAI schema

47

openai_schema = generate_openai_schema(

48

UserProfile,

49

name="extract_user_profile",

50

description="Extract user profile information from text"

51

)

52

53

print(openai_schema)

54

# Output:

55

# {

56

# "name": "extract_user_profile",

57

# "description": "Extract user profile information from text",

58

# "parameters": {

59

# "type": "object",

60

# "properties": {

61

# "name": {"type": "string", "description": "Full name of the user"},

62

# "age": {"type": "integer", "minimum": 0, "maximum": 150, "description": "Age in years"},

63

# "email": {"type": "string", "description": "Email address"},

64

# "interests": {"type": "array", "items": {"type": "string"}, "description": "List of interests"},

65

# "is_premium": {"type": "boolean", "description": "Premium membership status"}

66

# },

67

# "required": ["name", "age", "email"]

68

# }

69

# }

70

71

# Use with OpenAI client directly

72

import openai

73

client = openai.OpenAI()

74

75

response = client.chat.completions.create(

76

model="gpt-4",

77

messages=[{"role": "user", "content": "Extract: John Doe, 25, john@example.com"}],

78

functions=[openai_schema],

79

function_call={"name": "extract_user_profile"}

80

)

81

```

82

83

### Anthropic Schema Generation

84

85

Generate Anthropic-compatible tool schemas from Pydantic models.

86

87

```python { .api }

88

def generate_anthropic_schema(

89

model: Type[BaseModel],

90

name: Optional[str] = None,

91

description: Optional[str] = None,

92

**kwargs: Any

93

) -> Dict[str, Any]:

94

"""

95

Generate Anthropic tool schema from Pydantic model.

96

97

Args:

98

model: Pydantic model class to convert

99

name: Optional custom tool name

100

description: Optional custom tool description

101

**kwargs: Additional schema configuration options

102

103

Returns:

104

Anthropic tool schema dictionary

105

"""

106

```

107

108

#### Usage Examples

109

110

```python { .api }

111

from instructor import generate_anthropic_schema

112

113

class ProductInfo(BaseModel):

114

"""Product information extraction."""

115

name: str = Field(..., description="Product name")

116

price: float = Field(..., gt=0, description="Product price in USD")

117

category: str = Field(..., description="Product category")

118

features: List[str] = Field(default=[], description="Key product features")

119

in_stock: bool = Field(..., description="Whether product is in stock")

120

121

# Generate Anthropic schema

122

anthropic_schema = generate_anthropic_schema(

123

ProductInfo,

124

name="extract_product_info",

125

description="Extract structured product information"

126

)

127

128

print(anthropic_schema)

129

# Output:

130

# {

131

# "name": "extract_product_info",

132

# "description": "Extract structured product information",

133

# "input_schema": {

134

# "type": "object",

135

# "properties": {

136

# "name": {"type": "string", "description": "Product name"},

137

# "price": {"type": "number", "exclusiveMinimum": 0, "description": "Product price in USD"},

138

# "category": {"type": "string", "description": "Product category"},

139

# "features": {"type": "array", "items": {"type": "string"}, "description": "Key product features"},

140

# "in_stock": {"type": "boolean", "description": "Whether product is in stock"}

141

# },

142

# "required": ["name", "price", "category", "in_stock"]

143

# }

144

# }

145

146

# Use with Anthropic client directly

147

import anthropic

148

client = anthropic.Anthropic()

149

150

response = client.messages.create(

151

model="claude-3-sonnet-20240229",

152

max_tokens=1000,

153

messages=[{"role": "user", "content": "Extract product: iPhone 15 Pro, $999, Smartphones"}],

154

tools=[anthropic_schema]

155

)

156

```

157

158

### Gemini Schema Generation

159

160

Generate Google Gemini-compatible function schemas from Pydantic models.

161

162

```python { .api }

163

def generate_gemini_schema(

164

model: Type[BaseModel],

165

name: Optional[str] = None,

166

description: Optional[str] = None,

167

**kwargs: Any

168

) -> Dict[str, Any]:

169

"""

170

Generate Gemini function schema from Pydantic model.

171

172

Args:

173

model: Pydantic model class to convert

174

name: Optional custom function name

175

description: Optional custom function description

176

**kwargs: Additional schema configuration options

177

178

Returns:

179

Gemini function schema dictionary

180

"""

181

```

182

183

#### Usage Examples

184

185

```python { .api }

186

from instructor import generate_gemini_schema

187

188

class EventInfo(BaseModel):

189

"""Event information extraction."""

190

title: str = Field(..., description="Event title")

191

date: str = Field(..., description="Event date (YYYY-MM-DD format)")

192

location: str = Field(..., description="Event location")

193

attendees: Optional[int] = Field(None, ge=0, description="Expected number of attendees")

194

is_virtual: bool = Field(default=False, description="Whether event is virtual")

195

196

# Generate Gemini schema

197

gemini_schema = generate_gemini_schema(

198

EventInfo,

199

name="extract_event_info",

200

description="Extract event details from text"

201

)

202

203

print(gemini_schema)

204

# Output format compatible with Google Gemini function calling

205

206

# Use with Gemini client

207

import google.generativeai as genai

208

209

model = genai.GenerativeModel('gemini-pro')

210

response = model.generate_content(

211

"Extract: Tech Conference 2024, January 15th, San Francisco Convention Center",

212

tools=[genai.protos.Tool(function_declarations=[gemini_schema])]

213

)

214

```

215

216

## OpenAI Schema Base Classes

217

218

### OpenAISchema Base Class

219

220

Base class for creating OpenAI-compatible schema models.

221

222

```python { .api }

223

class OpenAISchema(BaseModel):

224

"""

225

Base class for OpenAI-compatible schema models.

226

227

Provides automatic schema generation and OpenAI integration

228

capabilities for Pydantic models.

229

"""

230

231

@classmethod

232

def openai_schema(cls) -> Dict[str, Any]:

233

"""

234

Generate OpenAI function schema for this model.

235

236

Returns:

237

OpenAI function schema dictionary

238

"""

239

240

@classmethod

241

def from_response(cls, response: Any) -> 'OpenAISchema':

242

"""

243

Create model instance from OpenAI response.

244

245

Args:

246

response: OpenAI API response containing function call

247

248

Returns:

249

Model instance with extracted data

250

"""

251

252

def to_openai_function_call(self) -> Dict[str, Any]:

253

"""

254

Convert model instance to OpenAI function call format.

255

256

Returns:

257

OpenAI function call dictionary

258

"""

259

```

260

261

### openai_schema Decorator

262

263

Decorator function for automatic schema generation and registration.

264

265

```python { .api }

266

def openai_schema(

267

name: Optional[str] = None,

268

description: Optional[str] = None,

269

**kwargs: Any

270

) -> Callable[[Type[BaseModel]], Type[OpenAISchema]]:

271

"""

272

Decorator for automatic OpenAI schema generation.

273

274

Args:

275

name: Optional custom function name

276

description: Optional custom function description

277

**kwargs: Additional schema configuration options

278

279

Returns:

280

Decorator function that converts model to OpenAISchema

281

"""

282

```

283

284

#### Usage Examples

285

286

```python { .api }

287

from instructor import OpenAISchema, openai_schema

288

289

# Using base class

290

class ContactInfo(OpenAISchema):

291

"""Contact information extraction."""

292

name: str = Field(..., description="Contact name")

293

phone: str = Field(..., description="Phone number")

294

email: str = Field(..., description="Email address")

295

296

# Generate schema

297

schema = ContactInfo.openai_schema()

298

print(schema["name"]) # "ContactInfo"

299

300

# Using decorator

301

@openai_schema(

302

name="extract_contact",

303

description="Extract contact information from text"

304

)

305

class DecoratedContact(BaseModel):

306

name: str = Field(..., description="Contact name")

307

company: str = Field(..., description="Company name")

308

309

# Schema automatically generated with custom name/description

310

schema = DecoratedContact.openai_schema()

311

print(schema["name"]) # "extract_contact"

312

```

313

314

## Advanced Schema Configuration

315

316

### Complex Data Types

317

318

```python { .api }

319

from typing import Union, Literal, Dict, Any

320

from enum import Enum

321

from datetime import datetime

322

323

class Priority(str, Enum):

324

LOW = "low"

325

MEDIUM = "medium"

326

HIGH = "high"

327

URGENT = "urgent"

328

329

class TaskStatus(str, Enum):

330

PENDING = "pending"

331

IN_PROGRESS = "in_progress"

332

COMPLETED = "completed"

333

CANCELLED = "cancelled"

334

335

class Task(BaseModel):

336

"""Complex task model with various data types."""

337

338

title: str = Field(..., description="Task title")

339

description: Optional[str] = Field(None, description="Detailed description")

340

priority: Priority = Field(..., description="Task priority level")

341

status: TaskStatus = Field(default=TaskStatus.PENDING, description="Current status")

342

343

# Union types

344

assignee: Union[str, int] = Field(..., description="Assignee name or ID")

345

346

# Literal types

347

task_type: Literal["bug", "feature", "improvement"] = Field(..., description="Type of task")

348

349

# Complex nested objects

350

metadata: Dict[str, Any] = Field(default_factory=dict, description="Additional metadata")

351

352

# Date handling

353

due_date: Optional[str] = Field(None, description="Due date in ISO format")

354

created_at: str = Field(default_factory=lambda: datetime.now().isoformat())

355

356

# Generate schemas for different providers

357

openai_schema = generate_openai_schema(Task)

358

anthropic_schema = generate_anthropic_schema(Task)

359

gemini_schema = generate_gemini_schema(Task)

360

361

# Each provider handles enums, unions, and complex types appropriately

362

```

363

364

### Nested Models

365

366

```python { .api }

367

class Address(BaseModel):

368

"""Address information."""

369

street: str = Field(..., description="Street address")

370

city: str = Field(..., description="City name")

371

state: str = Field(..., description="State/province")

372

zip_code: str = Field(..., description="ZIP/postal code")

373

country: str = Field(default="USA", description="Country")

374

375

class Company(BaseModel):

376

"""Company information."""

377

name: str = Field(..., description="Company name")

378

industry: str = Field(..., description="Industry sector")

379

employee_count: Optional[int] = Field(None, ge=1, description="Number of employees")

380

address: Address = Field(..., description="Company address")

381

382

class Employee(BaseModel):

383

"""Employee profile with nested company info."""

384

name: str = Field(..., description="Employee name")

385

position: str = Field(..., description="Job title/position")

386

salary: Optional[float] = Field(None, gt=0, description="Annual salary")

387

company: Company = Field(..., description="Company information")

388

389

# Multiple nested models

390

emergency_contacts: List[ContactInfo] = Field(

391

default=[],

392

description="Emergency contact information"

393

)

394

395

# Nested models are properly handled in schema generation

396

employee_schema = generate_openai_schema(Employee)

397

398

# The generated schema includes proper nesting:

399

# properties.company.properties.address.properties.street, etc.

400

```

401

402

### Schema Customization

403

404

```python { .api }

405

def custom_schema_generator(

406

model: Type[BaseModel],

407

provider: str = "openai",

408

custom_types: Dict[str, Any] = None,

409

exclude_fields: List[str] = None,

410

**kwargs: Any

411

) -> Dict[str, Any]:

412

"""

413

Custom schema generator with additional configuration options.

414

415

Args:

416

model: Pydantic model to convert

417

provider: Target provider ("openai", "anthropic", "gemini")

418

custom_types: Custom type mappings for specific fields

419

exclude_fields: Fields to exclude from schema

420

**kwargs: Additional provider-specific options

421

422

Returns:

423

Customized schema dictionary

424

"""

425

426

# Get base schema

427

if provider == "openai":

428

schema = generate_openai_schema(model, **kwargs)

429

elif provider == "anthropic":

430

schema = generate_anthropic_schema(model, **kwargs)

431

elif provider == "gemini":

432

schema = generate_gemini_schema(model, **kwargs)

433

else:

434

raise ValueError(f"Unsupported provider: {provider}")

435

436

# Apply customizations

437

if exclude_fields:

438

properties = schema.get("parameters", {}).get("properties", {})

439

for field in exclude_fields:

440

properties.pop(field, None)

441

442

if custom_types:

443

properties = schema.get("parameters", {}).get("properties", {})

444

for field, custom_type in custom_types.items():

445

if field in properties:

446

properties[field].update(custom_type)

447

448

return schema

449

450

# Usage

451

class FlexibleModel(BaseModel):

452

name: str

453

age: int

454

score: float

455

metadata: Dict[str, Any]

456

457

# Customize schema generation

458

custom_schema = custom_schema_generator(

459

FlexibleModel,

460

provider="openai",

461

exclude_fields=["metadata"], # Don't include metadata in schema

462

custom_types={

463

"score": {"minimum": 0.0, "maximum": 100.0} # Add score constraints

464

},

465

name="flexible_extraction"

466

)

467

```

468

469

## Schema Validation and Testing

470

471

```python { .api }

472

from jsonschema import validate, ValidationError

473

474

def validate_generated_schema(

475

model: Type[BaseModel],

476

provider: str = "openai"

477

) -> bool:

478

"""

479

Validate that generated schema is properly formed.

480

481

Args:

482

model: Pydantic model to test

483

provider: Provider to generate schema for

484

485

Returns:

486

True if schema is valid

487

"""

488

489

try:

490

if provider == "openai":

491

schema = generate_openai_schema(model)

492

493

# Validate OpenAI function schema format

494

required_keys = ["name", "parameters"]

495

for key in required_keys:

496

if key not in schema:

497

raise ValueError(f"Missing required key: {key}")

498

499

# Validate parameters schema

500

params = schema["parameters"]

501

if params.get("type") != "object":

502

raise ValueError("Parameters must be object type")

503

504

elif provider == "anthropic":

505

schema = generate_anthropic_schema(model)

506

507

# Validate Anthropic tool schema format

508

required_keys = ["name", "input_schema"]

509

for key in required_keys:

510

if key not in schema:

511

raise ValueError(f"Missing required key: {key}")

512

513

return True

514

515

except Exception as e:

516

print(f"Schema validation failed: {e}")

517

return False

518

519

# Test schema generation

520

models_to_test = [UserProfile, ProductInfo, Task, Employee]

521

522

for model in models_to_test:

523

for provider in ["openai", "anthropic", "gemini"]:

524

is_valid = validate_generated_schema(model, provider)

525

print(f"{model.__name__} - {provider}: {'✓' if is_valid else '✗'}")

526

```

527

528

## Performance Optimization

529

530

```python { .api }

531

from functools import lru_cache

532

from typing import TypeVar

533

534

ModelType = TypeVar('ModelType', bound=BaseModel)

535

536

@lru_cache(maxsize=128)

537

def cached_schema_generation(

538

model_name: str,

539

provider: str = "openai"

540

) -> Dict[str, Any]:

541

"""

542

Cached schema generation for improved performance.

543

544

Args:

545

model_name: String identifier for the model

546

provider: Provider to generate schema for

547

548

Returns:

549

Cached generated schema

550

"""

551

552

# This would need a registry of models by name

553

# Implementation depends on your specific use case

554

pass

555

556

class SchemaRegistry:

557

"""Registry for managing and caching generated schemas."""

558

559

def __init__(self):

560

self._schemas: Dict[str, Dict[str, Any]] = {}

561

self._models: Dict[str, Type[BaseModel]] = {}

562

563

def register_model(

564

self,

565

name: str,

566

model: Type[BaseModel]

567

) -> None:

568

"""Register a model in the schema registry."""

569

self._models[name] = model

570

571

def get_schema(

572

self,

573

model_name: str,

574

provider: str = "openai"

575

) -> Dict[str, Any]:

576

"""Get schema from registry, generating if necessary."""

577

578

cache_key = f"{model_name}:{provider}"

579

580

if cache_key not in self._schemas:

581

if model_name not in self._models:

582

raise ValueError(f"Model {model_name} not registered")

583

584

model = self._models[model_name]

585

586

if provider == "openai":

587

schema = generate_openai_schema(model)

588

elif provider == "anthropic":

589

schema = generate_anthropic_schema(model)

590

elif provider == "gemini":

591

schema = generate_gemini_schema(model)

592

else:

593

raise ValueError(f"Unsupported provider: {provider}")

594

595

self._schemas[cache_key] = schema

596

597

return self._schemas[cache_key]

598

599

# Usage

600

registry = SchemaRegistry()

601

registry.register_model("user_profile", UserProfile)

602

registry.register_model("product_info", ProductInfo)

603

604

# Fast schema retrieval

605

openai_user_schema = registry.get_schema("user_profile", "openai")

606

anthropic_product_schema = registry.get_schema("product_info", "anthropic")

607

```