Structured outputs for LLMs with type safety, validation, and automatic retries.

The instructor package provides a domain-specific language (DSL) for advanced extraction patterns. These components enable optional extraction, streaming validation, multi-task extraction, and citation tracking.

Optional result wrapper for handling cases where extraction might fail or return no data.
def Maybe(model: type[T]) -> type[MaybeBase[T]]:
    """
    Create optional result wrapper.

    Args:
        model: Pydantic model class to wrap

    Returns:
        MaybeBase subclass that can represent success or failure
    """

    # NOTE(review): `MaybeBase[T]` uses PEP 695 syntax, which declares a NEW
    # type parameter shadowing the T in Maybe's signature — confirm this
    # matches the installed instructor version's actual definition.
    class MaybeBase[T]:
        """Base class for Maybe results."""

        result: T | None    # extracted model instance, or None on failure
        error: bool         # True when extraction failed
        message: str | None # optional explanation of the failure

        def __init__(
            self,
            result: T | None = None,
            error: bool = False,
            message: str | None = None
        ) -> None:
            """
            Initialize Maybe result.

            Args:
                result: The extracted model instance (None if failed)
                error: Whether an error occurred during extraction
                message: Optional error message or explanation
            """

        @property
        def is_success(self) -> bool:
            """Check if extraction was successful."""

        @property
        def is_failure(self) -> bool:
            """Check if extraction failed."""

        def unwrap(self) -> T:
            """
            Get the result, raising exception if failed.

            Returns:
                The extracted model instance

            Raises:
                ValueError: If extraction failed
            """

        def unwrap_or(self, default: T) -> T:
            """
            Get the result or return default if failed.

            Args:
                default: Value to return if extraction failed

            Returns:
                The extracted model or default value
            """

from instructor import Maybe
from pydantic import BaseModel


class User(BaseModel):
    name: str
    email: str
    age: int


# Create optional wrapper
OptionalUser = Maybe(User)

# Use in extraction
maybe_user = client.create(
    model="gpt-4",
    messages=[{"role": "user", "content": "No user data here"}],
    response_model=OptionalUser
)

# Check result
if maybe_user.is_success:
    user = maybe_user.unwrap()
    print(f"Extracted: {user.name}")
else:
    print(f"Extraction failed: {maybe_user.message}")

# Use with default
user = maybe_user.unwrap_or(User(name="Unknown", email="", age=0))

# Pattern matching style: keyword patterns match on attribute access
match maybe_user:
    case OptionalUser(result=user) if maybe_user.is_success:
        print(f"Success: {user}")
    case OptionalUser(error=True, message=msg):
        print(f"Failed: {msg}")

Generic class for streaming validation that allows partial model validation as data arrives.
class Partial[T]:
    """
    Partial validation streaming wrapper.

    Allows streaming validation of Pydantic models as data becomes available.
    Use as Partial[YourModel] to enable incremental validation.
    """

    def __class_getitem__(cls, item: type[BaseModel]) -> type[BaseModel]:
        """
        Create partial validation class for given model.

        Args:
            item: Pydantic model class to wrap

        Returns:
            Modified model class with partial validation support
        """

from instructor import Partial
from pydantic import BaseModel
from typing import List


class Article(BaseModel):
    title: str
    author: str
    content: str
    tags: List[str]
    word_count: int


# Stream partial results
for partial_article in client.create_partial(
    model="gpt-4",
    messages=[{
        "role": "user",
        "content": "Write a long article about climate change"
    }],
    response_model=Partial[Article]
):
    # Display progress as fields become available
    if partial_article.title:
        print(f"Title: {partial_article.title}")
    if partial_article.author:
        print(f"Author: {partial_article.author}")
    if partial_article.content:
        print(f"Content length: {len(partial_article.content)}")
    if partial_article.tags:
        print(f"Tags so far: {partial_article.tags}")
    if partial_article.word_count:
        print(f"Word count: {partial_article.word_count}")

# Final result is fully validated
final_article = partial_article
# NOTE(review): Partial[Article] typically produces a distinct model class with
# optional fields — confirm the final streamed value really is an Article
# instance before relying on this assertion.
assert isinstance(final_article, Article)

Function factory for multi-task extraction that creates models capable of handling multiple instances.
def IterableModel(
    subtask_class: type[BaseModel],
    name: Optional[str] = None,
    description: Optional[str] = None
) -> type[BaseModel]:
    """
    Create multi-task extraction wrapper.

    Args:
        subtask_class: Pydantic model class for individual tasks
        name: Optional name for the iterable model
        description: Optional description for the extraction task

    Returns:
        Model class that can extract multiple instances of subtask_class
    """

from instructor import IterableModel
from pydantic import BaseModel
from typing import List


class Task(BaseModel):
    name: str
    priority: str
    assigned_to: str
    due_date: str


# Create iterable model
TaskList = IterableModel(
    Task,
    name="ProjectTasks",
    description="Extract all tasks from project description"
)

# Extract multiple tasks
task_extraction = client.create(
    model="gpt-4",
    messages=[{
        "role": "user",
        "content": """
Project tasks:
1. Design database schema (high priority, John, 2024-01-15)
2. Implement API endpoints (medium priority, Sarah, 2024-01-20)
3. Write unit tests (low priority, Mike, 2024-01-25)
4. Deploy to staging (high priority, John, 2024-01-30)
"""
    }],
    response_model=TaskList
)

# Access extracted tasks
for task in task_extraction.tasks:  # TaskList has 'tasks' attribute
    print(f"{task.name} - {task.priority} - {task.assigned_to}")

# Alternative: Use create_iterable directly
tasks = client.create_iterable(
    model="gpt-4",
    messages=[{"role": "user", "content": "Extract tasks..."}],
    response_model=Task
)
for task in tasks:
    print(f"Task: {task.name}")

Mixin class for adding citation tracking capabilities to models.
class CitationMixin:
    """
    Citation tracking mixin.

    Add citation tracking capabilities to Pydantic models by inheriting
    from this mixin along with BaseModel.
    """

    # List of source citations for the extracted information
    citations: List[str] = Field(
        default_factory=list,
        description="Source citations for extracted information"
    )
    # Optional extraction confidence score in [0.0, 1.0]
    confidence: Optional[float] = Field(
        None,
        description="Confidence score for extraction (0.0-1.0)"
    )
    # Original text span the information was extracted from, if available
    source_text: Optional[str] = Field(
        None,
        description="Original text that information was extracted from"
    )

    def add_citation(self, citation: str) -> None:
        """
        Add citation to the model.

        Args:
            citation: Citation string to add
        """

    def has_citations(self) -> bool:
        """Check if model has any citations."""

    def get_citations(self) -> List[str]:
        """Get all citations for this model."""

from instructor import CitationMixin
from pydantic import BaseModel, Field
from typing import List, Optional


class CitedFact(CitationMixin, BaseModel):
    statement: str = Field(description="The factual statement")
    category: str = Field(description="Category of the fact")


class ResearchSummary(CitationMixin, BaseModel):
    topic: str
    key_findings: List[str]
    methodology: str
    conclusion: str


# Extract with citations
summary = client.create(
    model="gpt-4",
    messages=[{
        "role": "user",
        "content": """
Based on the research paper 'Climate Change Impacts 2023' by Smith et al.,
extract a summary. The study used satellite data from 2020-2023 and found
that arctic ice decreased by 15% annually. The methodology involved thermal
imaging and statistical analysis.
"""
    }],
    response_model=ResearchSummary
)

# Access citation information
print(f"Summary: {summary.topic}")
print(f"Confidence: {summary.confidence}")
print(f"Citations: {summary.citations}")
print(f"Source: {summary.source_text}")

# Manual citation management
summary.add_citation("Smith et al. 2023, Climate Change Impacts")
summary.add_citation("Arctic Research Database 2023")
if summary.has_citations():
    for citation in summary.get_citations():
        print(f"Source: {citation}")

from instructor import Maybe, Partial, IterableModel, CitationMixin
class Evidence(CitationMixin, BaseModel):
    claim: str
    supporting_data: str
    reliability: str


class Argument(BaseModel):
    thesis: str
    evidence: List[Evidence]
    counter_arguments: List[str]


# Optional iterable with citations
OptionalEvidenceList = Maybe(IterableModel(Evidence))

# Stream partial arguments with citations
PartialArgument = Partial[Argument]

# Extract optional evidence list
maybe_evidence = client.create(
    model="gpt-4",
    messages=[{"role": "user", "content": "Find evidence for climate change"}],
    response_model=OptionalEvidenceList
)

if maybe_evidence.is_success:
    evidence_list = maybe_evidence.unwrap()
    # IterableModel wrappers expose their items via the 'tasks' attribute
    for evidence in evidence_list.tasks:
        print(f"Claim: {evidence.claim}")
        print(f"Citations: {evidence.citations}")


# NOTE(review): subclassing the result of Maybe(BaseModel) is an unusual
# pattern — confirm it is supported by the installed instructor version.
class OptionalTask(Maybe(BaseModel)):
    """Task that might not be extractable."""

    name: str
    description: str


class ProjectPlan(BaseModel):
    title: str
    required_tasks: List[Task]  # Always present
    optional_tasks: List[OptionalTask]  # May be empty or failed


# Extract mixed required and optional tasks
plan = client.create(
    model="gpt-4",
    messages=[{"role": "user", "content": "Create project plan..."}],
    response_model=ProjectPlan
)

# Handle mixed results
print(f"Required tasks: {len(plan.required_tasks)}")
for optional in plan.optional_tasks:
    if optional.is_success:
        task = optional.unwrap()
        print(f"Optional task: {task.name}")
    else:
        print(f"Failed to extract optional task: {optional.message}")

from typing import TypeVar, Generic
from pydantic import BaseModel, Field

T = TypeVar('T', bound=BaseModel)


class Weighted(Generic[T]):
    """Custom DSL component for weighted results."""

    @classmethod
    def create(cls, model_class: type[T]) -> type[BaseModel]:
        """Create weighted version of model."""

        class WeightedModel(BaseModel):
            # Dynamic annotation: the field's type is the wrapped model class
            # captured from the enclosing call.
            result: model_class
            weight: float = Field(
                ...,
                ge=0.0,
                le=1.0,
                description="Confidence weight for this result"
            )
            reasoning: str = Field(
                ...,
                description="Explanation for the assigned weight"
            )

        return WeightedModel


# Usage
WeightedUser = Weighted.create(User)
weighted_result = client.create(
    model="gpt-4",
    messages=[{"role": "user", "content": "Extract user with confidence"}],
    response_model=WeightedUser
)
print(f"User: {weighted_result.result.name}")
print(f"Weight: {weighted_result.weight}")
print(f"Reasoning: {weighted_result.reasoning}")

Install with Tessl CLI
npx tessl i tessl/pypi-instructor