Building applications with LLMs through composability
—
Structured output allows agents to return data in a predictable, validated format rather than free-form text. LangChain supports multiple schema types and strategies for extracting structured data from LLM responses, enabling reliable integration with downstream systems and databases.
The response_format parameter in create_agent() enables structured output by specifying a schema that the agent's responses must conform to. LangChain supports multiple schema types and automatically handles the extraction and validation.
The response_format parameter supports multiple strategies for structured output:
# Strategy types
ToolStrategy # Use tool calls for structured output
ProviderStrategy # Use provider's native structured output (JSON mode)
AutoStrategy # Auto-detect best strategy (default)
# Union type
ResponseFormat = ToolStrategy | ProviderStrategy | AutoStrategy
AutoStrategy (Default): Automatically selects the best strategy based on the model's capabilities. This is the recommended approach for most use cases.
ToolStrategy: Forces the use of tool calling for structured output. The schema is converted to a tool definition, and the model must call the tool with the structured data.
ProviderStrategy: Uses the provider's native structured output features (e.g., OpenAI's JSON mode, Anthropic's structured output). Only available for models that support it.
LangChain accepts multiple schema definition formats:
The most common and recommended approach:
from pydantic import BaseModel, Field
from langchain.agents import create_agent
# Pydantic schema describing one weather observation. Each Field description
# is schema metadata attached to the corresponding attribute.
class WeatherReport(BaseModel):
    """Weather information for a location."""

    location: str = Field(description="City name")
    temperature: float = Field(description="Temperature in Fahrenheit")
    conditions: str = Field(description="Weather conditions (e.g., sunny, rainy)")
    # ge/le bound the percentage to 0-100; values outside that range fail validation.
    humidity: int = Field(description="Humidity percentage", ge=0, le=100)
agent = create_agent(
model="openai:gpt-4o",
response_format=WeatherReport,
system_prompt="Extract weather information from user input."
)
result = agent.invoke({
"messages": [{"role": "user", "content": "It's 72 degrees and sunny in San Francisco with 45% humidity"}]
})
# Access structured output
weather: WeatherReport = result["structured_response"]
print(f"{weather.location}: {weather.temperature}°F")
print(f"Conditions: {weather.conditions}")
print(f"Humidity: {weather.humidity}%")
For simpler schemas without Pydantic:
from typing import TypedDict
from langchain.agents import create_agent
# TypedDict schema: instances are plain dicts, so values are read with
# subscript access (user["name"]) rather than attribute access.
class UserInfo(TypedDict):
    """User information."""

    name: str
    age: int
    email: str
agent = create_agent(
model="openai:gpt-4o",
response_format=UserInfo,
system_prompt="Extract user information from the input."
)
result = agent.invoke({
"messages": [{"role": "user", "content": "My name is Alice, I'm 30 years old, and my email is alice@example.com"}]
})
user: UserInfo = result["structured_response"]
print(f"Name: {user['name']}")
print(f"Age: {user['age']}")
print(f"Email: {user['email']}")
Using Python's built-in dataclass decorator:
from dataclasses import dataclass
from langchain.agents import create_agent
# Standard-library dataclass schema; all four fields are required because
# none of them declares a default.
@dataclass
class Product:
    """Product information."""

    name: str
    price: float
    category: str
    in_stock: bool
agent = create_agent(
model="openai:gpt-4o",
response_format=Product,
system_prompt="Extract product information."
)
result = agent.invoke({
"messages": [{"role": "user", "content": "The Laptop Pro costs $1299 and is available in the Electronics category"}]
})
product: Product = result["structured_response"]
print(f"{product.name}: ${product.price}")
print(f"Category: {product.category}")
print(f"Available: {product.in_stock}")
For maximum control, provide a raw JSON schema:
from langchain.agents import create_agent
schema = {
"type": "object",
"properties": {
"title": {"type": "string"},
"author": {"type": "string"},
"year": {"type": "integer"},
"rating": {"type": "number", "minimum": 0, "maximum": 5}
},
"required": ["title", "author", "year"]
}
agent = create_agent(
model="openai:gpt-4o",
response_format=schema,
system_prompt="Extract book information."
)
result = agent.invoke({
"messages": [{"role": "user", "content": "1984 by George Orwell, published in 1949, rated 4.5 stars"}]
})
book = result["structured_response"]
print(f"{book['title']} by {book['author']} ({book['year']})")
LangChain provides specific exceptions for structured output failures:
# Root of the structured-output exception hierarchy: catching this type also
# catches every more specific subclass derived from it.
class StructuredOutputError(Exception):
    """Base error for structured output failures."""


class MultipleStructuredOutputsError(StructuredOutputError):
    """Raised when multiple output tools are called but only one expected."""
class StructuredOutputValidationError(StructuredOutputError):
"""Raised when structured output fails schema validation."""
from langchain.agents import create_agent
from langchain.agents.structured_output import (
StructuredOutputError,
StructuredOutputValidationError
)
from pydantic import BaseModel, Field
# Sentiment-analysis result schema.
class Analysis(BaseModel):
    sentiment: str = Field(description="Sentiment (positive, negative, neutral)")
    # ge/le restrict the score to the closed interval [0.0, 1.0].
    confidence: float = Field(description="Confidence score", ge=0.0, le=1.0)
agent = create_agent(
model="openai:gpt-4o",
response_format=Analysis
)
try:
result = agent.invoke({
"messages": [{"role": "user", "content": "This product is amazing!"}]
})
analysis = result["structured_response"]
print(f"Sentiment: {analysis.sentiment} (confidence: {analysis.confidence})")
except StructuredOutputValidationError as e:
print(f"Validation failed: {e}")
# Handle invalid output (e.g., retry with more specific prompt)
except StructuredOutputError as e:
print(f"Structured output error: {e}")
# Handle general structured output errors
from pydantic import BaseModel, Field
from typing import List
# Leaf model intended to be nested inside another schema; every field is a
# required string.
class Address(BaseModel):
    """Street address."""

    street: str
    city: str
    state: str
    zip_code: str
# Demonstrates a nested schema: the address field is itself a model.
class Person(BaseModel):
    """Person with address."""

    name: str
    age: int
    # Nested model; read sub-fields as person.address.city, etc.
    address: Address
agent = create_agent(
model="openai:gpt-4o",
response_format=Person,
system_prompt="Extract person information including their address."
)
result = agent.invoke({
"messages": [{"role": "user", "content": "John Doe, 35 years old, lives at 123 Main St, Springfield, IL 62701"}]
})
person = result["structured_response"]
print(f"{person.name}, {person.age}")
print(f"Address: {person.address.street}, {person.address.city}, {person.address.state} {person.address.zip_code}")
from pydantic import BaseModel
from typing import List
# Element type for a list-valued schema field.
class Task(BaseModel):
    """A single task."""

    title: str
    priority: str  # high, medium, low
    # Defaults to False, so the field may be omitted from the input.
    completed: bool = False
# Wrapper schema holding a list of Task items plus a separately supplied count.
class TaskList(BaseModel):
    """Collection of tasks."""

    tasks: List[Task]
    # Plain required int: nothing in this model checks it against len(tasks).
    total_count: int
agent = create_agent(
model="openai:gpt-4o",
response_format=TaskList,
system_prompt="Extract all tasks from the user's input."
)
result = agent.invoke({
"messages": [{"role": "user", "content": "I need to: 1) finish report (high priority), 2) call client (medium), 3) update docs (low)"}]
})
task_list = result["structured_response"]
print(f"Total tasks: {task_list.total_count}")
for task in task_list.tasks:
status = "✓" if task.completed else "○"
print(f"{status} [{task.priority}] {task.title}")
from pydantic import BaseModel, Field
from typing import Optional
# Schema mixing required fields with optional/defaulted ones.
class MovieReview(BaseModel):
    """Movie review information."""

    # Required fields: no default is declared.
    title: str
    year: int
    # ge/le restrict the rating to the closed interval [0.0, 10.0].
    rating: float = Field(ge=0.0, le=10.0)
    # Fields below declare defaults, so they may be omitted from the input.
    review_text: Optional[str] = None
    reviewer_name: Optional[str] = "Anonymous"
    recommended: bool = True
agent = create_agent(
model="openai:gpt-4o",
response_format=MovieReview,
system_prompt="Extract movie review information."
)
result = agent.invoke({
"messages": [{"role": "user", "content": "Inception from 2010, rated 9/10"}]
})
review = result["structured_response"]
print(f"{review.title} ({review.year}): {review.rating}/10")
print(f"Reviewer: {review.reviewer_name}")
print(f"Recommended: {'Yes' if review.recommended else 'No'}")
from pydantic import BaseModel
from enum import Enum
# str-mixin enum: each member is also a str, so it compares equal to its
# lowercase value (Priority.LOW == "low").
class Priority(str, Enum):
    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
    URGENT = "urgent"
# str-mixin enum of workflow states; members compare equal to their string
# values (Status.DONE == "done").
class Status(str, Enum):
    TODO = "todo"
    IN_PROGRESS = "in_progress"
    DONE = "done"
# Schema using enum-typed fields to restrict values to a closed set.
class Issue(BaseModel):
    """Bug or feature request."""

    title: str
    description: str
    # Required; must be one of the Priority members.
    priority: Priority
    # Falls back to Status.TODO when the input does not supply a status.
    status: Status = Status.TODO
agent = create_agent(
model="openai:gpt-4o",
response_format=Issue,
system_prompt="Extract issue details from bug reports."
)
result = agent.invoke({
"messages": [{"role": "user", "content": "High priority: Login button not working on mobile devices"}]
})
issue = result["structured_response"]
print(f"Issue: {issue.title}")
print(f"Priority: {issue.priority.value}")
print(f"Status: {issue.status.value}")
Pydantic provides powerful validation:
from pydantic import BaseModel, Field, field_validator, model_validator
# Registration schema combining declarative Field constraints with custom
# per-field validators and one whole-model validator.
class UserRegistration(BaseModel):
    """User registration data."""

    # 3-20 characters drawn from letters, digits and underscore.
    username: str = Field(min_length=3, max_length=20, pattern=r"^[a-zA-Z0-9_]+$")
    email: str = Field(pattern=r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$")
    age: int = Field(ge=13, le=120)
    password: str = Field(min_length=8)
    confirm_password: str

    @field_validator('username')
    @classmethod
    def username_alphanumeric(cls, v: str) -> str:
        # Underscores are permitted, so they are stripped before the
        # alphanumeric check. A name made only of underscores leaves an empty
        # string, which isalnum() rejects, so such names raise here.
        if not v.replace('_', '').isalnum():
            raise ValueError('Username must be alphanumeric')
        return v

    @field_validator('email')
    @classmethod
    def email_lowercase(cls, v: str) -> str:
        # Returns the lowercased address, which becomes the stored value.
        return v.lower()

    @model_validator(mode='after')
    def check_passwords_match(self):
        # Cross-field check: mode='after' gives access to the populated
        # instance, so both password fields can be compared.
        if self.password != self.confirm_password:
            raise ValueError('Passwords do not match')
        return self
agent = create_agent(
model="openai:gpt-4o",
response_format=UserRegistration,
system_prompt="Extract user registration information."
)
# If the model returns invalid data, StructuredOutputValidationError is raised
You can use both tools and structured output in the same agent:
from pydantic import BaseModel
from langchain.tools import tool
from langchain.agents import create_agent
# Placeholder tool for the example: it performs no real lookup and simply
# echoes the query back inside a fixed string.
@tool
def search_database(query: str) -> str:
    """Search the database."""
    return f"Results for: {query}"
# Structured result schema for the tool-using agent example; all fields are
# required.
class Summary(BaseModel):
    """Summary of search results."""

    query: str
    result_count: int
    key_findings: list[str]
agent = create_agent(
model="openai:gpt-4o",
tools=[search_database],
response_format=Summary,
system_prompt="Use the search tool, then provide a structured summary."
)
result = agent.invoke({
"messages": [{"role": "user", "content": "Search for recent papers on AI"}]
})
# Agent calls the tool, then returns structured output
summary = result["structured_response"]
print(f"Query: {summary.query}")
print(f"Found {summary.result_count} results")
Use structured output with conversation history to refine results:
from pydantic import BaseModel
from langchain.agents import create_agent
# Schema for a single recommendation; all three fields are required.
class ProductRecommendation(BaseModel):
    """Product recommendation."""

    product_name: str
    price: float
    reason: str
agent = create_agent(
model="openai:gpt-4o",
response_format=ProductRecommendation,
system_prompt="Recommend products based on user preferences."
)
# Initial recommendation
result1 = agent.invoke({
"messages": [{"role": "user", "content": "I need a laptop for gaming"}]
})
rec1 = result1["structured_response"]
print(f"Recommendation 1: {rec1.product_name} - ${rec1.price}")
# Refine based on feedback
result2 = agent.invoke({
"messages": result1["messages"] + [
{"role": "user", "content": "That's too expensive. Something under $1000?"}
]
})
rec2 = result2["structured_response"]
print(f"Recommendation 2: {rec2.product_name} - ${rec2.price}")
Extract multiple structured records from a single input:
from pydantic import BaseModel
from typing import List
# One contact record; every field is a required, unconstrained string.
class Contact(BaseModel):
    """Contact information."""

    name: str
    email: str
    phone: str
# Wrapper model so that several Contact records can be returned as one
# structured response.
class ContactList(BaseModel):
    """List of contacts."""

    contacts: List[Contact]
agent = create_agent(
model="openai:gpt-4o",
response_format=ContactList,
system_prompt="Extract all contacts from the text."
)
result = agent.invoke({
"messages": [{"role": "user", "content": """
Team contacts:
1. Alice Smith - alice@example.com - 555-0101
2. Bob Jones - bob@example.com - 555-0102
3. Carol White - carol@example.com - 555-0103
"""}]
})
contacts = result["structured_response"]
for contact in contacts.contacts:
print(f"{contact.name}: {contact.email} ({contact.phone})")
Use different schemas based on input type:
from pydantic import BaseModel
from typing import Union, Literal
# Email variant of the message schemas.
class EmailMessage(BaseModel):
    """Email message."""

    # Literal tag fixed to "email"; identifies which message variant this is.
    type: Literal["email"] = "email"
    to: str
    subject: str
    body: str
# Slack variant of the message schemas.
class SlackMessage(BaseModel):
    """Slack message."""

    # Literal tag fixed to "slack"; identifies which message variant this is.
    type: Literal["slack"] = "slack"
    channel: str
    text: str
Message = Union[EmailMessage, SlackMessage]
# Note: Union types require special handling
# Use separate agents or custom logic to determine schema
Use detailed field descriptions to guide the model:
from pydantic import BaseModel, Field
class Event(BaseModel):
"""Event information extracted from text."""
title: str = Field(description="The name or title of the event")
date: str = Field(description="Event date in YYYY-MM-DD format")
time: str = Field(description="Event time in HH:MM format (24-hour)")
location: str = Field(description="Physical or virtual location of the event")
attendees: list[str] = Field(description="List of attendee names or email addresses")
Choose the right type for each field:
from pydantic import BaseModel, HttpUrl, EmailStr
from datetime import datetime
from typing import Optional
class Article(BaseModel):
"""Article metadata."""
title: str
url: HttpUrl # Validates URL format
author_email: EmailStr # Validates email format
published_at: datetime # Parses datetime
word_count: int # Ensures integer
rating: Optional[float] = None # Optional numeric rating
Use Optional and defaults for fields that might be missing:
from pydantic import BaseModel
from typing import Optional
class ArticleMetadata(BaseModel):
"""Article metadata with optional fields."""
title: str # Required
author: Optional[str] = None # Optional
publication_date: Optional[str] = None # Optional
summary: str = "No summary available" # Always a str; uses this default when omitted
Add validators to catch issues before processing:
from pydantic import BaseModel, field_validator
from datetime import datetime
class Appointment(BaseModel):
"""Appointment booking."""
date: str
time: str
duration_minutes: int
@field_validator('date')
@classmethod
def validate_date_format(cls, v: str) -> str:
try:
datetime.strptime(v, '%Y-%m-%d')
except ValueError:
raise ValueError('Date must be in YYYY-MM-DD format')
return v
@field_validator('duration_minutes')
@classmethod
def validate_duration(cls, v: int) -> int:
if v < 15 or v > 480:
raise ValueError('Duration must be between 15 and 480 minutes')
return v
Ensure your schema handles edge cases:
# Test with:
# - Missing optional fields
# - Boundary values (min/max)
# - Invalid formats
# - Empty lists
# - Null values
# - Unicode characters
# - Very long strings
If the model returns unstructured text instead of following the schema:
Use ToolStrategy to force tool-based extraction.
If you're getting StructuredOutputValidationError:
If structured output varies between runs:
Install with Tessl CLI
npx tessl i tessl/pypi-langchain