tessl/pypi-instructor

Structured outputs for LLMs with type safety, validation, and automatic retries

—

Pending

Overview

Eval results

Files

Client Usage

Name: tessl/pypi-instructor
Author: tessl

This document covers the core client functionality and methods for the instructor package, including client creation, core methods, hooks, and multimodal support.

Type Imports

from typing import List, Dict, Any, Type, Tuple, Generator, AsyncGenerator
from openai.types.chat import ChatCompletionMessageParam
from pydantic import BaseModel
from tenacity import Retrying, AsyncRetrying
from instructor.core.hooks import Hooks

Core Client Classes

Instructor

The primary synchronous client for structured output extraction.

class Instructor:
    def __init__(
        self,
        client: Any | None,
        create: Callable[..., Any], 
        mode: instructor.Mode = instructor.Mode.TOOLS,
        provider: Provider = Provider.OPENAI,
        hooks: Hooks | None = None,
        **kwargs: Any
    ) -> None:
        """
        Initialize Instructor client.
        
        Args:
            client: The underlying LLM client (OpenAI, Anthropic, etc.)
            create: The create function to patch
            mode: The extraction mode to use
            provider: The LLM provider type
            hooks: Event hooks for lifecycle management
            **kwargs: Additional configuration options
        """
        
    def create(
        self,
        response_model: Type[BaseModel] | None,
        messages: List[ChatCompletionMessageParam],
        max_retries: int | Retrying = 3,
        validation_context: dict[str, Any] | None = None,
        context: dict[str, Any] | None = None,
        strict: bool = True,
        hooks: Hooks | None = None,
        **kwargs: Any
    ) -> BaseModel:
        """
        Generate structured output from LLM.
        
        Args:
            response_model: Pydantic model to extract (can be None)
            messages: Chat messages for the LLM
            max_retries: Number of retry attempts on validation failure
            validation_context: Context dict for validation (deprecated, use context)
            context: Context dict for validation and processing
            strict: Whether to use strict mode for schema validation
            hooks: Per-call hooks to override client hooks
            **kwargs: Additional model parameters (model, temperature, etc.)
            
        Returns:
            Instance of response_model with extracted data, or Any if response_model is None
        """
        
    def create_partial(
        self,
        response_model: Type[BaseModel],
        messages: List[ChatCompletionMessageParam],
        max_retries: int | Retrying = 3,
        validation_context: dict[str, Any] | None = None,
        context: dict[str, Any] | None = None,
        strict: bool = True,
        hooks: Hooks | None = None,
        **kwargs: Any
    ) -> Generator[BaseModel, None, None]:
        """
        Stream partial results during extraction.
        
        Args:
            response_model: Pydantic model to extract
            messages: Chat messages for the LLM
            max_retries: Number of retry attempts on validation failure
            validation_context: Context dict for validation (deprecated, use context)
            context: Context dict for validation and processing
            strict: Whether to use strict mode for schema validation
            hooks: Per-call hooks to override client hooks
            **kwargs: Additional model parameters (model, temperature, etc.)
            
        Yields:
            Partial instances of response_model as they're built
        """
        
    def create_iterable(
        self,
        messages: List[ChatCompletionMessageParam],
        response_model: Type[BaseModel],
        max_retries: int | Retrying = 3,
        validation_context: dict[str, Any] | None = None,
        context: dict[str, Any] | None = None,
        strict: bool = True,
        hooks: Hooks | None = None,
        **kwargs: Any
    ) -> Generator[BaseModel, None, None]:
        """
        Generate multiple structured outputs.
        
        Args:
            messages: Chat messages for the LLM
            response_model: Pydantic model to extract
            max_retries: Number of retry attempts on validation failure
            validation_context: Context dict for validation (deprecated, use context)
            context: Context dict for validation and processing
            strict: Whether to use strict mode for schema validation
            hooks: Per-call hooks to override client hooks
            **kwargs: Additional model parameters (model, temperature, etc.)
            
        Yields:
            Generator of response_model instances
        """
        
    def create_with_completion(
        self,
        messages: List[ChatCompletionMessageParam],
        response_model: Type[BaseModel],
        max_retries: int | Retrying = 3,
        validation_context: dict[str, Any] | None = None,
        context: dict[str, Any] | None = None,
        strict: bool = True,
        hooks: Hooks | None = None,
        **kwargs: Any
    ) -> Tuple[BaseModel, Any]:
        """
        Return both structured result and raw completion.
        
        Args:
            messages: Chat messages for the LLM
            response_model: Pydantic model to extract
            max_retries: Number of retry attempts on validation failure
            validation_context: Context dict for validation (deprecated, use context)
            context: Context dict for validation and processing
            strict: Whether to use strict mode for schema validation
            hooks: Per-call hooks to override client hooks
            **kwargs: Additional model parameters (model, temperature, etc.)
            
        Returns:
            Tuple of (extracted_model, raw_completion)
        """

AsyncInstructor

The asynchronous client variant with identical API patterns.

class AsyncInstructor:
    def __init__(
        self,
        client: Any | None,
        create: Callable[..., Any],
        mode: instructor.Mode = instructor.Mode.TOOLS, 
        provider: Provider = Provider.OPENAI,
        hooks: Hooks | None = None,
        **kwargs: Any
    ) -> None:
        """
        Initialize AsyncInstructor client.
        
        Args:
            client: The underlying async LLM client
            create: The async create function to patch
            mode: The extraction mode to use
            provider: The LLM provider type
            hooks: Event hooks for lifecycle management
            **kwargs: Additional configuration options
        """
        
    async def create(
        self,
        response_model: Type[BaseModel] | None,
        messages: List[ChatCompletionMessageParam],
        max_retries: int | AsyncRetrying = 3,
        validation_context: dict[str, Any] | None = None,
        context: dict[str, Any] | None = None,
        strict: bool = True,
        hooks: Hooks | None = None,
        **kwargs: Any
    ) -> BaseModel:
        """Async version of create method."""
        
    async def create_partial(
        self,
        response_model: Type[BaseModel],
        messages: List[ChatCompletionMessageParam],
        max_retries: int | AsyncRetrying = 3,
        validation_context: dict[str, Any] | None = None,
        context: dict[str, Any] | None = None,
        strict: bool = True,
        hooks: Hooks | None = None,
        **kwargs: Any
    ) -> AsyncGenerator[BaseModel, None]:
        """Async version of create_partial method."""
        
    async def create_iterable(
        self,
        messages: List[ChatCompletionMessageParam],
        response_model: Type[BaseModel],
        max_retries: int | AsyncRetrying = 3,
        validation_context: dict[str, Any] | None = None,
        context: dict[str, Any] | None = None,
        strict: bool = True,
        hooks: Hooks | None = None,
        **kwargs: Any
    ) -> AsyncGenerator[BaseModel, None]:
        """Async version of create_iterable method."""
        
    async def create_with_completion(
        self,
        messages: List[ChatCompletionMessageParam],
        response_model: Type[BaseModel],
        max_retries: int | AsyncRetrying = 3,
        validation_context: dict[str, Any] | None = None,
        context: dict[str, Any] | None = None,
        strict: bool = True,
        hooks: Hooks | None = None,
        **kwargs: Any
    ) -> Tuple[BaseModel, Any]:
        """Async version of create_with_completion method."""

Hook Management

Both client classes support event hooks for lifecycle management:

from instructor.core.hooks import HookName
from typing import Callable, Literal

class Instructor:
    def on(
        self,
        hook_name: (
            HookName
            | Literal[
                "completion:kwargs",
                "completion:response", 
                "completion:error",
                "completion:last_attempt",
                "parse:error",
            ]
        ),
        handler: Callable[[Any], None],
    ) -> None:
        """
        Register event hook.
        
        Args:
            hook_name: Hook name to listen for (specific hook names)
            handler: Function to call when event occurs
        """
        
    def off(
        self, 
        hook_name: str, 
        handler: Callable[[Any], None] | None = None
    ) -> None:
        """
        Unregister event hook.
        
        Args:
            hook_name: Hook name to stop listening for
            handler: Specific handler to remove (None for all)
        """
        
    def clear(self) -> None:
        """Remove all registered hooks."""

Usage Examples

Basic Extraction

import instructor
from openai import OpenAI
from pydantic import BaseModel

class Person(BaseModel):
    name: str
    age: int
    occupation: str

client = instructor.from_openai(OpenAI())

person = client.create(
    model="gpt-4",
    messages=[{
        "role": "user", 
        "content": "Extract info: Sarah Chen, 28, software engineer"
    }],
    response_model=Person
)

print(f"{person.name} is {person.age} years old")

Streaming Partial Results

from instructor import Partial

class LongResponse(BaseModel):
    title: str
    summary: str
    key_points: List[str]
    conclusion: str

client = instructor.from_openai(OpenAI())

for partial in client.create_partial(
    model="gpt-4",
    messages=[{
        "role": "user",
        "content": "Write a detailed analysis of climate change impacts"
    }],
    response_model=Partial[LongResponse]
):
    if partial.title:
        print(f"Title: {partial.title}")
    if partial.summary:
        print(f"Summary: {partial.summary}")
    if partial.key_points:
        print(f"Points so far: {len(partial.key_points)}")

Iterable Extraction

class Task(BaseModel):
    name: str
    priority: str
    estimated_hours: int

tasks = client.create_iterable(
    model="gpt-4",
    messages=[{
        "role": "user",
        "content": "Extract all tasks from: Fix bugs (high, 4h), Write docs (medium, 2h), Review code (low, 1h)"
    }],
    response_model=Task
)

for task in tasks:
    print(f"{task.name}: {task.priority} priority, {task.estimated_hours}h")

Access to Raw Completion

result, completion = client.create_with_completion(
    model="gpt-4",
    messages=[{"role": "user", "content": "Extract user data: John, 30, teacher"}],
    response_model=Person
)

print(f"Extracted: {result}")
print(f"Raw completion: {completion}")
print(f"Token usage: {completion.usage}")

Multimodal Support

Image Processing

from instructor import Image

class ImageDescription(BaseModel):
    objects: List[str]
    scene: str
    mood: str
    colors: List[str]

# From URL
image = Image.from_url("https://example.com/photo.jpg")

# From local path  
image = Image.from_path("/path/to/image.png")

# From base64 string
image = Image.from_base64("iVBORw0KGgoAAAANSUhEUgAAAA...")

# Auto-detect source
image = Image.autodetect("https://example.com/photo.jpg")

description = client.create(
    model="gpt-4-vision-preview",
    messages=[{
        "role": "user",
        "content": [
            {"type": "text", "text": "Describe this image in detail"},
            {"type": "image_url", "image_url": {"url": image.to_openai()}}
        ]
    }],
    response_model=ImageDescription
)

Audio Processing

from instructor import Audio

class AudioTranscription(BaseModel):
    transcript: str
    language: str
    confidence: float
    speaker_count: int

# From local audio file
audio = Audio.from_path("/path/to/audio.wav")

# Convert for different providers
openai_audio = audio.to_openai()
anthropic_audio = audio.to_anthropic()

transcription = client.create(
    model="whisper-1",
    messages=[{
        "role": "user", 
        "content": f"Transcribe and analyze: {openai_audio}"
    }],
    response_model=AudioTranscription
)

Provider-Specific Image Conversion

from instructor import Image

image = Image.from_path("/path/to/image.jpg")

# OpenAI format
openai_format = image.to_openai()  # Returns base64 data URL

# Anthropic format  
anthropic_format = image.to_anthropic()  # Returns Anthropic image format

# Google GenAI format
genai_format = image.to_genai()  # Returns GenAI image part

Error Handling

from instructor.exceptions import InstructorError
from pydantic import ValidationError

try:
    result = client.create(
        model="gpt-4",
        messages=[{"role": "user", "content": "Invalid data"}],
        response_model=Person
    )
except ValidationError as e:
    print(f"Validation failed: {e}")
except InstructorError as e:
    print(f"Instructor error: {e}")
except Exception as e:
    print(f"Unexpected error: {e}")

Hook Usage

def on_completion(completion):
    print(f"Completion received: {completion.usage}")

def on_error(error):
    print(f"Error occurred: {error}")

client.on("completion", on_completion)
client.on("error", on_error)

# Use client normally - hooks will be called
result = client.create(
    model="gpt-4",
    messages=[{"role": "user", "content": "Hello"}],
    response_model=Person
)

# Clean up hooks
client.off("completion", on_completion)
client.clear()  # Remove all hooks

Install with Tessl CLI