# Batch Processing

The instructor package provides batch processing capabilities for running large-scale structured extraction tasks efficiently. It supports both the modern unified `BatchProcessor` API and the legacy file-based `BatchJob` handler.

## BatchProcessor

Unified batch processing class for submitting, monitoring, and retrieving batch requests across different providers.

```python { .api }
from typing import Any, List, Optional, Type
from pydantic import BaseModel

class BatchProcessor:
    """
    Unified batch processing for structured extraction.

    Handles batch submission, monitoring, and result retrieval
    across different LLM providers with a consistent API.
    """

    def __init__(
        self,
        model: str,
        response_model: Type[BaseModel],
        client: Optional[Any] = None,
        **kwargs: Any,
    ) -> None:
        """
        Initialize the batch processor.

        Args:
            model: Model name to use (e.g. "openai/gpt-4o-mini", "anthropic/claude-3")
            response_model: Pydantic model for parsing results
            client: Optional instructor client (auto-detected if None)
            **kwargs: Additional processor configuration
        """

    def submit_batch(
        self,
        file_path: str,
        custom_id_prefix: str = ""
    ) -> str:
        """
        Submit batch requests from a JSONL file for processing.

        Args:
            file_path: Path to JSONL file containing batch requests
            custom_id_prefix: Optional prefix for custom IDs

        Returns:
            Batch ID for monitoring and result retrieval
        """

    def retrieve_results(
        self,
        batch_id: str
    ) -> List[BatchResult]:
        """
        Retrieve results from a completed batch.

        Args:
            batch_id: Identifier of the batch to retrieve

        Returns:
            List of BatchResult objects (BatchSuccess or BatchError)
        """

    def get_batch_status(self, batch_id: str) -> BatchJobInfo:
        """
        Get the current status of batch processing.

        Args:
            batch_id: Batch identifier

        Returns:
            BatchJobInfo with status and progress information
        """
```

## BatchRequest

Model class representing an individual batch request.

```python { .api }
from typing import Any, Dict, List, Optional
from pydantic import BaseModel

class Function(BaseModel):
    """Function definition within a tool."""
    name: str
    description: Optional[str] = None
    parameters: Optional[Dict[str, Any]] = None

class Tool(BaseModel):
    """Tool definition for function calling."""
    type: str = "function"
    function: Function

class RequestBody(BaseModel):
    """Request body for batch requests."""
    model: str
    messages: List[Dict[str, Any]]
    tools: Optional[List[Tool]] = None
    tool_choice: Optional[Dict[str, Any]] = None

class BatchRequest(BaseModel):
    """
    Individual batch request specification for JSONL batch processing.

    Represents a single extraction request within a batch operation.
    """

    custom_id: str
    method: str = "POST"
    url: str = "/v1/chat/completions"
    body: RequestBody

    @classmethod
    def from_create_params(
        cls,
        custom_id: str,
        model: str,
        messages: List[Dict[str, Any]],
        tools: Optional[List[Tool]] = None,
        **kwargs: Any,
    ) -> "BatchRequest":
        """
        Create a batch request from standard create parameters.

        Args:
            custom_id: Unique identifier for this request
            model: LLM model to use
            messages: Chat messages for the extraction
            tools: Optional function tools for structured output
            **kwargs: Additional model parameters
        """
```
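
The `BatchRequest` schema maps directly onto OpenAI's batch JSONL format. As a sketch of what one request line looks like, here is a standard-library-only helper that builds it (the `batch_request_line` function name is illustrative, not part of instructor's API; the field names come from the model above):

```python
import json

def batch_request_line(custom_id, model, messages):
    # Mirror the BatchRequest fields: one JSON object per line in the
    # batch input file, addressed to the chat completions endpoint.
    request = {
        "custom_id": custom_id,
        "method": "POST",
        "url": "/v1/chat/completions",
        "body": {"model": model, "messages": messages},
    }
    return json.dumps(request)

line = batch_request_line(
    "user_1",
    "gpt-4o-mini",
    [{"role": "user", "content": "Extract: John Doe, john@example.com"}],
)
print(line)
```

Writing one such line per input document produces a JSONL file suitable for `submit_batch`.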

## BatchJob

Legacy batch job handler with file-based processing.

```python { .api }
from typing import Any, Dict, List, Tuple, Type
from pydantic import BaseModel

class BatchJob:
    """
    Legacy batch job handler for file-based batch processing.

    Provides compatibility with file-based batch operations
    and result parsing from JSONL files.
    """

    @classmethod
    def parse_from_file(
        cls,
        file_path: str,
        response_model: Type[BaseModel]
    ) -> Tuple[List[BaseModel], List[Dict[Any, Any]]]:
        """
        Parse batch results from a JSONL file.

        Args:
            file_path: Path to JSONL results file
            response_model: Model to parse each result into

        Returns:
            Tuple of (successfully_parsed_models, error_objects)
        """

    @classmethod
    def parse_from_string(
        cls,
        content: str,
        response_model: Type[BaseModel]
    ) -> Tuple[List[BaseModel], List[Dict[Any, Any]]]:
        """
        Parse batch results from string content.

        Args:
            content: JSONL string content
            response_model: Model to parse each result into

        Returns:
            Tuple of (successfully_parsed_models, error_objects)
        """
```
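
The parse methods above walk the JSONL output line by line, unwrap the OpenAI batch response envelope, and bucket each row into successes or errors. A rough standard-library sketch of that core loop (returning plain dicts where `BatchJob` would validate against the pydantic `response_model`; the function name is illustrative):

```python
import json

def parse_results_from_string(content):
    # For each non-empty line: decode the envelope, pull the model's JSON
    # reply out of response.body.choices[0].message.content, and decode
    # that too. Malformed rows land in the error bucket instead of raising.
    parsed, errors = [], []
    for line in content.splitlines():
        line = line.strip()
        if not line:
            continue
        try:
            row = json.loads(line)
            reply = row["response"]["body"]["choices"][0]["message"]["content"]
            parsed.append(json.loads(reply))
        except (json.JSONDecodeError, KeyError, IndexError, TypeError):
            errors.append({"raw": line})
    return parsed, errors

sample = '{"custom_id": "u1", "response": {"body": {"choices": [{"message": {"content": "{\\"name\\": \\"Ada\\"}"}}]}}}\nnot json'
ok, bad = parse_results_from_string(sample)
print(len(ok), len(bad))  # → 1 1
```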

## Batch Result Types

The batch processing system uses a Result/Maybe pattern for type-safe handling of batch results.

```python { .api }
from typing import Generic, List, TypeVar, Union
from pydantic import BaseModel

T = TypeVar('T', bound=BaseModel)

class BatchSuccess(BaseModel, Generic[T]):
    """Successful batch result."""
    result: T
    custom_id: str

class BatchError(BaseModel):
    """Failed batch result."""
    error: str
    custom_id: str

# Union type for all batch results
BatchResult = Union[BatchSuccess[T], BatchError]

# Utility functions for working with mixed result lists
def filter_successful(results: List[BatchResult]) -> List[BatchSuccess]:
    """Filter only successful results."""

def filter_errors(results: List[BatchResult]) -> List[BatchError]:
    """Filter only error results."""

def extract_results(results: List[BatchResult]) -> List[T]:
    """Extract the result objects from successful results."""
```
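
The filter helpers reduce to `isinstance` checks over the result union. A minimal dependency-free sketch, using dataclasses in place of the pydantic models above (the real `BatchSuccess` is generic over the response model):

```python
from dataclasses import dataclass
from typing import Any, List, Union

@dataclass
class BatchSuccess:
    result: Any
    custom_id: str

@dataclass
class BatchError:
    error: str
    custom_id: str

BatchResult = Union[BatchSuccess, BatchError]

def filter_successful(results: List[BatchResult]) -> List[BatchSuccess]:
    # Keep only the success variants of the union
    return [r for r in results if isinstance(r, BatchSuccess)]

def filter_errors(results: List[BatchResult]) -> List[BatchError]:
    # Keep only the error variants
    return [r for r in results if isinstance(r, BatchError)]

def extract_results(results: List[BatchResult]) -> List[Any]:
    # Unwrap the payload from each successful result
    return [r.result for r in filter_successful(results)]

batch = [BatchSuccess({"name": "Ada"}, "u1"), BatchError("timeout", "u2")]
print(extract_results(batch))  # → [{'name': 'Ada'}]
```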

## Usage Examples

### Modern Batch Processing

```python { .api }
from instructor import (
    BatchProcessor,
    extract_results,
    filter_errors,
    filter_successful,
)
from pydantic import BaseModel

class UserProfile(BaseModel):
    name: str
    email: str
    age: int
    occupation: str

# Set up the processor
processor = BatchProcessor("openai/gpt-4o-mini", UserProfile)

# Submit a batch from a JSONL file.
# The file should contain requests in the OpenAI batch format.
batch_id = processor.submit_batch("user_extraction_requests.jsonl")
print(f"Submitted batch: {batch_id}")

# Monitor progress
status = processor.get_batch_status(batch_id)
print(f"Status: {status.status}")
print(f"Progress: {status.request_counts.completed}/{status.request_counts.total}")

# Retrieve results when the batch is complete
all_results = processor.retrieve_results(batch_id)

# Filter successful results and extract the parsed models
successful_results = filter_successful(all_results)
extracted_users = extract_results(all_results)

for user in extracted_users:
    print(f"Extracted: {user.name} - {user.email}")

# Handle errors
errors = filter_errors(all_results)
for error in errors:
    print(f"Error in {error.custom_id}: {error.error}")
```

### Legacy File-Based Processing

```python { .api }
from instructor import BatchJob
from pydantic import BaseModel

class UserProfile(BaseModel):
    name: str
    email: str
    age: int
    occupation: str

# Parse results from an OpenAI batch output file
successful_results, errors = BatchJob.parse_from_file(
    "batch_output_results.jsonl",
    UserProfile
)

print(f"Successfully parsed {len(successful_results)} users")
print(f"Failed to parse {len(errors)} results")

for user in successful_results:
    print(f"User: {user.name} - {user.email}")

# Parse from string content. Note the raw string: the JSON payloads embed
# escaped quotes (\"), which a regular string literal would unescape.
jsonl_content = r"""
{"custom_id": "user_1", "response": {"body": {"choices": [{"message": {"content": "{\"name\": \"John Doe\", \"email\": \"john@example.com\", \"age\": 25, \"occupation\": \"engineer\"}"}}]}}}
{"custom_id": "user_2", "response": {"body": {"choices": [{"message": {"content": "{\"name\": \"Jane Smith\", \"email\": \"jane@example.com\", \"age\": 30, \"occupation\": \"manager\"}"}}]}}}
"""

users_from_string, string_errors = BatchJob.parse_from_string(
    jsonl_content,
    UserProfile
)

print(f"Parsed {len(users_from_string)} users from string")
```