or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

batch-processing.md client-usage.md dsl-components.md index.md modes-and-configuration.md providers.md schema-generation.md validation.md

docs/batch-processing.md

0

# Batch Processing

1

2

The instructor package provides comprehensive batch processing capabilities for handling large-scale structured extraction tasks efficiently. It supports both modern unified batch processing and legacy batch job handling.

3

4

## BatchProcessor

5

6

Unified batch processing class for handling batch requests across different providers.

7

8

```python { .api }

9

class BatchProcessor:

10

"""

11

Unified batch processing for structured extraction.

12

13

Handles batch submission, monitoring, and result retrieval

14

across different LLM providers with consistent API.

15

"""

16

17

def __init__(

18

self,

19

model: str,

20

response_model: Type[BaseModel],

21

client: Optional[Any] = None,

22

**kwargs: Any

23

) -> None:

24

"""

25

Initialize batch processor.

26

27

Args:

28

model: Model name to use (e.g. "openai/gpt-4o-mini", "anthropic/claude-3")

29

response_model: Pydantic model for parsing results

30

client: Optional instructor client (auto-detected if None)

31

**kwargs: Additional processor configuration

32

"""

33

34

def submit_batch(

35

self,

36

file_path: str,

37

custom_id_prefix: str = ""

38

) -> str:

39

"""

40

Submit batch requests from JSONL file for processing.

41

42

Args:

43

file_path: Path to JSONL file containing batch requests

44

custom_id_prefix: Optional prefix for custom IDs

45

46

Returns:

47

Batch ID for monitoring and result retrieval

48

"""

49

50

def retrieve_results(

51

self,

52

batch_id: str

53

) -> List[BatchResult]:

54

"""

55

Retrieve results from completed batch.

56

57

Args:

58

batch_id: Identifier of the batch to retrieve

59

60

Returns:

61

List of BatchResult objects (BatchSuccess or BatchError)

62

"""

63

64

def get_batch_status(self, batch_id: str) -> BatchJobInfo:

65

"""

66

Get current status of batch processing.

67

68

Args:

69

batch_id: Batch identifier

70

71

Returns:

72

BatchJobInfo with status and progress information

73

"""

74

```

75

76

## BatchRequest

77

78

Model class representing individual batch requests.

79

80

```python { .api }

81

from pydantic import BaseModel

82

from typing import Dict, Any, List, Optional

83

84

class BatchRequest(BaseModel):

85

"""

86

Individual batch request specification for JSONL batch processing.

87

88

Represents a single extraction request within a batch operation.

89

"""

90

91

custom_id: str

92

method: str = "POST"

93

url: str = "/v1/chat/completions"

94

body: RequestBody

95

96

@classmethod

97

def from_create_params(

98

cls,

99

custom_id: str,

100

model: str,

101

messages: List[Dict[str, Any]],

102

tools: Optional[List[Tool]] = None,

103

**kwargs: Any

104

) -> 'BatchRequest':

105

"""

106

Create batch request from standard create parameters.

107

108

Args:

109

custom_id: Unique identifier for this request

110

model: LLM model to use

111

messages: Chat messages for the extraction

112

tools: Optional function tools for structured output

113

**kwargs: Additional model parameters

114

"""

115

116

class RequestBody(BaseModel):

117

"""Request body for batch requests."""

118

model: str

119

messages: List[Dict[str, Any]]

120

tools: Optional[List[Tool]] = None

121

tool_choice: Optional[Dict[str, Any]] = None

122

123

class Tool(BaseModel):

124

"""Tool definition for function calling."""

125

type: str = "function"

126

function: Function

127

128

class Function(BaseModel):

129

"""Function definition within a tool."""

130

name: str

131

description: Optional[str] = None

132

parameters: Optional[Dict[str, Any]] = None

133

```

134

135

## BatchJob

136

137

Legacy batch job handler with file-based processing.

138

139

```python { .api }

140

class BatchJob:

141

"""

142

Legacy batch job handler for file-based batch processing.

143

144

Provides compatibility with file-based batch operations

145

and result parsing from JSONL files.

146

"""

147

148

@classmethod

149

def parse_from_file(

150

cls,

151

file_path: str,

152

response_model: Type[BaseModel]

153

) -> Tuple[List[BaseModel], List[Dict[Any, Any]]]:

154

"""

155

Parse batch results from JSONL file.

156

157

Args:

158

file_path: Path to JSONL results file

159

response_model: Model to parse each result into

160

161

Returns:

162

Tuple of (successfully_parsed_models, error_objects)

163

"""

164

165

@classmethod

166

def parse_from_string(

167

cls,

168

content: str,

169

response_model: Type[BaseModel]

170

) -> Tuple[List[BaseModel], List[Dict[Any, Any]]]:

171

"""

172

Parse batch results from string content.

173

174

Args:

175

content: JSONL string content

176

response_model: Model to parse each result into

177

178

Returns:

179

Tuple of (successfully_parsed_models, error_objects)

180

"""

181

```

182

183

## Batch Result Types

184

185

The batch processing system uses a Result/Maybe pattern for type-safe handling of batch results.

186

187

```python { .api }

188

from typing import Union, Generic, TypeVar

189

from pydantic import BaseModel

190

191

T = TypeVar('T', bound=BaseModel)

192

193

class BatchSuccess(BaseModel, Generic[T]):

194

"""Successful batch result."""

195

result: T

196

custom_id: str

197

198

class BatchError(BaseModel):

199

"""Failed batch result."""

200

error: str

201

custom_id: str

202

203

# Union type for all batch results

204

BatchResult = Union[BatchSuccess[T], BatchError]

205

206

# Additional utility functions

207

def filter_successful(results: List[BatchResult]) -> List[BatchSuccess]:

208

"""Filter only successful results."""

209

210

def filter_errors(results: List[BatchResult]) -> List[BatchError]:

211

"""Filter only error results."""

212

213

def extract_results(results: List[BatchResult]) -> List[T]:

214

"""Extract just the result objects from successful results."""

215

```

216

217

## Usage Examples

218

219

### Modern Batch Processing

220

221

```python { .api }

222

import instructor

223

from instructor import BatchProcessor, filter_successful, extract_results

224

from pydantic import BaseModel

225

from typing import List

226

227

class UserProfile(BaseModel):

228

name: str

229

email: str

230

age: int

231

occupation: str

232

233

# Set up processor

234

processor = BatchProcessor("openai/gpt-4o-mini", UserProfile)

235

236

# Submit batch from JSONL file

237

# File should contain requests in OpenAI batch format

238

batch_id = processor.submit_batch("user_extraction_requests.jsonl")

239

240

print(f"Submitted batch: {batch_id}")

241

242

# Monitor progress

243

status = processor.get_batch_status(batch_id)

244

print(f"Status: {status.status}")

245

print(f"Progress: {status.request_counts.completed}/{status.request_counts.total}")

246

247

# Retrieve results when ready

248

all_results = processor.retrieve_results(batch_id)

249

250

# Filter successful results

251

successful_results = filter_successful(all_results)

252

extracted_users = extract_results(all_results)

253

254

for user in extracted_users:

255

print(f"Extracted: {user.name} - {user.email}")

256

257

# Handle errors

258

errors = filter_errors(all_results)

259

for error in errors:

260

print(f"Error in {error.custom_id}: {error.error}")

261

```

262

263

### Legacy File-Based Processing

264

265

```python { .api }

266

from instructor import BatchJob

267

from pydantic import BaseModel

268

269

class UserProfile(BaseModel):

270

name: str

271

email: str

272

age: int

273

occupation: str

274

275

# Parse results from OpenAI batch output file

276

successful_results, errors = BatchJob.parse_from_file(

277

"batch_output_results.jsonl",

278

UserProfile

279

)

280

281

print(f"Successfully parsed {len(successful_results)} users")

282

print(f"Failed to parse {len(errors)} results")

283

284

for user in successful_results:

285

print(f"User: {user.name} - {user.email}")

286

287

# Parse from string content

288

jsonl_content = r"""

289

{"custom_id": "user_1", "response": {"body": {"choices": [{"message": {"content": "{\"name\": \"John Doe\", \"email\": \"john@example.com\", \"age\": 25, \"occupation\": \"engineer\"}"}}]}}}

290

{"custom_id": "user_2", "response": {"body": {"choices": [{"message": {"content": "{\"name\": \"Jane Smith\", \"email\": \"jane@example.com\", \"age\": 30, \"occupation\": \"manager\"}"}}]}}}

291

"""

292

293

users_from_string, string_errors = BatchJob.parse_from_string(

294

jsonl_content,

295

UserProfile

296

)

297

298

print(f"Parsed {len(users_from_string)} users from string")

299

```