# DSL Components

The instructor package provides a domain-specific language (DSL) for advanced extraction patterns. These components enable optional extraction, streaming validation, multi-task extraction, and citation tracking.

## Maybe

Optional result wrapper for handling cases where extraction might fail or return no data.

```python { .api }
def Maybe(model: type[T]) -> type[MaybeBase[T]]:
    """
    Create optional result wrapper.

    Args:
        model: Pydantic model class to wrap

    Returns:
        MaybeBase subclass that can represent success or failure
    """

class MaybeBase[T]:
    """Base class for Maybe results."""

    result: T | None
    error: bool
    message: str | None

    def __init__(
        self,
        result: T | None = None,
        error: bool = False,
        message: str | None = None
    ) -> None:
        """
        Initialize Maybe result.

        Args:
            result: The extracted model instance (None if failed)
            error: Whether an error occurred during extraction
            message: Optional error message or explanation
        """

    @property
    def is_success(self) -> bool:
        """Check if extraction was successful."""

    @property
    def is_failure(self) -> bool:
        """Check if extraction failed."""

    def unwrap(self) -> T:
        """
        Get the result, raising an exception if extraction failed.

        Returns:
            The extracted model instance

        Raises:
            ValueError: If extraction failed
        """

    def unwrap_or(self, default: T) -> T:
        """
        Get the result, or return a default if extraction failed.

        Args:
            default: Value to return if extraction failed

        Returns:
            The extracted model or the default value
        """
```
### Maybe Usage Examples

```python { .api }
from instructor import Maybe
from pydantic import BaseModel

class User(BaseModel):
    name: str
    email: str
    age: int

# Create optional wrapper
OptionalUser = Maybe(User)

# Use in extraction
maybe_user = client.create(
    model="gpt-4",
    messages=[{"role": "user", "content": "No user data here"}],
    response_model=OptionalUser
)

# Check result
if maybe_user.is_success:
    user = maybe_user.unwrap()
    print(f"Extracted: {user.name}")
else:
    print(f"Extraction failed: {maybe_user.message}")

# Use with default
user = maybe_user.unwrap_or(User(name="Unknown", email="", age=0))

# Pattern matching style
match maybe_user:
    case OptionalUser(result=user) if maybe_user.is_success:
        print(f"Success: {user}")
    case OptionalUser(error=True, message=msg):
        print(f"Failed: {msg}")
```
## Partial

Generic class for streaming validation that allows partial model validation as data arrives.

```python { .api }
class Partial[T]:
    """
    Partial validation streaming wrapper.

    Allows streaming validation of Pydantic models as data becomes available.
    Use as Partial[YourModel] to enable incremental validation.
    """

    def __class_getitem__(cls, item: type[BaseModel]) -> type[BaseModel]:
        """
        Create partial validation class for the given model.

        Args:
            item: Pydantic model class to wrap

        Returns:
            Modified model class with partial validation support
        """
```
### Partial Usage Examples

```python { .api }
from instructor import Partial
from pydantic import BaseModel
from typing import List

class Article(BaseModel):
    title: str
    author: str
    content: str
    tags: List[str]
    word_count: int

# Stream partial results
for partial_article in client.create_partial(
    model="gpt-4",
    messages=[{
        "role": "user",
        "content": "Write a long article about climate change"
    }],
    response_model=Partial[Article]
):
    # Display progress as fields become available
    if partial_article.title:
        print(f"Title: {partial_article.title}")

    if partial_article.author:
        print(f"Author: {partial_article.author}")

    if partial_article.content:
        print(f"Content length: {len(partial_article.content)}")

    if partial_article.tags:
        print(f"Tags so far: {partial_article.tags}")

    if partial_article.word_count:
        print(f"Word count: {partial_article.word_count}")

# The final streamed object has all fields populated. Note that it is an
# instance of the Partial-generated model, not of Article itself, so
# re-validate it to obtain a fully validated Article.
final_article = Article.model_validate(partial_article.model_dump())
assert isinstance(final_article, Article)
```
## IterableModel

Function factory for multi-task extraction that creates models capable of handling multiple instances.

```python { .api }
def IterableModel(
    subtask_class: type[BaseModel],
    name: Optional[str] = None,
    description: Optional[str] = None
) -> type[BaseModel]:
    """
    Create multi-task extraction wrapper.

    Args:
        subtask_class: Pydantic model class for individual tasks
        name: Optional name for the iterable model
        description: Optional description for the extraction task

    Returns:
        Model class that can extract multiple instances of subtask_class
    """
```
### IterableModel Usage Examples

```python { .api }
from instructor import IterableModel
from pydantic import BaseModel
from typing import List

class Task(BaseModel):
    name: str
    priority: str
    assigned_to: str
    due_date: str

# Create iterable model
TaskList = IterableModel(
    Task,
    name="ProjectTasks",
    description="Extract all tasks from project description"
)

# Extract multiple tasks
task_extraction = client.create(
    model="gpt-4",
    messages=[{
        "role": "user",
        "content": """
        Project tasks:
        1. Design database schema (high priority, John, 2024-01-15)
        2. Implement API endpoints (medium priority, Sarah, 2024-01-20)
        3. Write unit tests (low priority, Mike, 2024-01-25)
        4. Deploy to staging (high priority, John, 2024-01-30)
        """
    }],
    response_model=TaskList
)

# Access extracted tasks
for task in task_extraction.tasks:  # TaskList has a 'tasks' attribute
    print(f"{task.name} - {task.priority} - {task.assigned_to}")

# Alternative: Use create_iterable directly
tasks = client.create_iterable(
    model="gpt-4",
    messages=[{"role": "user", "content": "Extract tasks..."}],
    response_model=Task
)

for task in tasks:
    print(f"Task: {task.name}")
```
## CitationMixin

Mixin class for adding citation tracking capabilities to models.

```python { .api }
class CitationMixin:
    """
    Citation tracking mixin.

    Add citation tracking capabilities to Pydantic models by inheriting
    from this mixin along with BaseModel.
    """

    citations: List[str] = Field(
        default_factory=list,
        description="Source citations for extracted information"
    )

    confidence: Optional[float] = Field(
        None,
        description="Confidence score for extraction (0.0-1.0)"
    )

    source_text: Optional[str] = Field(
        None,
        description="Original text that information was extracted from"
    )

    def add_citation(self, citation: str) -> None:
        """
        Add a citation to the model.

        Args:
            citation: Citation string to add
        """

    def has_citations(self) -> bool:
        """Check if the model has any citations."""

    def get_citations(self) -> List[str]:
        """Get all citations for this model."""
```
### CitationMixin Usage Examples

```python { .api }
from instructor import CitationMixin
from pydantic import BaseModel, Field
from typing import List, Optional

class CitedFact(CitationMixin, BaseModel):
    statement: str = Field(description="The factual statement")
    category: str = Field(description="Category of the fact")

class ResearchSummary(CitationMixin, BaseModel):
    topic: str
    key_findings: List[str]
    methodology: str
    conclusion: str

# Extract with citations
summary = client.create(
    model="gpt-4",
    messages=[{
        "role": "user",
        "content": """
        Based on the research paper 'Climate Change Impacts 2023' by Smith et al.,
        extract a summary. The study used satellite data from 2020-2023 and found
        that arctic ice decreased by 15% annually. The methodology involved thermal
        imaging and statistical analysis.
        """
    }],
    response_model=ResearchSummary
)

# Access citation information
print(f"Summary: {summary.topic}")
print(f"Confidence: {summary.confidence}")
print(f"Citations: {summary.citations}")
print(f"Source: {summary.source_text}")

# Manual citation management
summary.add_citation("Smith et al. 2023, Climate Change Impacts")
summary.add_citation("Arctic Research Database 2023")

if summary.has_citations():
    for citation in summary.get_citations():
        print(f"Source: {citation}")
```
## Advanced DSL Patterns

### Combining DSL Components

```python { .api }
from instructor import Maybe, Partial, IterableModel, CitationMixin

class Evidence(CitationMixin, BaseModel):
    claim: str
    supporting_data: str
    reliability: str

class Argument(BaseModel):
    thesis: str
    evidence: List[Evidence]
    counter_arguments: List[str]

# Optional iterable with citations
OptionalEvidenceList = Maybe(IterableModel(Evidence))

# Stream partial arguments with citations
PartialArgument = Partial[Argument]

# Extract optional evidence list
maybe_evidence = client.create(
    model="gpt-4",
    messages=[{"role": "user", "content": "Find evidence for climate change"}],
    response_model=OptionalEvidenceList
)

if maybe_evidence.is_success:
    evidence_list = maybe_evidence.unwrap()
    for evidence in evidence_list.tasks:
        print(f"Claim: {evidence.claim}")
        print(f"Citations: {evidence.citations}")
```
### Nested DSL Structures

```python { .api }
class OptionalTaskDetails(BaseModel):
    """Task details that might not be extractable."""
    name: str
    description: str

# Maybe is a factory applied to a concrete model; do not subclass Maybe(...)
OptionalTask = Maybe(OptionalTaskDetails)

class ProjectPlan(BaseModel):
    title: str
    required_tasks: List[Task]  # Task model from the IterableModel example above; always present
    optional_tasks: List[OptionalTask]  # May be empty or failed

# Extract mixed required and optional tasks
plan = client.create(
    model="gpt-4",
    messages=[{"role": "user", "content": "Create project plan..."}],
    response_model=ProjectPlan
)

# Handle mixed results
print(f"Required tasks: {len(plan.required_tasks)}")
for optional in plan.optional_tasks:
    if optional.is_success:
        task = optional.unwrap()
        print(f"Optional task: {task.name}")
    else:
        print(f"Failed to extract optional task: {optional.message}")
```
### Custom DSL Extensions

```python { .api }
from typing import TypeVar, Generic
from pydantic import BaseModel, Field

T = TypeVar('T', bound=BaseModel)

class Weighted(Generic[T]):
    """Custom DSL component for weighted results."""

    @classmethod
    def create(cls, model_class: type[T]) -> type[BaseModel]:
        """Create weighted version of model."""

        class WeightedModel(BaseModel):
            result: model_class
            weight: float = Field(
                ...,
                ge=0.0,
                le=1.0,
                description="Confidence weight for this result"
            )
            reasoning: str = Field(
                ...,
                description="Explanation for the assigned weight"
            )

        return WeightedModel

# Usage
WeightedUser = Weighted.create(User)

weighted_result = client.create(
    model="gpt-4",
    messages=[{"role": "user", "content": "Extract user with confidence"}],
    response_model=WeightedUser
)

print(f"User: {weighted_result.result.name}")
print(f"Weight: {weighted_result.weight}")
print(f"Reasoning: {weighted_result.reasoning}")
```