# DSL Components

The instructor package provides a domain-specific language (DSL) for advanced extraction patterns. These components enable optional extraction, streaming validation, multi-task extraction, and citation tracking.

## Maybe

Optional result wrapper for handling cases where extraction might fail or return no data.

```python { .api }
def Maybe(model: type[T]) -> type[MaybeBase[T]]:
    """
    Create optional result wrapper.

    Args:
        model: Pydantic model class to wrap

    Returns:
        MaybeBase subclass that can represent success or failure
    """

class MaybeBase[T]:
    """Base class for Maybe results."""

    result: T | None
    error: bool
    message: str | None

    def __init__(
        self,
        result: T | None = None,
        error: bool = False,
        message: str | None = None
    ) -> None:
        """
        Initialize Maybe result.

        Args:
            result: The extracted model instance (None if failed)
            error: Whether an error occurred during extraction
            message: Optional error message or explanation
        """

    @property
    def is_success(self) -> bool:
        """Check if extraction was successful."""

    @property
    def is_failure(self) -> bool:
        """Check if extraction failed."""

    def unwrap(self) -> T:
        """
        Get the result, raising an exception if extraction failed.

        Returns:
            The extracted model instance

        Raises:
            ValueError: If extraction failed
        """

    def unwrap_or(self, default: T) -> T:
        """
        Get the result, or return a default if extraction failed.

        Args:
            default: Value to return if extraction failed

        Returns:
            The extracted model or the default value
        """
```
### Maybe Usage Examples

```python { .api }
from instructor import Maybe
from pydantic import BaseModel

class User(BaseModel):
    name: str
    email: str
    age: int

# Create optional wrapper
OptionalUser = Maybe(User)

# Use in extraction
maybe_user = client.create(
    model="gpt-4",
    messages=[{"role": "user", "content": "No user data here"}],
    response_model=OptionalUser
)

# Check result
if maybe_user.is_success:
    user = maybe_user.unwrap()
    print(f"Extracted: {user.name}")
else:
    print(f"Extraction failed: {maybe_user.message}")

# Use with default
user = maybe_user.unwrap_or(User(name="Unknown", email="", age=0))

# Pattern matching style
match maybe_user:
    case OptionalUser(result=user) if maybe_user.is_success:
        print(f"Success: {user}")
    case OptionalUser(error=True, message=msg):
        print(f"Failed: {msg}")
```
## Partial

Generic class for streaming validation that allows partial model validation as data arrives.

```python { .api }
class Partial[T]:
    """
    Partial validation streaming wrapper.

    Allows streaming validation of Pydantic models as data becomes available.
    Use as Partial[YourModel] to enable incremental validation.
    """

    def __class_getitem__(cls, item: type[BaseModel]) -> type[BaseModel]:
        """
        Create partial validation class for the given model.

        Args:
            item: Pydantic model class to wrap

        Returns:
            Modified model class with partial validation support
        """
```
### Partial Usage Examples

```python { .api }
from instructor import Partial
from pydantic import BaseModel
from typing import List

class Article(BaseModel):
    title: str
    author: str
    content: str
    tags: List[str]
    word_count: int

# Stream partial results
for partial_article in client.create_partial(
    model="gpt-4",
    messages=[{
        "role": "user",
        "content": "Write a long article about climate change"
    }],
    response_model=Partial[Article]
):
    # Display progress as fields become available
    if partial_article.title:
        print(f"Title: {partial_article.title}")

    if partial_article.author:
        print(f"Author: {partial_article.author}")

    if partial_article.content:
        print(f"Content length: {len(partial_article.content)}")

    if partial_article.tags:
        print(f"Tags so far: {partial_article.tags}")

    if partial_article.word_count:
        print(f"Word count: {partial_article.word_count}")

# The final streamed object has all fields populated. Note that it is an
# instance of the Partial-generated model, not of Article itself, so
# re-validate it to obtain a fully validated Article.
final_article = Article.model_validate(partial_article.model_dump())
assert isinstance(final_article, Article)
```
## IterableModel

Function factory for multi-task extraction that creates models capable of handling multiple instances.

```python { .api }
def IterableModel(
    subtask_class: type[BaseModel],
    name: Optional[str] = None,
    description: Optional[str] = None
) -> type[BaseModel]:
    """
    Create multi-task extraction wrapper.

    Args:
        subtask_class: Pydantic model class for individual tasks
        name: Optional name for the iterable model
        description: Optional description for the extraction task

    Returns:
        Model class that can extract multiple instances of subtask_class
    """
```
### IterableModel Usage Examples

```python { .api }
from instructor import IterableModel
from pydantic import BaseModel
from typing import List

class Task(BaseModel):
    name: str
    priority: str
    assigned_to: str
    due_date: str

# Create iterable model
TaskList = IterableModel(
    Task,
    name="ProjectTasks",
    description="Extract all tasks from project description"
)

# Extract multiple tasks
task_extraction = client.create(
    model="gpt-4",
    messages=[{
        "role": "user",
        "content": """
        Project tasks:
        1. Design database schema (high priority, John, 2024-01-15)
        2. Implement API endpoints (medium priority, Sarah, 2024-01-20)
        3. Write unit tests (low priority, Mike, 2024-01-25)
        4. Deploy to staging (high priority, John, 2024-01-30)
        """
    }],
    response_model=TaskList
)

# Access extracted tasks
for task in task_extraction.tasks:  # TaskList has a 'tasks' attribute
    print(f"{task.name} - {task.priority} - {task.assigned_to}")

# Alternative: Use create_iterable directly
tasks = client.create_iterable(
    model="gpt-4",
    messages=[{"role": "user", "content": "Extract tasks..."}],
    response_model=Task
)

for task in tasks:
    print(f"Task: {task.name}")
```
## CitationMixin

Mixin class for adding citation tracking capabilities to models.

```python { .api }
class CitationMixin:
    """
    Citation tracking mixin.

    Add citation tracking capabilities to Pydantic models by inheriting
    from this mixin along with BaseModel.
    """

    citations: List[str] = Field(
        default_factory=list,
        description="Source citations for extracted information"
    )

    confidence: Optional[float] = Field(
        None,
        description="Confidence score for extraction (0.0-1.0)"
    )

    source_text: Optional[str] = Field(
        None,
        description="Original text that information was extracted from"
    )

    def add_citation(self, citation: str) -> None:
        """
        Add a citation to the model.

        Args:
            citation: Citation string to add
        """

    def has_citations(self) -> bool:
        """Check if the model has any citations."""

    def get_citations(self) -> List[str]:
        """Get all citations for this model."""
```
### CitationMixin Usage Examples

```python { .api }
from instructor import CitationMixin
from pydantic import BaseModel, Field
from typing import List, Optional

class CitedFact(CitationMixin, BaseModel):
    statement: str = Field(description="The factual statement")
    category: str = Field(description="Category of the fact")

class ResearchSummary(CitationMixin, BaseModel):
    topic: str
    key_findings: List[str]
    methodology: str
    conclusion: str

# Extract with citations
summary = client.create(
    model="gpt-4",
    messages=[{
        "role": "user",
        "content": """
        Based on the research paper 'Climate Change Impacts 2023' by Smith et al.,
        extract a summary. The study used satellite data from 2020-2023 and found
        that arctic ice decreased by 15% annually. The methodology involved thermal
        imaging and statistical analysis.
        """
    }],
    response_model=ResearchSummary
)

# Access citation information
print(f"Summary: {summary.topic}")
print(f"Confidence: {summary.confidence}")
print(f"Citations: {summary.citations}")
print(f"Source: {summary.source_text}")

# Manual citation management
summary.add_citation("Smith et al. 2023, Climate Change Impacts")
summary.add_citation("Arctic Research Database 2023")

if summary.has_citations():
    for citation in summary.get_citations():
        print(f"Source: {citation}")
```
## Advanced DSL Patterns

### Combining DSL Components

```python { .api }
from instructor import Maybe, Partial, IterableModel, CitationMixin

class Evidence(CitationMixin, BaseModel):
    claim: str
    supporting_data: str
    reliability: str

class Argument(BaseModel):
    thesis: str
    evidence: List[Evidence]
    counter_arguments: List[str]

# Optional iterable with citations
OptionalEvidenceList = Maybe(IterableModel(Evidence))

# Stream partial arguments with citations
PartialArgument = Partial[Argument]

# Extract optional evidence list
maybe_evidence = client.create(
    model="gpt-4",
    messages=[{"role": "user", "content": "Find evidence for climate change"}],
    response_model=OptionalEvidenceList
)

if maybe_evidence.is_success:
    evidence_list = maybe_evidence.unwrap()
    for evidence in evidence_list.tasks:
        print(f"Claim: {evidence.claim}")
        print(f"Citations: {evidence.citations}")
```
### Nested DSL Structures

```python { .api }
class OptionalTaskDetails(BaseModel):
    """Task details that might not be extractable."""
    name: str
    description: str

# Maybe is a factory applied to a concrete model; do not subclass Maybe(...)
OptionalTask = Maybe(OptionalTaskDetails)

class ProjectPlan(BaseModel):
    title: str
    required_tasks: List[Task]  # Task model from the IterableModel example above; always present
    optional_tasks: List[OptionalTask]  # May be empty or failed

# Extract mixed required and optional tasks
plan = client.create(
    model="gpt-4",
    messages=[{"role": "user", "content": "Create project plan..."}],
    response_model=ProjectPlan
)

# Handle mixed results
print(f"Required tasks: {len(plan.required_tasks)}")
for optional in plan.optional_tasks:
    if optional.is_success:
        task = optional.unwrap()
        print(f"Optional task: {task.name}")
    else:
        print(f"Failed to extract optional task: {optional.message}")
```
### Custom DSL Extensions

```python { .api }
from typing import TypeVar, Generic
from pydantic import BaseModel, Field

T = TypeVar('T', bound=BaseModel)

class Weighted(Generic[T]):
    """Custom DSL component for weighted results."""

    @classmethod
    def create(cls, model_class: type[T]) -> type[BaseModel]:
        """Create weighted version of model."""

        class WeightedModel(BaseModel):
            result: model_class
            weight: float = Field(
                ...,
                ge=0.0,
                le=1.0,
                description="Confidence weight for this result"
            )
            reasoning: str = Field(
                ...,
                description="Explanation for the assigned weight"
            )

        return WeightedModel

# Usage
WeightedUser = Weighted.create(User)

weighted_result = client.create(
    model="gpt-4",
    messages=[{"role": "user", "content": "Extract user with confidence"}],
    response_model=WeightedUser
)

print(f"User: {weighted_result.result.name}")
print(f"Weight: {weighted_result.weight}")
print(f"Reasoning: {weighted_result.reasoning}")
```