or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

batch-processing.md · client-usage.md · dsl-components.md · index.md · modes-and-configuration.md · providers.md · schema-generation.md · validation.md

docs/dsl-components.md

0

# DSL Components

1

2

The instructor package provides a domain-specific language (DSL) for advanced extraction patterns. These components enable optional extraction, streaming validation, multi-task extraction, and citation tracking.

3

4

## Maybe

5

6

Optional result wrapper for handling cases where extraction might fail or return no data.

7

8

```python { .api }

9

def Maybe(model: type[T]) -> type[MaybeBase[T]]:

10

"""

11

Create optional result wrapper.

12

13

Args:

14

model: Pydantic model class to wrap

15

16

Returns:

17

MaybeBase subclass that can represent success or failure

18

"""

19

20

class MaybeBase[T]:

21

"""Base class for Maybe results."""

22

23

result: T | None

24

error: bool

25

message: str | None

26

27

def __init__(

28

self,

29

result: T | None = None,

30

error: bool = False,

31

message: str | None = None

32

) -> None:

33

"""

34

Initialize Maybe result.

35

36

Args:

37

result: The extracted model instance (None if failed)

38

error: Whether an error occurred during extraction

39

message: Optional error message or explanation

40

"""

41

42

@property

43

def is_success(self) -> bool:

44

"""Check if extraction was successful."""

45

46

@property

47

def is_failure(self) -> bool:

48

"""Check if extraction failed."""

49

50

def unwrap(self) -> T:

51

"""

52

Get the result, raising exception if failed.

53

54

Returns:

55

The extracted model instance

56

57

Raises:

58

ValueError: If extraction failed

59

"""

60

61

def unwrap_or(self, default: T) -> T:

62

"""

63

Get the result or return default if failed.

64

65

Args:

66

default: Value to return if extraction failed

67

68

Returns:

69

The extracted model or default value

70

"""

71

```

72

73

### Maybe Usage Examples

74

75

```python { .api }

76

from instructor import Maybe

77

from pydantic import BaseModel

78

79

class User(BaseModel):

80

name: str

81

email: str

82

age: int

83

84

# Create optional wrapper

85

OptionalUser = Maybe(User)

86

87

# Use in extraction

88

maybe_user = client.create(

89

model="gpt-4",

90

messages=[{"role": "user", "content": "No user data here"}],

91

response_model=OptionalUser

92

)

93

94

# Check result

95

if maybe_user.is_success:

96

user = maybe_user.unwrap()

97

print(f"Extracted: {user.name}")

98

else:

99

print(f"Extraction failed: {maybe_user.message}")

100

101

# Use with default

102

user = maybe_user.unwrap_or(User(name="Unknown", email="", age=0))

103

104

# Pattern matching style

105

match maybe_user:

106

case OptionalUser(result=user) if maybe_user.is_success:

107

print(f"Success: {user}")

108

case OptionalUser(error=True, message=msg):

109

print(f"Failed: {msg}")

110

```

111

112

## Partial

113

114

Generic class for streaming validation that allows partial model validation as data arrives.

115

116

```python { .api }

117

class Partial[T]:

118

"""

119

Partial validation streaming wrapper.

120

121

Allows streaming validation of Pydantic models as data becomes available.

122

Use as Partial[YourModel] to enable incremental validation.

123

"""

124

125

def __class_getitem__(cls, item: type[BaseModel]) -> type[BaseModel]:

126

"""

127

Create partial validation class for given model.

128

129

Args:

130

item: Pydantic model class to wrap

131

132

Returns:

133

Modified model class with partial validation support

134

"""

135

```

136

137

### Partial Usage Examples

138

139

```python { .api }

140

from instructor import Partial

141

from pydantic import BaseModel

142

from typing import List

143

144

class Article(BaseModel):

145

title: str

146

author: str

147

content: str

148

tags: List[str]

149

word_count: int

150

151

# Stream partial results

152

for partial_article in client.create_partial(

153

model="gpt-4",

154

messages=[{

155

"role": "user",

156

"content": "Write a long article about climate change"

157

}],

158

response_model=Partial[Article]

159

):

160

# Display progress as fields become available

161

if partial_article.title:

162

print(f"Title: {partial_article.title}")

163

164

if partial_article.author:

165

print(f"Author: {partial_article.author}")

166

167

if partial_article.content:

168

print(f"Content length: {len(partial_article.content)}")

169

170

if partial_article.tags:

171

print(f"Tags so far: {partial_article.tags}")

172

173

if partial_article.word_count:

174

print(f"Word count: {partial_article.word_count}")

175

176

# Final result is fully validated

177

final_article = partial_article

178

assert isinstance(final_article, Article)

179

```

180

181

## IterableModel

182

183

Function factory for multi-task extraction that creates models capable of handling multiple instances.

184

185

```python { .api }

186

def IterableModel(

187

subtask_class: type[BaseModel],

188

name: Optional[str] = None,

189

description: Optional[str] = None

190

) -> type[BaseModel]:

191

"""

192

Create multi-task extraction wrapper.

193

194

Args:

195

subtask_class: Pydantic model class for individual tasks

196

name: Optional name for the iterable model

197

description: Optional description for the extraction task

198

199

Returns:

200

Model class that can extract multiple instances of subtask_class

201

"""

202

```

203

204

### IterableModel Usage Examples

205

206

```python { .api }

207

from instructor import IterableModel

208

from pydantic import BaseModel

209

from typing import List

210

211

class Task(BaseModel):

212

name: str

213

priority: str

214

assigned_to: str

215

due_date: str

216

217

# Create iterable model

218

TaskList = IterableModel(

219

Task,

220

name="ProjectTasks",

221

description="Extract all tasks from project description"

222

)

223

224

# Extract multiple tasks

225

task_extraction = client.create(

226

model="gpt-4",

227

messages=[{

228

"role": "user",

229

"content": """

230

Project tasks:

231

1. Design database schema (high priority, John, 2024-01-15)

232

2. Implement API endpoints (medium priority, Sarah, 2024-01-20)

233

3. Write unit tests (low priority, Mike, 2024-01-25)

234

4. Deploy to staging (high priority, John, 2024-01-30)

235

"""

236

}],

237

response_model=TaskList

238

)

239

240

# Access extracted tasks

241

for task in task_extraction.tasks: # TaskList has 'tasks' attribute

242

print(f"{task.name} - {task.priority} - {task.assigned_to}")

243

244

# Alternative: Use create_iterable directly

245

tasks = client.create_iterable(

246

model="gpt-4",

247

messages=[{"role": "user", "content": "Extract tasks..."}],

248

response_model=Task

249

)

250

251

for task in tasks:

252

print(f"Task: {task.name}")

253

```

254

255

## CitationMixin

256

257

Mixin class for adding citation tracking capabilities to models.

258

259

```python { .api }

260

class CitationMixin:

261

"""

262

Citation tracking mixin.

263

264

Add citation tracking capabilities to Pydantic models by inheriting

265

from this mixin along with BaseModel.

266

"""

267

268

citations: List[str] = Field(

269

default_factory=list,

270

description="Source citations for extracted information"

271

)

272

273

confidence: Optional[float] = Field(

274

None,

275

description="Confidence score for extraction (0.0-1.0)"

276

)

277

278

source_text: Optional[str] = Field(

279

None,

280

description="Original text that information was extracted from"

281

)

282

283

def add_citation(self, citation: str) -> None:

284

"""

285

Add citation to the model.

286

287

Args:

288

citation: Citation string to add

289

"""

290

291

def has_citations(self) -> bool:

292

"""Check if model has any citations."""

293

294

def get_citations(self) -> List[str]:

295

"""Get all citations for this model."""

296

```

297

298

### CitationMixin Usage Examples

299

300

```python { .api }

301

from instructor import CitationMixin

302

from pydantic import BaseModel, Field

303

from typing import List, Optional

304

305

class CitedFact(CitationMixin, BaseModel):

306

statement: str = Field(description="The factual statement")

307

category: str = Field(description="Category of the fact")

308

309

class ResearchSummary(CitationMixin, BaseModel):

310

topic: str

311

key_findings: List[str]

312

methodology: str

313

conclusion: str

314

315

# Extract with citations

316

summary = client.create(

317

model="gpt-4",

318

messages=[{

319

"role": "user",

320

"content": """

321

Based on the research paper 'Climate Change Impacts 2023' by Smith et al.,

322

extract a summary. The study used satellite data from 2020-2023 and found

323

that arctic ice decreased by 15% annually. The methodology involved thermal

324

imaging and statistical analysis.

325

"""

326

}],

327

response_model=ResearchSummary

328

)

329

330

# Access citation information

331

print(f"Summary: {summary.topic}")

332

print(f"Confidence: {summary.confidence}")

333

print(f"Citations: {summary.citations}")

334

print(f"Source: {summary.source_text}")

335

336

# Manual citation management

337

summary.add_citation("Smith et al. 2023, Climate Change Impacts")

338

summary.add_citation("Arctic Research Database 2023")

339

340

if summary.has_citations():

341

for citation in summary.get_citations():

342

print(f"Source: {citation}")

343

```

344

345

## Advanced DSL Patterns

346

347

### Combining DSL Components

348

349

```python { .api }

350

from instructor import Maybe, Partial, IterableModel, CitationMixin

351

352

class Evidence(CitationMixin, BaseModel):

353

claim: str

354

supporting_data: str

355

reliability: str

356

357

class Argument(BaseModel):

358

thesis: str

359

evidence: List[Evidence]

360

counter_arguments: List[str]

361

362

# Optional iterable with citations

363

OptionalEvidenceList = Maybe(IterableModel(Evidence))

364

365

# Stream partial arguments with citations

366

PartialArgument = Partial[Argument]

367

368

# Extract optional evidence list

369

maybe_evidence = client.create(

370

model="gpt-4",

371

messages=[{"role": "user", "content": "Find evidence for climate change"}],

372

response_model=OptionalEvidenceList

373

)

374

375

if maybe_evidence.is_success:

376

evidence_list = maybe_evidence.unwrap()

377

for evidence in evidence_list.tasks:

378

print(f"Claim: {evidence.claim}")

379

print(f"Citations: {evidence.citations}")

380

```

381

382

### Nested DSL Structures

383

384

```python { .api }

385

# Wrap an existing model (Task, defined above) so extraction may fail gracefully.
# Note: Maybe() is a factory that returns a MaybeBase subclass — call it on a
# concrete model rather than subclassing its result with new data fields.
OptionalTask = Maybe(Task)

389

390

class ProjectPlan(BaseModel):

391

title: str

392

required_tasks: List[Task] # Always present

393

optional_tasks: List[OptionalTask] # May be empty or failed

394

395

# Extract mixed required and optional tasks

396

plan = client.create(

397

model="gpt-4",

398

messages=[{"role": "user", "content": "Create project plan..."}],

399

response_model=ProjectPlan

400

)

401

402

# Handle mixed results

403

print(f"Required tasks: {len(plan.required_tasks)}")

404

for optional in plan.optional_tasks:

405

if optional.is_success:

406

task = optional.unwrap()

407

print(f"Optional task: {task.name}")

408

else:

409

print(f"Failed to extract optional task: {optional.message}")

410

```

411

412

### Custom DSL Extensions

413

414

```python { .api }

415

from typing import TypeVar, Generic

416

from pydantic import BaseModel, Field

417

418

T = TypeVar('T', bound=BaseModel)

419

420

class Weighted(Generic[T]):

421

"""Custom DSL component for weighted results."""

422

423

@classmethod

424

def create(cls, model_class: type[T]) -> type[BaseModel]:

425

"""Create weighted version of model."""

426

427

class WeightedModel(BaseModel):

428

result: model_class

429

weight: float = Field(

430

...,

431

ge=0.0,

432

le=1.0,

433

description="Confidence weight for this result"

434

)

435

reasoning: str = Field(

436

...,

437

description="Explanation for the assigned weight"

438

)

439

440

return WeightedModel

441

442

# Usage

443

WeightedUser = Weighted.create(User)

444

445

weighted_result = client.create(

446

model="gpt-4",

447

messages=[{"role": "user", "content": "Extract user with confidence"}],

448

response_model=WeightedUser

449

)

450

451

print(f"User: {weighted_result.result.name}")

452

print(f"Weight: {weighted_result.weight}")

453

print(f"Reasoning: {weighted_result.reasoning}")

454

```