# Validation System

The instructor package provides advanced validation capabilities including LLM-powered validation and content moderation. These validators can be applied to Pydantic model fields to ensure data quality and safety.

## LLM Validator

Use Large Language Models to validate field values with custom logic and natural language instructions.

```python { .api }
def llm_validator(
    statement: str,
    client: Optional[Any] = None,
    model: Optional[str] = None,
    temperature: float = 0.0,
    max_retries: int = 3,
    **kwargs: Any
) -> Callable[[Any], Any]:
    """
    Create LLM-based field validator.

    Args:
        statement: Natural language validation instruction
        client: Optional LLM client (uses global default if None)
        model: Optional model name (uses client default if None)
        temperature: Sampling temperature for validation
        max_retries: Maximum number of validation attempts
        **kwargs: Additional arguments for LLM call

    Returns:
        Validator function for use with Pydantic Field
    """
```

### LLM Validator Usage Examples

```python { .api }
from instructor import llm_validator
from pydantic import BaseModel, Field
from typing import List

class Product(BaseModel):
    name: str = Field(
        ...,
        description="Product name",
        validator=llm_validator(
            "Check if this is a valid product name that makes sense"
        )
    )

    price: float = Field(
        ...,
        description="Product price in USD",
        validator=llm_validator(
            "Verify this is a reasonable price for the given product name",
            temperature=0.1
        )
    )

    description: str = Field(
        ...,
        description="Product description",
        validator=llm_validator(
            "Ensure the description accurately matches the product name and is marketing-appropriate"
        )
    )

    category: str = Field(
        ...,
        description="Product category",
        validator=llm_validator(
            "Validate that the category is appropriate for this type of product"
        )
    )

# Usage with custom client and model
class ReviewedArticle(BaseModel):
    title: str = Field(
        ...,
        description="Article title",
        validator=llm_validator(
            "Check if this title is engaging and grammatically correct",
            model="gpt-4",
            max_retries=2
        )
    )

    content: str = Field(
        ...,
        description="Article content",
        validator=llm_validator(
            "Verify the content is well-structured, informative, and free of factual errors",
            model="gpt-4",
            temperature=0.2
        )
    )

    tags: List[str] = Field(
        ...,
        description="Article tags",
        validator=llm_validator(
            "Ensure all tags are relevant to the article content and properly formatted"
        )
    )

# Extract with validation
product = client.create(
    model="gpt-4",
    messages=[{
        "role": "user",
        "content": "Extract product: Premium Wireless Headphones, $299, High-quality audio experience"
    }],
    response_model=Product
)
# All fields are automatically validated by LLM before returning
```

## OpenAI Moderation

Use OpenAI's moderation API to check content safety and compliance.

```python { .api }
def openai_moderation(
    client: Optional[Any] = None,
    model: str = "text-moderation-latest",
    **kwargs: Any
) -> Callable[[Any], Any]:
    """
    Create OpenAI moderation validator.

    Args:
        client: Optional OpenAI client (uses global default if None)
        model: Moderation model to use
        **kwargs: Additional arguments for moderation call

    Returns:
        Validator function that checks content safety

    Raises:
        ValidationError: If content fails moderation check
    """
```

### OpenAI Moderation Usage Examples

```python { .api }
from instructor import openai_moderation
from pydantic import BaseModel, Field

class UserContent(BaseModel):
    username: str = Field(
        ...,
        description="User's chosen username",
        validator=openai_moderation()  # Check for inappropriate usernames
    )

    bio: str = Field(
        ...,
        description="User biography",
        validator=openai_moderation()  # Check bio content
    )

    post_content: str = Field(
        ...,
        description="User's post content",
        validator=openai_moderation(model="text-moderation-stable")
    )

class SafeComment(BaseModel):
    author: str = Field(
        ...,
        description="Comment author"
    )

    text: str = Field(
        ...,
        description="Comment text content",
        validator=openai_moderation()
    )

    is_public: bool = Field(
        ...,
        description="Whether comment should be public"
    )

# Extract user content with safety checking
user_data = client.create(
    model="gpt-4",
    messages=[{
        "role": "user",
        "content": "Extract user info: JohnDoe, 'Love hiking and photography', 'Check out my latest mountain photos!'"
    }],
    response_model=UserContent
)
# Content automatically checked for safety violations
```

## Combining Validators

You can combine multiple validators on the same field for comprehensive validation.

```python { .api }
from pydantic import Field, validator
from typing import Any

class QualityContent(BaseModel):
    title: str = Field(
        ...,
        description="Content title",
        validators=[
            llm_validator("Check if title is engaging and appropriate"),
            openai_moderation()  # Also check for safety
        ]
    )

    body: str = Field(
        ...,
        description="Content body",
        validators=[
            openai_moderation(),  # Safety first
            llm_validator("Verify content is well-written and informative")  # Quality second
        ]
    )

# Custom validation with both LLM and traditional validation
class ValidatedEmail(BaseModel):
    email: str = Field(
        ...,
        description="Email address",
        validators=[
            llm_validator("Verify this looks like a valid email address")
        ]
    )

    @validator('email')
    def validate_email_format(cls, v):
        """Traditional regex validation."""
        import re
        pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
        if not re.match(pattern, v):
            raise ValueError('Invalid email format')
        return v
```

## Advanced Validation Patterns

### Contextual Validation

```python { .api }
class ContextualProduct(BaseModel):
    category: str = Field(..., description="Product category")
    name: str = Field(..., description="Product name")
    price: float = Field(..., description="Price in USD")

    @validator('price')
    def validate_price_for_category(cls, v, values):
        """Use LLM to validate price based on category context."""
        if 'category' in values:
            category = values['category']
            # Dynamic LLM validation based on context
            validator_func = llm_validator(
                f"Check if ${v} is a reasonable price for a {category} product"
            )
            return validator_func(v)
        return v

class CompanyInfo(BaseModel):
    name: str = Field(..., description="Company name")
    industry: str = Field(..., description="Industry sector")
    description: str = Field(
        ...,
        description="Company description",
        validator=llm_validator(
            "Verify the description matches the company name and industry"
        )
    )
```

### Multi-Field Validation

```python { .api }
class CoherentResponse(BaseModel):
    question: str = Field(..., description="The original question")
    answer: str = Field(..., description="The answer to the question")
    confidence: float = Field(..., description="Confidence score 0-1")

    @validator('answer')
    def validate_answer_coherence(cls, v, values):
        """Validate answer coherence with question."""
        if 'question' in values:
            question = values['question']
            validator_func = llm_validator(
                f"Check if this answer '{v}' properly addresses the question '{question}'"
            )
            return validator_func(v)
        return v

    @validator('confidence')
    def validate_confidence_matches_answer(cls, v, values):
        """Validate confidence score matches answer quality."""
        if 'answer' in values and 'question' in values:
            answer = values['answer']
            question = values['question']
            validator_func = llm_validator(
                f"Check if confidence score {v} is appropriate for this answer quality: '{answer}' to question '{question}'"
            )
            return validator_func(v)
        return v
```

### Custom Validation Logic

```python { .api }
def create_domain_validator(domain: str, rules: List[str]) -> Callable:
    """Create domain-specific validator with custom rules."""

    rule_text = "; ".join(rules)
    statement = f"Validate this {domain} data according to these rules: {rule_text}"

    return llm_validator(statement, temperature=0.1)

class MedicalRecord(BaseModel):
    patient_id: str = Field(
        ...,
        description="Patient identifier",
        validator=create_domain_validator("medical", [
            "Must be properly anonymized",
            "Should not contain personally identifiable information",
            "Must follow HIPAA guidelines"
        ])
    )

    diagnosis: str = Field(
        ...,
        description="Medical diagnosis",
        validator=create_domain_validator("medical", [
            "Must use proper medical terminology",
            "Should be specific and accurate",
            "Must be a valid medical condition"
        ])
    )

    treatment: str = Field(
        ...,
        description="Prescribed treatment",
        validator=llm_validator(
            "Verify this treatment is appropriate for the given diagnosis",
            model="gpt-4",
            max_retries=1
        )
    )
```

## Error Handling and Debugging

```python { .api }
from pydantic import ValidationError
import logging

# Set up logging to debug validation issues
logging.basicConfig(level=logging.DEBUG)

class DebugValidatedModel(BaseModel):
    content: str = Field(
        ...,
        description="Content to validate",
        validator=llm_validator(
            "Check if content is appropriate and well-written",
            max_retries=2
        )
    )

try:
    result = client.create(
        model="gpt-4",
        messages=[{"role": "user", "content": "Extract: Some problematic content"}],
        response_model=DebugValidatedModel
    )
except ValidationError as e:
    print(f"Validation failed: {e}")
    for error in e.errors():
        print(f"Field: {error['loc']}")
        print(f"Error: {error['msg']}")
        print(f"Type: {error['type']}")

# Custom error handling for moderation failures
class SafeUserInput(BaseModel):
    message: str = Field(
        ...,
        description="User message",
        validator=openai_moderation()
    )

def safe_extract(user_input: str) -> SafeUserInput | None:
    """Safely extract user input with moderation."""
    try:
        return client.create(
            model="gpt-4",
            messages=[{"role": "user", "content": f"Extract: {user_input}"}],
            response_model=SafeUserInput
        )
    except ValidationError as e:
        # Check if it's a moderation failure
        moderation_errors = [
            error for error in e.errors()
            if 'moderation' in str(error.get('type', ''))
        ]
        if moderation_errors:
            logging.warning(f"Content failed moderation: {user_input}")
            return None
        else:
            # Re-raise other validation errors
            raise
```

## Performance Considerations

```python { .api }
# Validation caching for repeated patterns
from functools import lru_cache

@lru_cache(maxsize=1000)
def cached_llm_validator(statement: str, value: str) -> bool:
    """Cached validation to avoid repeated LLM calls."""
    validator_func = llm_validator(statement)
    try:
        validator_func(value)
        return True
    except ValidationError:
        return False

class OptimizedModel(BaseModel):
    """Model with performance-optimized validation."""

    email: str = Field(
        ...,
        description="Email address"
    )

    @validator('email')
    def validate_email_cached(cls, v):
        """Use cached validation for common patterns."""
        if cached_llm_validator("Check if this is a valid email", v):
            return v
        else:
            raise ValueError("Email validation failed")

# Batch validation for multiple items
def validate_batch_with_llm(items: List[str], validation_rule: str) -> List[bool]:
    """Validate multiple items in a single LLM call."""

    batch_prompt = f"""
    Validate each of these items according to the rule: {validation_rule}

    Items:
    {chr(10).join(f"{i+1}. {item}" for i, item in enumerate(items))}

    Return a list of True/False for each item.
    """

    # Implementation would use LLM to validate all items at once
    # This is more efficient than individual validation calls
    pass
```