# Validation System

The instructor package provides advanced validation capabilities including LLM-powered validation and content moderation. These validators can be applied to Pydantic model fields to ensure data quality and safety.

## LLM Validator

Use Large Language Models to validate field values with custom logic and natural language instructions.

```python { .api }
def llm_validator(
    statement: str,
    client: Optional[Any] = None,
    model: Optional[str] = None,
    temperature: float = 0.0,
    max_retries: int = 3,
    **kwargs: Any
) -> Callable[[Any], Any]:
    """
    Create LLM-based field validator.

    Args:
        statement: Natural language validation instruction
        client: Optional LLM client (uses global default if None)
        model: Optional model name (uses client default if None)
        temperature: Sampling temperature for validation
        max_retries: Maximum number of validation attempts
        **kwargs: Additional arguments for LLM call

    Returns:
        Validator function for use with Pydantic Field
    """
```

### LLM Validator Usage Examples

```python { .api }
from instructor import llm_validator
from pydantic import BaseModel, Field
from typing import List

class Product(BaseModel):
    name: str = Field(
        ...,
        description="Product name",
        validator=llm_validator(
            "Check if this is a valid product name that makes sense"
        )
    )

    price: float = Field(
        ...,
        description="Product price in USD",
        validator=llm_validator(
            "Verify this is a reasonable price for the given product name",
            temperature=0.1
        )
    )

    description: str = Field(
        ...,
        description="Product description",
        validator=llm_validator(
            "Ensure the description accurately matches the product name and is marketing-appropriate"
        )
    )

    category: str = Field(
        ...,
        description="Product category",
        validator=llm_validator(
            "Validate that the category is appropriate for this type of product"
        )
    )

# Usage with custom client and model
class ReviewedArticle(BaseModel):
    title: str = Field(
        ...,
        description="Article title",
        validator=llm_validator(
            "Check if this title is engaging and grammatically correct",
            model="gpt-4",
            max_retries=2
        )
    )

    content: str = Field(
        ...,
        description="Article content",
        validator=llm_validator(
            "Verify the content is well-structured, informative, and free of factual errors",
            model="gpt-4",
            temperature=0.2
        )
    )

    tags: List[str] = Field(
        ...,
        description="Article tags",
        validator=llm_validator(
            "Ensure all tags are relevant to the article content and properly formatted"
        )
    )

# Extract with validation
product = client.create(
    model="gpt-4",
    messages=[{
        "role": "user",
        "content": "Extract product: Premium Wireless Headphones, $299, High-quality audio experience"
    }],
    response_model=Product
)
# All fields are automatically validated by LLM before returning
```

## OpenAI Moderation

Use OpenAI's moderation API to check content safety and compliance.

```python { .api }
def openai_moderation(
    client: Optional[Any] = None,
    model: str = "text-moderation-latest",
    **kwargs: Any
) -> Callable[[Any], Any]:
    """
    Create OpenAI moderation validator.

    Args:
        client: Optional OpenAI client (uses global default if None)
        model: Moderation model to use
        **kwargs: Additional arguments for moderation call

    Returns:
        Validator function that checks content safety

    Raises:
        ValidationError: If content fails moderation check
    """
```

### OpenAI Moderation Usage Examples

```python { .api }
from instructor import openai_moderation
from pydantic import BaseModel, Field

class UserContent(BaseModel):
    username: str = Field(
        ...,
        description="User's chosen username",
        validator=openai_moderation()  # Check for inappropriate usernames
    )

    bio: str = Field(
        ...,
        description="User biography",
        validator=openai_moderation()  # Check bio content
    )

    post_content: str = Field(
        ...,
        description="User's post content",
        validator=openai_moderation(model="text-moderation-stable")
    )

class SafeComment(BaseModel):
    author: str = Field(
        ...,
        description="Comment author"
    )

    text: str = Field(
        ...,
        description="Comment text content",
        validator=openai_moderation()
    )

    is_public: bool = Field(
        ...,
        description="Whether comment should be public"
    )

# Extract user content with safety checking
user_data = client.create(
    model="gpt-4",
    messages=[{
        "role": "user",
        "content": "Extract user info: JohnDoe, 'Love hiking and photography', 'Check out my latest mountain photos!'"
    }],
    response_model=UserContent
)
# Content automatically checked for safety violations
```

## Combining Validators

You can combine multiple validators on the same field for comprehensive validation.

```python { .api }
from pydantic import Field, validator
from typing import Any

class QualityContent(BaseModel):
    title: str = Field(
        ...,
        description="Content title",
        validators=[
            llm_validator("Check if title is engaging and appropriate"),
            openai_moderation()  # Also check for safety
        ]
    )

    body: str = Field(
        ...,
        description="Content body",
        validators=[
            openai_moderation(),  # Safety first
            llm_validator("Verify content is well-written and informative")  # Quality second
        ]
    )

# Custom validation with both LLM and traditional validation
class ValidatedEmail(BaseModel):
    email: str = Field(
        ...,
        description="Email address",
        validators=[
            llm_validator("Verify this looks like a valid email address")
        ]
    )

    @validator('email')
    def validate_email_format(cls, v):
        """Traditional regex validation."""
        import re
        pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
        if not re.match(pattern, v):
            raise ValueError('Invalid email format')
        return v
```

## Advanced Validation Patterns

### Contextual Validation

```python { .api }
class ContextualProduct(BaseModel):
    category: str = Field(..., description="Product category")
    name: str = Field(..., description="Product name")
    price: float = Field(..., description="Price in USD")

    @validator('price')
    def validate_price_for_category(cls, v, values):
        """Use LLM to validate price based on category context."""
        if 'category' in values:
            category = values['category']
            # Dynamic LLM validation based on context
            validator_func = llm_validator(
                f"Check if ${v} is a reasonable price for a {category} product"
            )
            return validator_func(v)
        return v

class CompanyInfo(BaseModel):
    name: str = Field(..., description="Company name")
    industry: str = Field(..., description="Industry sector")
    description: str = Field(
        ...,
        description="Company description",
        validator=llm_validator(
            "Verify the description matches the company name and industry"
        )
    )
```

### Multi-Field Validation

```python { .api }
class CoherentResponse(BaseModel):
    question: str = Field(..., description="The original question")
    answer: str = Field(..., description="The answer to the question")
    confidence: float = Field(..., description="Confidence score 0-1")

    @validator('answer')
    def validate_answer_coherence(cls, v, values):
        """Validate answer coherence with question."""
        if 'question' in values:
            question = values['question']
            validator_func = llm_validator(
                f"Check if this answer '{v}' properly addresses the question '{question}'"
            )
            return validator_func(v)
        return v

    @validator('confidence')
    def validate_confidence_matches_answer(cls, v, values):
        """Validate confidence score matches answer quality."""
        if 'answer' in values and 'question' in values:
            answer = values['answer']
            question = values['question']
            validator_func = llm_validator(
                f"Check if confidence score {v} is appropriate for this answer quality: '{answer}' to question '{question}'"
            )
            return validator_func(v)
        return v
```

### Custom Validation Logic

```python { .api }
def create_domain_validator(domain: str, rules: List[str]) -> Callable:
    """Create domain-specific validator with custom rules."""

    rule_text = "; ".join(rules)
    statement = f"Validate this {domain} data according to these rules: {rule_text}"

    return llm_validator(statement, temperature=0.1)

class MedicalRecord(BaseModel):
    patient_id: str = Field(
        ...,
        description="Patient identifier",
        validator=create_domain_validator("medical", [
            "Must be properly anonymized",
            "Should not contain personally identifiable information",
            "Must follow HIPAA guidelines"
        ])
    )

    diagnosis: str = Field(
        ...,
        description="Medical diagnosis",
        validator=create_domain_validator("medical", [
            "Must use proper medical terminology",
            "Should be specific and accurate",
            "Must be a valid medical condition"
        ])
    )

    treatment: str = Field(
        ...,
        description="Prescribed treatment",
        validator=llm_validator(
            "Verify this treatment is appropriate for the given diagnosis",
            model="gpt-4",
            max_retries=1
        )
    )
```

## Error Handling and Debugging

```python { .api }
from pydantic import ValidationError
import logging

# Set up logging to debug validation issues
logging.basicConfig(level=logging.DEBUG)

class DebugValidatedModel(BaseModel):
    content: str = Field(
        ...,
        description="Content to validate",
        validator=llm_validator(
            "Check if content is appropriate and well-written",
            max_retries=2
        )
    )

try:
    result = client.create(
        model="gpt-4",
        messages=[{"role": "user", "content": "Extract: Some problematic content"}],
        response_model=DebugValidatedModel
    )
except ValidationError as e:
    print(f"Validation failed: {e}")
    for error in e.errors():
        print(f"Field: {error['loc']}")
        print(f"Error: {error['msg']}")
        print(f"Type: {error['type']}")

# Custom error handling for moderation failures
class SafeUserInput(BaseModel):
    message: str = Field(
        ...,
        description="User message",
        validator=openai_moderation()
    )

def safe_extract(user_input: str) -> SafeUserInput | None:
    """Safely extract user input with moderation."""
    try:
        return client.create(
            model="gpt-4",
            messages=[{"role": "user", "content": f"Extract: {user_input}"}],
            response_model=SafeUserInput
        )
    except ValidationError as e:
        # Check if it's a moderation failure
        moderation_errors = [
            error for error in e.errors()
            if 'moderation' in str(error.get('type', ''))
        ]
        if moderation_errors:
            logging.warning(f"Content failed moderation: {user_input}")
            return None
        else:
            # Re-raise other validation errors
            raise
```

## Performance Considerations

```python { .api }
# Validation caching for repeated patterns
from functools import lru_cache

@lru_cache(maxsize=1000)
def cached_llm_validator(statement: str, value: str) -> bool:
    """Cached validation to avoid repeated LLM calls."""
    validator_func = llm_validator(statement)
    try:
        validator_func(value)
        return True
    except ValidationError:
        return False

class OptimizedModel(BaseModel):
    """Model with performance-optimized validation."""

    email: str = Field(
        ...,
        description="Email address"
    )

    @validator('email')
    def validate_email_cached(cls, v):
        """Use cached validation for common patterns."""
        if cached_llm_validator("Check if this is a valid email", v):
            return v
        else:
            raise ValueError("Email validation failed")

# Batch validation for multiple items
def validate_batch_with_llm(items: List[str], validation_rule: str) -> List[bool]:
    """Validate multiple items in a single LLM call."""

    batch_prompt = f"""
    Validate each of these items according to the rule: {validation_rule}

    Items:
    {chr(10).join(f"{i+1}. {item}" for i, item in enumerate(items))}

    Return a list of True/False for each item.
    """

    # Implementation would use LLM to validate all items at once
    # This is more efficient than individual validation calls
    pass
```