0
# Data Models
1
2
Core data models for projects, tasks, runs, and configurations. These models represent the fundamental structures for organizing AI work in Kiln, including project management, task definitions, execution tracking, and data source metadata.
3
4
## Capabilities
5
6
### Project Management
7
8
Project class represents a Kiln project containing related tasks, organized in a file-based directory structure.
9
10
```python { .api }
11
class Project:
12
"""
13
Represents a Kiln project containing related tasks.
14
15
Properties:
16
- name (str): Project name
17
- description (str): Project description
18
- path (str): File system path to project directory
19
- id (str): Unique project identifier
20
"""
21
22
def tasks(self) -> list:
23
"""
24
Get all tasks in the project.
25
26
Returns:
27
list: List of Task instances
28
"""
29
30
def documents(self, readonly: bool = False) -> list:
31
"""
32
Get all documents in the project.
33
34
Parameters:
35
- readonly (bool): Whether to load in read-only mode
36
37
Returns:
38
list: List of Document instances
39
"""
40
41
def extractor_configs(self, readonly: bool = False) -> list:
42
"""
43
Get all extractor configurations in the project.
44
45
Parameters:
46
- readonly (bool): Whether to load in read-only mode
47
48
Returns:
49
list: List of ExtractorConfig instances
50
"""
51
52
def chunker_configs(self, readonly: bool = False) -> list:
53
"""
54
Get all chunker configurations in the project.
55
56
Parameters:
57
- readonly (bool): Whether to load in read-only mode
58
59
Returns:
60
list: List of ChunkerConfig instances
61
"""
62
63
def embedding_configs(self, readonly: bool = False) -> list:
64
"""
65
Get all embedding configurations in the project.
66
67
Parameters:
68
- readonly (bool): Whether to load in read-only mode
69
70
Returns:
71
list: List of EmbeddingConfig instances
72
"""
73
74
def rag_configs(self, readonly: bool = False) -> list:
75
"""
76
Get all RAG configurations in the project.
77
78
Parameters:
79
- readonly (bool): Whether to load in read-only mode
80
81
Returns:
82
list: List of RagConfig instances
83
"""
84
85
def vector_store_configs(self, readonly: bool = False) -> list:
86
"""
87
Get all vector store configurations in the project.
88
89
Parameters:
90
- readonly (bool): Whether to load in read-only mode
91
92
Returns:
93
list: List of VectorStoreConfig instances
94
"""
95
96
def external_tool_servers(self, readonly: bool = False) -> list:
97
"""
98
Get all external tool servers in the project.
99
100
Parameters:
101
- readonly (bool): Whether to load in read-only mode
102
103
Returns:
104
list: List of ExternalToolServer instances
105
"""
106
107
@staticmethod
108
def load_from_file(path: str) -> 'Project':
109
"""
110
Load project from .kiln file.
111
112
Parameters:
113
- path (str): Path to project.kiln file
114
115
Returns:
116
Project instance
117
"""
118
119
def save_to_file(self) -> None:
120
"""Save project to .kiln file."""
121
```
122
123
### Task Definition
124
125
Task class defines an AI task with instructions, schemas, and requirements.
126
127
```python { .api }
128
class Task:
129
"""
130
Represents an AI task with instructions and schemas.
131
132
Properties:
133
- name (str): Task name
134
- description (str | None): Task description
135
- instruction (str): Instructions for completing the task
136
- input_json_schema (str | None): JSON schema for validating inputs
137
- output_json_schema (str | None): JSON schema for validating outputs
138
- requirements (list[TaskRequirement]): Requirements that outputs must satisfy (default: [])
139
- thinking_instruction (str | None): Instructions for model thinking/reasoning before answering
140
- default_run_config_id (str | None): ID of default run config to use for this task
141
- path (str): File system path to task directory
142
- id (str): Unique task identifier
143
- parent (Project | None): Parent project
144
"""
145
146
def runs(self, readonly: bool = False) -> list[TaskRun]:
147
"""
148
Get all runs for this task.
149
150
Parameters:
151
- readonly (bool): Whether to load in read-only mode
152
153
Returns:
154
list[TaskRun]: List of task run instances
155
"""
156
157
def dataset_splits(self, readonly: bool = False) -> list:
158
"""
159
Get all dataset splits for this task.
160
161
Parameters:
162
- readonly (bool): Whether to load in read-only mode
163
164
Returns:
165
list[DatasetSplit]: List of dataset split instances
166
"""
167
168
def finetunes(self, readonly: bool = False) -> list:
169
"""
170
Get all fine-tunes for this task.
171
172
Parameters:
173
- readonly (bool): Whether to load in read-only mode
174
175
Returns:
176
list[Finetune]: List of fine-tune instances
177
"""
178
179
def prompts(self, readonly: bool = False) -> list:
180
"""
181
Get all prompts for this task.
182
183
Parameters:
184
- readonly (bool): Whether to load in read-only mode
185
186
Returns:
187
list[Prompt]: List of prompt instances
188
"""
189
190
def evals(self, readonly: bool = False) -> list:
191
"""
192
Get all evaluations for this task.
193
194
Parameters:
195
- readonly (bool): Whether to load in read-only mode
196
197
Returns:
198
list[Eval]: List of evaluation instances
199
"""
200
201
def run_configs(self, readonly: bool = False) -> list:
202
"""
203
Get all run configurations for this task.
204
205
Parameters:
206
- readonly (bool): Whether to load in read-only mode
207
208
Returns:
209
list[TaskRunConfig]: List of run config instances
210
"""
211
212
def output_schema(self) -> dict | None:
213
"""
214
Get parsed output JSON schema as dictionary.
215
216
Returns:
217
dict | None: Parsed schema or None if not set
218
"""
219
220
def input_schema(self) -> dict | None:
221
"""
222
Get parsed input JSON schema as dictionary.
223
224
Returns:
225
dict | None: Parsed schema or None if not set
226
"""
227
228
def parent_project(self) -> 'Project | None':
229
"""
230
Get parent project of this task.
231
232
Returns:
233
Project | None: Parent project or None
234
"""
235
236
@staticmethod
237
def load_from_file(path: str) -> 'Task':
238
"""
239
Load task from .kiln file.
240
241
Parameters:
242
- path (str): Path to task.kiln file
243
244
Returns:
245
Task instance
246
"""
247
248
def save_to_file(self) -> None:
249
"""Save task to .kiln file."""
250
251
class TaskRequirement:
252
"""
253
Requirements that task outputs must satisfy.
254
255
Properties:
256
- id (str): Unique requirement identifier
257
- name (str): Requirement name
258
- description (str | None): Optional description
259
- instruction (str): Instructions for meeting the requirement
260
- priority (Priority): Requirement priority level (p0-p3, default: p2)
261
- type (TaskOutputRatingType): Type of rating (five_star, pass_fail, pass_fail_critical, custom; default: five_star)
262
"""
263
```
264
265
### Task Run Configuration
266
267
TaskRunConfig defines a complete configuration for running a task, including model, provider, prompt, and parameters.
268
269
```python { .api }
270
class TaskRunConfig:
271
"""
272
Configuration for running a task (persisted in Kiln Project under a task).
273
274
A run config includes everything needed to run a task except the input. Running the same
275
RunConfig with the same input should make identical calls to the model (output may vary
276
as models are non-deterministic).
277
278
Properties:
279
- name (str): Run config name
280
- description (str | None): Optional description
281
- run_config_properties (RunConfigProperties): Complete run configuration properties
282
- prompt (BasePrompt | None): Frozen prompt to use (for consistency with dynamic prompts)
283
- id (str): Unique identifier
284
- parent (Task): Parent task
285
"""
286
287
def parent_task(self) -> 'Task | None':
288
"""
289
Get parent task of this run config.
290
291
Returns:
292
Task | None: Parent task or None
293
"""
294
295
@staticmethod
296
def load_from_file(path: str) -> 'TaskRunConfig':
297
"""
298
Load run config from .kiln file.
299
300
Parameters:
301
- path (str): Path to run_config.kiln file
302
303
Returns:
304
TaskRunConfig instance
305
"""
306
307
def save_to_file(self) -> None:
308
"""Save run config to .kiln file."""
309
310
class RunConfigProperties:
311
"""
312
Properties defining how to run a task (model, provider, parameters, etc.).
313
314
Running the same RunConfigProperties with the same input should make identical
315
calls to the model (output may vary as models are non-deterministic).
316
317
Properties:
318
- model_name (str): Model identifier to use
319
- model_provider_name (ModelProviderName): Provider to use
320
- prompt_id (PromptId): Prompt type to use (defaults to simple if not provided)
321
- top_p (float): Top-p sampling parameter (0-1, default: 1.0)
322
- temperature (float): Temperature sampling parameter (0-2, default: 1.0)
323
- structured_output_mode (StructuredOutputMode): How to handle structured JSON output
324
- tools_config (ToolsRunConfig | None): Tools available to the model
325
"""
326
```
327
328
### Task Execution
329
330
TaskRun represents a single execution or sample of a task with input and output data.
331
332
```python { .api }
333
class TaskRun:
334
"""
335
Single execution/sample of a task.
336
337
Properties:
338
- input (str): Input data for the run (JSON string or plaintext)
339
- output (TaskOutput): Output from the run
340
- input_source (DataSource | None): Metadata about input data origin
341
- tags (list[str]): Tags for categorization and filtering
342
- prompt_id (str | None): Associated prompt identifier
343
- id (str): Unique run identifier
344
- parent (Task): Parent task
345
- path (str): File system path to run directory
346
"""
347
348
@staticmethod
349
def load_from_file(path: str) -> 'TaskRun':
350
"""
351
Load task run from .kiln file.
352
353
Parameters:
354
- path (str): Path to task_run.kiln file
355
356
Returns:
357
TaskRun instance
358
"""
359
360
def save_to_file(self) -> None:
361
"""Save task run to .kiln file."""
362
363
class TaskOutput:
364
"""
365
Output from a task execution.
366
367
Properties:
368
- output (str): Output data (JSON string or plaintext)
369
- source (DataSource): Source of the output data
370
- rating (TaskOutputRating | None): Quality rating
371
- requirement_ratings (list[RequirementRating]): Ratings for each requirement
372
"""
373
374
class TaskOutputRating:
375
"""
376
Rating for task output quality.
377
378
Properties:
379
- value (int | bool): Rating value (1-5 for five_star, True/False for pass_fail)
380
- type (TaskOutputRatingType): Type of rating (five_star, pass_fail, pass_fail_critical, or custom)
381
"""
382
383
class RequirementRating:
384
"""
385
Rating for specific requirement satisfaction.
386
387
Properties:
388
- requirement_id (str): ID of the requirement being rated
389
- passed (bool): Whether requirement was satisfied
390
- reason (str | None): Explanation for the rating
391
"""
392
```
393
394
### Token Usage Tracking
395
396
Usage class tracks token consumption for API calls.
397
398
```python { .api }
399
class Usage:
400
"""
401
Token usage tracking for API calls.
402
403
Properties:
404
- prompt_tokens (int): Number of tokens in the prompt
405
- completion_tokens (int): Number of tokens in the completion
406
- total_tokens (int): Total tokens used (prompt + completion)
407
"""
408
```
409
410
### Data Sources
411
412
Data source metadata tracks the origin and properties of data.
413
414
```python { .api }
415
class DataSource:
416
"""
417
Metadata about data origin.
418
419
Properties:
420
- type (DataSourceType): Type of data source (human or synthetic)
421
- properties (dict): Custom properties like created_by, created_at
422
"""
423
424
class DataSourceType:
425
"""
426
Type of data source.
427
428
Values:
429
- human: Data created by humans
430
- synthetic: Data generated synthetically
431
"""
432
human = "human"
433
synthetic = "synthetic"
434
435
class DataSourceProperty:
436
"""
437
Custom properties for data sources.
438
439
Common properties:
440
- created_by (str): Creator identifier
441
- created_at (str): Creation timestamp
442
"""
443
```
444
445
### Prompts
446
447
Prompt management for saved prompt configurations.
448
449
```python { .api }
450
class Prompt:
451
"""
452
Saved prompt configuration.
453
454
Properties:
455
- id (str): Unique prompt identifier
456
- name (str): Prompt name
457
- content (str): Prompt content/template
458
- parent (Task): Parent task
459
"""
460
461
@staticmethod
462
def load_from_file(path: str) -> 'Prompt':
463
"""
464
Load prompt from .kiln file.
465
466
Parameters:
467
- path (str): Path to prompt.kiln file
468
469
Returns:
470
Prompt instance
471
"""
472
473
def save_to_file(self) -> None:
474
"""Save prompt to .kiln file."""
475
476
class BasePrompt:
477
"""Base interface for prompts."""
478
479
class PromptId:
480
"""
481
Validated prompt identifier type.
482
483
Valid format examples:
484
- "simple"
485
- "few_shot"
486
- "cot"
487
- "saved::prompt_id"
488
- "fine_tune::model_id"
489
"""
490
491
class PromptGenerators:
492
"""
493
Built-in prompt generator types.
494
495
Values:
496
- simple: Simple prompt construction
497
- short: Concise prompt construction
498
- multi_shot: Multiple examples
499
- few_shot: Few-shot learning
500
- cot: Chain-of-thought reasoning
501
- few_shot_cot: Few-shot with chain-of-thought
502
- multi_shot_cot: Multi-shot with chain-of-thought
503
- saved: Use saved/custom prompts
504
- fine_tune: Fine-tune formatted prompts
505
"""
506
simple = "simple"
507
short = "short"
508
multi_shot = "multi_shot"
509
few_shot = "few_shot"
510
cot = "cot"
511
few_shot_cot = "few_shot_cot"
512
multi_shot_cot = "multi_shot_cot"
513
saved = "saved"
514
fine_tune = "fine_tune"
515
516
# List of all prompt generator values
517
prompt_generator_values = [
518
"simple", "short", "multi_shot", "few_shot",
519
"cot", "few_shot_cot", "multi_shot_cot",
520
"saved", "fine_tune"
521
]
522
```
523
524
### Fine-tuning
525
526
Fine-tuning job configuration and status tracking.
527
528
```python { .api }
529
class Finetune:
530
"""
531
Fine-tuning job configuration and tracking.
532
533
Properties:
534
- id (str): Unique identifier
535
- status (FineTuneStatusType): Current job status
536
- model_id (str): Base model identifier
537
- provider (str): Fine-tuning provider name
538
- parent (Task): Parent task
539
"""
540
541
@staticmethod
542
def load_from_file(path: str) -> 'Finetune':
543
"""
544
Load fine-tune from .kiln file.
545
546
Parameters:
547
- path (str): Path to finetune.kiln file
548
549
Returns:
550
Finetune instance
551
"""
552
553
def save_to_file(self) -> None:
554
"""Save fine-tune to .kiln file."""
555
556
def start(self) -> None:
557
"""Start the fine-tuning job."""
558
559
def check_status(self) -> dict:
560
"""
561
Check current status of fine-tuning job.
562
563
Returns:
564
dict: Status information including progress and errors
565
"""
566
```
567
568
### Dataset Management
569
570
Dataset splitting for train/test/validation sets.
571
572
```python { .api }
573
class DatasetSplit:
574
"""
575
Frozen dataset split (train/test/validation).
576
577
Properties:
578
- definition (DatasetSplitDefinition): Split configuration
579
- created_at (str): Timestamp of split creation
580
- id (str): Unique split identifier
581
- parent (Task): Parent task
582
"""
583
584
@staticmethod
585
def load_from_file(path: str) -> 'DatasetSplit':
586
"""
587
Load dataset split from .kiln file.
588
589
Parameters:
590
- path (str): Path to dataset_split.kiln file
591
592
Returns:
593
DatasetSplit instance
594
"""
595
596
def save_to_file(self) -> None:
597
"""Save dataset split to .kiln file."""
598
599
class DatasetSplitDefinition:
600
"""
601
Definition for splitting dataset.
602
603
Properties:
604
- train_ratio (float): Ratio of data for training (0-1)
605
- test_ratio (float): Ratio of data for testing (0-1)
606
- validation_ratio (float): Ratio of data for validation (0-1)
607
608
Note: train_ratio + test_ratio + validation_ratio should equal 1.0
609
"""
610
```
611
612
### External Tools
613
614
MCP (Model Context Protocol) tool server configuration.
615
616
```python { .api }
617
class ExternalToolServer:
618
"""
619
MCP tool server configuration.
620
621
Properties:
622
- name (str): Server name
623
- server_url (str): Server URL endpoint
624
- api_key (str | None): API key for authentication
625
"""
626
```
627
628
### Enumerations
629
630
Core enumerations used throughout the data model.
631
632
```python { .api }
633
class Priority:
634
"""
635
Task requirement priority levels (IntEnum where lower number = higher priority).
636
637
Values:
638
- p0: Priority 0 (highest/critical priority)
639
- p1: Priority 1 (high priority)
640
- p2: Priority 2 (medium priority)
641
- p3: Priority 3 (lower priority)
642
"""
643
p0 = 0
644
p1 = 1
645
p2 = 2
646
p3 = 3
647
648
class TaskOutputRatingType:
649
"""
650
Type of rating system.
651
652
Values:
653
- five_star: 1-5 star rating
654
- pass_fail: Binary pass/fail rating
655
- pass_fail_critical: Critical pass/fail rating
656
- custom: Custom rating type
657
"""
658
five_star = "five_star"
659
pass_fail = "pass_fail"
660
pass_fail_critical = "pass_fail_critical"
661
custom = "custom"
662
663
class StructuredOutputMode:
664
"""
665
Enumeration of supported structured output modes for model API calls.
666
667
Values:
668
- default: Let the adapter decide (legacy, do not use for new use cases)
669
- json_schema: Request JSON using API capabilities for json_schema
670
- function_calling_weak: Weak function calling mode
671
- function_calling: Request JSON using API capabilities for function calling
672
- json_mode: Request JSON using API's JSON mode (valid JSON but no schema validation)
673
- json_instructions: Append instructions to prompt for JSON output (no API capabilities)
674
- json_instruction_and_object: Instructions + json_mode API capabilities
675
- json_custom_instructions: Model outputs JSON with custom system prompt instructions
676
- unknown: Mode not known (on old models); look up the best option at runtime
677
"""
678
default = "default"
679
json_schema = "json_schema"
680
function_calling_weak = "function_calling_weak"
681
function_calling = "function_calling"
682
json_mode = "json_mode"
683
json_instructions = "json_instructions"
684
json_instruction_and_object = "json_instruction_and_object"
685
json_custom_instructions = "json_custom_instructions"
686
unknown = "unknown"
687
688
class FineTuneStatusType:
689
"""
690
Status of fine-tuning job.
691
692
Values:
693
- unknown: Unknown status (server error)
694
- pending: Waiting to start
695
- running: Currently running
696
- completed: Completed successfully
697
- failed: Failed with error
698
"""
699
unknown = "unknown"
700
pending = "pending"
701
running = "running"
702
completed = "completed"
703
failed = "failed"
704
```
705
706
### Text Chunking
707
708
Configuration and data structures for text chunking.
709
710
```python { .api }
711
class ChunkerConfig:
712
"""
713
Configuration for text chunking.
714
715
Properties:
716
- chunker_type (ChunkerType): Type of chunker to use
717
- chunk_size (int): Size of each chunk in characters
718
- chunk_overlap (int): Overlap between chunks in characters
719
"""
720
721
class ChunkerType:
722
"""
723
Available chunker types.
724
725
Values:
726
- fixed_window: Fixed-size window chunking
727
"""
728
fixed_window = "fixed_window"
729
730
class Chunk:
731
"""
732
Single text chunk with metadata.
733
734
Properties:
735
- text (str): Chunk content
736
- start_index (int): Start position in source document
737
- end_index (int): End position in source document
738
- metadata (dict): Additional chunk metadata
739
"""
740
741
class ChunkedDocument:
742
"""
743
Document split into chunks.
744
745
Properties:
746
- chunks (list[Chunk]): List of text chunks
747
- source_document (str): Original document content
748
"""
749
```
750
751
### Embeddings
752
753
Embedding configuration and data structures.
754
755
```python { .api }
756
class EmbeddingConfig:
757
"""
758
Configuration for embeddings.
759
760
Properties:
761
- model_id (str): Embedding model identifier
762
- provider (str): Embedding provider name
763
- dimensions (int): Embedding vector dimensions
764
"""
765
766
class Embedding:
767
"""
768
Single embedding vector.
769
770
Properties:
771
- vector (list[float]): Embedding vector values
772
- metadata (dict): Additional embedding metadata
773
"""
774
775
class ChunkEmbeddings:
776
"""
777
Embeddings for document chunks.
778
779
Properties:
780
- embeddings (list[Embedding]): List of embedding vectors
781
- chunk_ids (list[str]): Corresponding chunk identifiers
782
"""
783
```
784
785
### Evaluation Data Models
786
787
Data models for evaluation configurations and results.
788
789
```python { .api }
790
class Eval:
791
"""
792
Evaluation configuration.
793
794
Properties:
795
- id (str): Unique identifier
796
- name (str): Evaluation name
797
- eval_type (str): Type of evaluation
798
- config (EvalConfig): Evaluation configuration
799
- parent (Task): Parent task
800
"""
801
802
@staticmethod
803
def load_from_file(path: str) -> 'Eval':
804
"""
805
Load evaluation from .kiln file.
806
807
Parameters:
808
- path (str): Path to eval.kiln file
809
810
Returns:
811
Eval instance
812
"""
813
814
def save_to_file(self) -> None:
815
"""Save evaluation to .kiln file."""
816
817
class EvalConfig:
818
"""
819
Configuration for specific evaluation type.
820
821
Properties:
822
- type (EvalConfigType): Type of evaluation configuration
823
- parameters (dict): Evaluation-specific parameters
824
"""
825
826
class EvalRun:
827
"""
828
Single evaluation run.
829
830
Properties:
831
- eval_id (str): Evaluation identifier
832
- task_run_id (str): Task run being evaluated
833
- score (EvalOutputScore): Evaluation score
834
- id (str): Unique run identifier
835
"""
836
837
@staticmethod
838
def load_from_file(path: str) -> 'EvalRun':
839
"""
840
Load evaluation run from .kiln file.
841
842
Parameters:
843
- path (str): Path to eval_run.kiln file
844
845
Returns:
846
EvalRun instance
847
"""
848
849
def save_to_file(self) -> None:
850
"""Save evaluation run to .kiln file."""
851
852
class EvalOutputScore:
853
"""
854
Score from evaluation.
855
856
Properties:
857
- value (float | int | bool): Score value
858
- reasoning (str | None): Explanation for the score
859
"""
860
861
class EvalTemplateId:
862
"""
863
Built-in evaluation templates.
864
865
Values:
866
- g_eval: G-Eval assessment
867
- llm_as_judge: LLM-based evaluation
868
"""
869
g_eval = "g_eval"
870
llm_as_judge = "llm_as_judge"
871
872
class EvalConfigType:
873
"""
874
Types of evaluation configs.
875
876
Values:
877
- g_eval: G-Eval configuration
878
- custom: Custom evaluation configuration
879
"""
880
g_eval = "g_eval"
881
custom = "custom"
882
```
883
884
### Document Extraction
885
886
Data models for document extraction and processing.
887
888
```python { .api }
889
class Document:
890
"""
891
Document with extracted content.
892
893
Properties:
894
- id (str): Unique identifier
895
- content (str): Extracted content
896
- metadata (dict): Document metadata
897
- kind (Kind): Type of document
898
"""
899
900
@staticmethod
901
def load_from_file(path: str) -> 'Document':
902
"""
903
Load document from .kiln file.
904
905
Parameters:
906
- path (str): Path to document.kiln file
907
908
Returns:
909
Document instance
910
"""
911
912
def save_to_file(self) -> None:
913
"""Save document to .kiln file."""
914
915
class Extraction:
916
"""
917
Result of document extraction.
918
919
Properties:
920
- document (Document): Extracted document
921
- extractor_config (ExtractorConfig): Configuration used for extraction
922
"""
923
924
class ExtractorConfig:
925
"""
926
Configuration for document extraction.
927
928
Properties:
929
- extractor_type (ExtractorType): Type of extractor
930
- options (dict): Extractor-specific options
931
"""
932
933
class FileInfo:
934
"""
935
Metadata about source file.
936
937
Properties:
938
- filename (str): Name of file
939
- path (str): File system path
940
- size (int): File size in bytes
941
- mime_type (str): MIME type
942
"""
943
944
class Kind:
945
"""
946
Type of document.
947
948
Values:
949
- text: Plain text document
950
- pdf: PDF document
951
- image: Image file
952
- html: HTML document
953
"""
954
text = "text"
955
pdf = "pdf"
956
image = "image"
957
html = "html"
958
959
class OutputFormat:
960
"""
961
Format for extracted output.
962
963
Values:
964
- markdown: Markdown format
965
- plain_text: Plain text format
966
- structured: Structured data format
967
"""
968
markdown = "markdown"
969
plain_text = "plain_text"
970
structured = "structured"
971
972
class ExtractorType:
973
"""
974
Type of extractor to use.
975
976
Values:
977
- litellm: LiteLLM-based extraction
978
- custom: Custom extractor
979
"""
980
litellm = "litellm"
981
custom = "custom"
982
983
class ExtractionSource:
984
"""
985
Source type for extraction.
986
987
Values:
988
- file: Extract from file
989
- url: Extract from URL
990
- text: Extract from text
991
"""
992
file = "file"
993
url = "url"
994
text = "text"
995
```
996
997
### RAG Configuration
998
999
Configuration for Retrieval-Augmented Generation.
1000
1001
```python { .api }
1002
class RagConfig:
1003
"""
1004
Configuration for RAG (Retrieval-Augmented Generation).
1005
1006
Properties:
1007
- vector_store_config (VectorStoreConfig): Vector database configuration
1008
- embedding_config (EmbeddingConfig): Embedding model configuration
1009
- chunker_config (ChunkerConfig): Text chunking configuration
1010
- top_k (int): Number of results to retrieve
1011
"""
1012
```
1013
1014
### Vector Store Configuration
1015
1016
Configuration for vector database integration.
1017
1018
```python { .api }
1019
class VectorStoreConfig:
1020
"""
1021
Configuration for vector database.
1022
1023
Properties:
1024
- vector_store_type (VectorStoreType): Type of vector store
1025
- connection_params (dict): Connection parameters
1026
"""
1027
1028
class VectorStoreType:
1029
"""
1030
Type of vector store.
1031
1032
Values:
1033
- lancedb: LanceDB vector database
1034
"""
1035
lancedb = "lancedb"
1036
1037
class LanceDBConfigBaseProperties:
1038
"""
1039
LanceDB-specific configuration.
1040
1041
Properties:
1042
- uri (str): Database URI
1043
- table_name (str): Table name for storage
1044
"""
1045
```
1046
1047
### Strict Mode
1048
1049
Validation mode control for data models.
1050
1051
```python { .api }
1052
def strict_mode() -> bool:
1053
"""
1054
Get current strict validation mode status.
1055
1056
Returns:
1057
bool: True if strict mode is enabled, False otherwise
1058
"""
1059
1060
def set_strict_mode(enabled: bool) -> None:
1061
"""
1062
Enable or disable strict validation mode.
1063
1064
Parameters:
1065
- enabled (bool): True to enable strict mode, False to disable
1066
"""
1067
```
1068
1069
## Usage Example
1070
1071
```python
1072
from kiln_ai.datamodel import (
1073
Project, Task, TaskRun, TaskOutput, TaskOutputRating,
1074
DataSource, DataSourceType, Priority, TaskRequirement
1075
)
1076
1077
# Create a new task
1078
task = Task(
1079
name="sentiment_analysis",
1080
description="Analyze sentiment of text",
1081
instruction="Classify the sentiment as positive, negative, or neutral."
1082
)
1083
1084
# Add a requirement
1085
requirement = TaskRequirement(
1086
name="valid_sentiment",
1087
instruction="Output must be one of: positive, negative, neutral",
1088
priority=Priority.p0 # p0 = highest priority
1089
)
1090
task.requirements.append(requirement)
1091
1092
# Save task
1093
task.save_to_file()
1094
1095
# Create a task run with data source
1096
run = TaskRun(
1097
parent=task,
1098
input="This product is amazing!",
1099
input_source=DataSource(
1100
type=DataSourceType.human,
1101
properties={"created_by": "annotator@example.com"}
1102
),
1103
output=TaskOutput(
1104
output="positive",
1105
source=DataSource(
1106
type=DataSourceType.human,
1107
properties={"created_by": "annotator@example.com"}
1108
),
1109
rating=TaskOutputRating(value=5, type="five_star")
1110
),
1111
tags=["training", "verified"]
1112
)
1113
1114
# Save run
1115
run.save_to_file()
1116
1117
# Load and work with the data
1118
loaded_task = Task.load_from_file(task.path)
1119
all_runs = loaded_task.runs()
1120
print(f"Task has {len(all_runs)} runs")
1121
1122
# Filter runs by tag
1123
training_runs = [r for r in all_runs if "training" in r.tags]
1124
```
1125