or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

configuration.md datamodel.md evaluation.md fine-tuning.md index.md models.md prompts.md rag-embeddings.md task-execution.md tools.md

datamodel.mddocs/

0

# Data Models

1

2

Core data models for projects, tasks, runs, and configurations. These models represent the fundamental structures for organizing AI work in Kiln, including project management, task definitions, execution tracking, and data source metadata.

3

4

## Capabilities

5

6

### Project Management

7

8

Project class represents a Kiln project containing related tasks, organized in a file-based directory structure.

9

10

```python { .api }

11

class Project:

12

"""

13

Represents a Kiln project containing related tasks.

14

15

Properties:

16

- name (str): Project name

17

- description (str): Project description

18

- path (str): File system path to project directory

19

- id (str): Unique project identifier

20

"""

21

22

def tasks(self) -> list:

23

"""

24

Get all tasks in the project.

25

26

Returns:

27

list: List of Task instances

28

"""

29

30

def documents(self, readonly: bool = False) -> list:

31

"""

32

Get all documents in the project.

33

34

Parameters:

35

- readonly (bool): Whether to load in read-only mode

36

37

Returns:

38

list: List of Document instances

39

"""

40

41

def extractor_configs(self, readonly: bool = False) -> list:

42

"""

43

Get all extractor configurations in the project.

44

45

Parameters:

46

- readonly (bool): Whether to load in read-only mode

47

48

Returns:

49

list: List of ExtractorConfig instances

50

"""

51

52

def chunker_configs(self, readonly: bool = False) -> list:

53

"""

54

Get all chunker configurations in the project.

55

56

Parameters:

57

- readonly (bool): Whether to load in read-only mode

58

59

Returns:

60

list: List of ChunkerConfig instances

61

"""

62

63

def embedding_configs(self, readonly: bool = False) -> list:

64

"""

65

Get all embedding configurations in the project.

66

67

Parameters:

68

- readonly (bool): Whether to load in read-only mode

69

70

Returns:

71

list: List of EmbeddingConfig instances

72

"""

73

74

def rag_configs(self, readonly: bool = False) -> list:

75

"""

76

Get all RAG configurations in the project.

77

78

Parameters:

79

- readonly (bool): Whether to load in read-only mode

80

81

Returns:

82

list: List of RagConfig instances

83

"""

84

85

def vector_store_configs(self, readonly: bool = False) -> list:

86

"""

87

Get all vector store configurations in the project.

88

89

Parameters:

90

- readonly (bool): Whether to load in read-only mode

91

92

Returns:

93

list: List of VectorStoreConfig instances

94

"""

95

96

def external_tool_servers(self, readonly: bool = False) -> list:

97

"""

98

Get all external tool servers in the project.

99

100

Parameters:

101

- readonly (bool): Whether to load in read-only mode

102

103

Returns:

104

list: List of ExternalToolServer instances

105

"""

106

107

@staticmethod

108

def load_from_file(path: str) -> 'Project':

109

"""

110

Load project from .kiln file.

111

112

Parameters:

113

- path (str): Path to project.kiln file

114

115

Returns:

116

Project instance

117

"""

118

119

def save_to_file(self) -> None:

120

"""Save project to .kiln file."""

121

```

122

123

### Task Definition

124

125

Task class defines an AI task with instructions, schemas, and requirements.

126

127

```python { .api }

128

class Task:

129

"""

130

Represents an AI task with instructions and schemas.

131

132

Properties:

133

- name (str): Task name

134

- description (str | None): Task description

135

- instruction (str): Instructions for completing the task

136

- input_json_schema (str | None): JSON schema for validating inputs

137

- output_json_schema (str | None): JSON schema for validating outputs

138

- requirements (list[TaskRequirement]): Requirements that outputs must satisfy (default: [])

139

- thinking_instruction (str | None): Instructions for model thinking/reasoning before answering

140

- default_run_config_id (str | None): ID of default run config to use for this task

141

- path (str): File system path to task directory

142

- id (str): Unique task identifier

143

- parent (Project | None): Parent project

144

"""

145

146

def runs(self, readonly: bool = False) -> list[TaskRun]:

147

"""

148

Get all runs for this task.

149

150

Parameters:

151

- readonly (bool): Whether to load in read-only mode

152

153

Returns:

154

list[TaskRun]: List of task run instances

155

"""

156

157

def dataset_splits(self, readonly: bool = False) -> list:

158

"""

159

Get all dataset splits for this task.

160

161

Parameters:

162

- readonly (bool): Whether to load in read-only mode

163

164

Returns:

165

list[DatasetSplit]: List of dataset split instances

166

"""

167

168

def finetunes(self, readonly: bool = False) -> list:

169

"""

170

Get all fine-tunes for this task.

171

172

Parameters:

173

- readonly (bool): Whether to load in read-only mode

174

175

Returns:

176

list[Finetune]: List of fine-tune instances

177

"""

178

179

def prompts(self, readonly: bool = False) -> list:

180

"""

181

Get all prompts for this task.

182

183

Parameters:

184

- readonly (bool): Whether to load in read-only mode

185

186

Returns:

187

list[Prompt]: List of prompt instances

188

"""

189

190

def evals(self, readonly: bool = False) -> list:

191

"""

192

Get all evaluations for this task.

193

194

Parameters:

195

- readonly (bool): Whether to load in read-only mode

196

197

Returns:

198

list[Eval]: List of evaluation instances

199

"""

200

201

def run_configs(self, readonly: bool = False) -> list:

202

"""

203

Get all run configurations for this task.

204

205

Parameters:

206

- readonly (bool): Whether to load in read-only mode

207

208

Returns:

209

list[TaskRunConfig]: List of run config instances

210

"""

211

212

def output_schema(self) -> dict | None:

213

"""

214

Get parsed output JSON schema as dictionary.

215

216

Returns:

217

dict | None: Parsed schema or None if not set

218

"""

219

220

def input_schema(self) -> dict | None:

221

"""

222

Get parsed input JSON schema as dictionary.

223

224

Returns:

225

dict | None: Parsed schema or None if not set

226

"""

227

228

def parent_project(self) -> 'Project | None':

229

"""

230

Get parent project of this task.

231

232

Returns:

233

Project | None: Parent project or None

234

"""

235

236

@staticmethod

237

def load_from_file(path: str) -> 'Task':

238

"""

239

Load task from .kiln file.

240

241

Parameters:

242

- path (str): Path to task.kiln file

243

244

Returns:

245

Task instance

246

"""

247

248

def save_to_file(self) -> None:

249

"""Save task to .kiln file."""

250

251

class TaskRequirement:

252

"""

253

Requirements that task outputs must satisfy.

254

255

Properties:

256

- id (str): Unique requirement identifier

257

- name (str): Requirement name

258

- description (str | None): Optional description

259

- instruction (str): Instructions for meeting the requirement

260

- priority (Priority): Requirement priority level (p0-p3, default: p2)

261

- type (TaskOutputRatingType): Type of rating (five_star, pass_fail, pass_fail_critical, custom; default: five_star)

262

"""

263

```

264

265

### Task Run Configuration

266

267

TaskRunConfig defines a complete configuration for running a task, including model, provider, prompt, and parameters.

268

269

```python { .api }

270

class TaskRunConfig:

271

"""

272

Configuration for running a task (persisted in Kiln Project under a task).

273

274

A run config includes everything needed to run a task except the input. Running the same

275

RunConfig with the same input should make identical calls to the model (output may vary

276

as models are non-deterministic).

277

278

Properties:

279

- name (str): Run config name

280

- description (str | None): Optional description

281

- run_config_properties (RunConfigProperties): Complete run configuration properties

282

- prompt (BasePrompt | None): Frozen prompt to use (for consistency with dynamic prompts)

283

- id (str): Unique identifier

284

- parent (Task): Parent task

285

"""

286

287

def parent_task(self) -> 'Task | None':

288

"""

289

Get parent task of this run config.

290

291

Returns:

292

Task | None: Parent task or None

293

"""

294

295

@staticmethod

296

def load_from_file(path: str) -> 'TaskRunConfig':

297

"""

298

Load run config from .kiln file.

299

300

Parameters:

301

- path (str): Path to run_config.kiln file

302

303

Returns:

304

TaskRunConfig instance

305

"""

306

307

def save_to_file(self) -> None:

308

"""Save run config to .kiln file."""

309

310

class RunConfigProperties:

311

"""

312

Properties defining how to run a task (model, provider, parameters, etc.).

313

314

Running the same RunConfigProperties with the same input should make identical

315

calls to the model (output may vary as models are non-deterministic).

316

317

Properties:

318

- model_name (str): Model identifier to use

319

- model_provider_name (ModelProviderName): Provider to use

320

- prompt_id (PromptId): Prompt type to use (defaults to simple if not provided)

321

- top_p (float): Top-p sampling parameter (0-1, default: 1.0)

322

- temperature (float): Temperature sampling parameter (0-2, default: 1.0)

323

- structured_output_mode (StructuredOutputMode): How to handle structured JSON output

324

- tools_config (ToolsRunConfig | None): Tools available to the model

325

"""

326

```

327

328

### Task Execution

329

330

TaskRun represents a single execution or sample of a task with input and output data.

331

332

```python { .api }

333

class TaskRun:

334

"""

335

Single execution/sample of a task.

336

337

Properties:

338

- input (str): Input data for the run (JSON string or plaintext)

339

- output (TaskOutput): Output from the run

340

- input_source (DataSource | None): Metadata about input data origin

341

- tags (list[str]): Tags for categorization and filtering

342

- prompt_id (str | None): Associated prompt identifier

343

- id (str): Unique run identifier

344

- parent (Task): Parent task

345

- path (str): File system path to run directory

346

"""

347

348

@staticmethod

349

def load_from_file(path: str) -> 'TaskRun':

350

"""

351

Load task run from .kiln file.

352

353

Parameters:

354

- path (str): Path to task_run.kiln file

355

356

Returns:

357

TaskRun instance

358

"""

359

360

def save_to_file(self) -> None:

361

"""Save task run to .kiln file."""

362

363

class TaskOutput:

364

"""

365

Output from a task execution.

366

367

Properties:

368

- output (str): Output data (JSON string or plaintext)

369

- source (DataSource): Source of the output data

370

- rating (TaskOutputRating | None): Quality rating

371

- requirement_ratings (list[RequirementRating]): Ratings for each requirement

372

"""

373

374

class TaskOutputRating:

375

"""

376

Rating for task output quality.

377

378

Properties:

379

- value (int | bool): Rating value (1-5 for five_star, True/False for pass_fail)

380

- type (TaskOutputRatingType): Type of rating (five_star, pass_fail, pass_fail_critical, or custom)

381

"""

382

383

class RequirementRating:

384

"""

385

Rating for specific requirement satisfaction.

386

387

Properties:

388

- requirement_id (str): ID of the requirement being rated

389

- passed (bool): Whether requirement was satisfied

390

- reason (str | None): Explanation for the rating

391

"""

392

```

393

394

### Token Usage Tracking

395

396

Usage class tracks token consumption for API calls.

397

398

```python { .api }

399

class Usage:

400

"""

401

Token usage tracking for API calls.

402

403

Properties:

404

- prompt_tokens (int): Number of tokens in the prompt

405

- completion_tokens (int): Number of tokens in the completion

406

- total_tokens (int): Total tokens used (prompt + completion)

407

"""

408

```

409

410

### Data Sources

411

412

Data source metadata tracks the origin and properties of data.

413

414

```python { .api }

415

class DataSource:

416

"""

417

Metadata about data origin.

418

419

Properties:

420

- type (DataSourceType): Type of data source (human or synthetic)

421

- properties (dict): Custom properties like created_by, created_at

422

"""

423

424

class DataSourceType:

425

"""

426

Type of data source.

427

428

Values:

429

- human: Data created by humans

430

- synthetic: Data generated synthetically

431

"""

432

human = "human"

433

synthetic = "synthetic"

434

435

class DataSourceProperty:

436

"""

437

Custom properties for data sources.

438

439

Common properties:

440

- created_by (str): Creator identifier

441

- created_at (str): Creation timestamp

442

"""

443

```

444

445

### Prompts

446

447

Prompt management for saved prompt configurations.

448

449

```python { .api }

450

class Prompt:

451

"""

452

Saved prompt configuration.

453

454

Properties:

455

- id (str): Unique prompt identifier

456

- name (str): Prompt name

457

- content (str): Prompt content/template

458

- parent (Task): Parent task

459

"""

460

461

@staticmethod

462

def load_from_file(path: str) -> 'Prompt':

463

"""

464

Load prompt from .kiln file.

465

466

Parameters:

467

- path (str): Path to prompt.kiln file

468

469

Returns:

470

Prompt instance

471

"""

472

473

def save_to_file(self) -> None:

474

"""Save prompt to .kiln file."""

475

476

class BasePrompt:

477

"""Base interface for prompts."""

478

479

class PromptId:

480

"""

481

Validated prompt identifier type.

482

483

Valid format examples:

484

- "simple"

485

- "few_shot"

486

- "cot"

487

- "saved::prompt_id"

488

- "fine_tune::model_id"

489

"""

490

491

class PromptGenerators:

492

"""

493

Built-in prompt generator types.

494

495

Values:

496

- simple: Simple prompt construction

497

- short: Concise prompt construction

498

- multi_shot: Multiple examples

499

- few_shot: Few-shot learning

500

- cot: Chain-of-thought reasoning

501

- few_shot_cot: Few-shot with chain-of-thought

502

- multi_shot_cot: Multi-shot with chain-of-thought

503

- saved: Use saved/custom prompts

504

- fine_tune: Fine-tune formatted prompts

505

"""

506

simple = "simple"

507

short = "short"

508

multi_shot = "multi_shot"

509

few_shot = "few_shot"

510

cot = "cot"

511

few_shot_cot = "few_shot_cot"

512

multi_shot_cot = "multi_shot_cot"

513

saved = "saved"

514

fine_tune = "fine_tune"

515

516

# List of all prompt generator values

517

prompt_generator_values = [

518

"simple", "short", "multi_shot", "few_shot",

519

"cot", "few_shot_cot", "multi_shot_cot",

520

"saved", "fine_tune"

521

]

522

```

523

524

### Fine-tuning

525

526

Fine-tuning job configuration and status tracking.

527

528

```python { .api }

529

class Finetune:

530

"""

531

Fine-tuning job configuration and tracking.

532

533

Properties:

534

- id (str): Unique identifier

535

- status (FineTuneStatusType): Current job status

536

- model_id (str): Base model identifier

537

- provider (str): Fine-tuning provider name

538

- parent (Task): Parent task

539

"""

540

541

@staticmethod

542

def load_from_file(path: str) -> 'Finetune':

543

"""

544

Load fine-tune from .kiln file.

545

546

Parameters:

547

- path (str): Path to finetune.kiln file

548

549

Returns:

550

Finetune instance

551

"""

552

553

def save_to_file(self) -> None:

554

"""Save fine-tune to .kiln file."""

555

556

def start(self) -> None:

557

"""Start the fine-tuning job."""

558

559

def check_status(self) -> dict:

560

"""

561

Check current status of fine-tuning job.

562

563

Returns:

564

dict: Status information including progress and errors

565

"""

566

```

567

568

### Dataset Management

569

570

Dataset splitting for train/test/validation sets.

571

572

```python { .api }

573

class DatasetSplit:

574

"""

575

Frozen dataset split (train/test/validation).

576

577

Properties:

578

- definition (DatasetSplitDefinition): Split configuration

579

- created_at (str): Timestamp of split creation

580

- id (str): Unique split identifier

581

- parent (Task): Parent task

582

"""

583

584

@staticmethod

585

def load_from_file(path: str) -> 'DatasetSplit':

586

"""

587

Load dataset split from .kiln file.

588

589

Parameters:

590

- path (str): Path to dataset_split.kiln file

591

592

Returns:

593

DatasetSplit instance

594

"""

595

596

def save_to_file(self) -> None:

597

"""Save dataset split to .kiln file."""

598

599

class DatasetSplitDefinition:

600

"""

601

Definition for splitting dataset.

602

603

Properties:

604

- train_ratio (float): Ratio of data for training (0-1)

605

- test_ratio (float): Ratio of data for testing (0-1)

606

- validation_ratio (float): Ratio of data for validation (0-1)

607

608

Note: train_ratio + test_ratio + validation_ratio should equal 1.0

609

"""

610

```

611

612

### External Tools

613

614

MCP (Model Context Protocol) tool server configuration.

615

616

```python { .api }

617

class ExternalToolServer:

618

"""

619

MCP tool server configuration.

620

621

Properties:

622

- name (str): Server name

623

- server_url (str): Server URL endpoint

624

- api_key (str | None): API key for authentication

625

"""

626

```

627

628

### Enumerations

629

630

Core enumerations used throughout the data model.

631

632

```python { .api }

633

class Priority:

634

"""

635

Task requirement priority levels (IntEnum where lower number = higher priority).

636

637

Values:

638

- p0: Priority 0 (highest/critical priority)

639

- p1: Priority 1 (high priority)

640

- p2: Priority 2 (medium priority)

641

- p3: Priority 3 (lower priority)

642

"""

643

p0 = 0

644

p1 = 1

645

p2 = 2

646

p3 = 3

647

648

class TaskOutputRatingType:

649

"""

650

Type of rating system.

651

652

Values:

653

- five_star: 1-5 star rating

654

- pass_fail: Binary pass/fail rating

655

- pass_fail_critical: Critical pass/fail rating

656

- custom: Custom rating type

657

"""

658

five_star = "five_star"

659

pass_fail = "pass_fail"

660

pass_fail_critical = "pass_fail_critical"

661

custom = "custom"

662

663

class StructuredOutputMode:

664

"""

665

Enumeration of supported structured output modes for model API calls.

666

667

Values:

668

- default: Let the adapter decide (legacy, do not use for new use cases)

669

- json_schema: Request JSON using API capabilities for json_schema

670

- function_calling_weak: Weak function calling mode

671

- function_calling: Request JSON using API capabilities for function calling

672

- json_mode: Request JSON using API's JSON mode (valid JSON but no schema validation)

673

- json_instructions: Append instructions to prompt for JSON output (no API capabilities)

674

- json_instruction_and_object: Instructions + json_mode API capabilities

675

- json_custom_instructions: Model outputs JSON with custom system prompt instructions

676

- unknown: Mode not known (on old models), lookup best option at runtime

677

"""

678

default = "default"

679

json_schema = "json_schema"

680

function_calling_weak = "function_calling_weak"

681

function_calling = "function_calling"

682

json_mode = "json_mode"

683

json_instructions = "json_instructions"

684

json_instruction_and_object = "json_instruction_and_object"

685

json_custom_instructions = "json_custom_instructions"

686

unknown = "unknown"

687

688

class FineTuneStatusType:

689

"""

690

Status of fine-tuning job.

691

692

Values:

693

- unknown: Unknown status (server error)

694

- pending: Waiting to start

695

- running: Currently running

696

- completed: Completed successfully

697

- failed: Failed with error

698

"""

699

unknown = "unknown"

700

pending = "pending"

701

running = "running"

702

completed = "completed"

703

failed = "failed"

704

```

705

706

### Text Chunking

707

708

Configuration and data structures for text chunking.

709

710

```python { .api }

711

class ChunkerConfig:

712

"""

713

Configuration for text chunking.

714

715

Properties:

716

- chunker_type (ChunkerType): Type of chunker to use

717

- chunk_size (int): Size of each chunk in characters

718

- chunk_overlap (int): Overlap between chunks in characters

719

"""

720

721

class ChunkerType:

722

"""

723

Available chunker types.

724

725

Values:

726

- fixed_window: Fixed-size window chunking

727

"""

728

fixed_window = "fixed_window"

729

730

class Chunk:

731

"""

732

Single text chunk with metadata.

733

734

Properties:

735

- text (str): Chunk content

736

- start_index (int): Start position in source document

737

- end_index (int): End position in source document

738

- metadata (dict): Additional chunk metadata

739

"""

740

741

class ChunkedDocument:

742

"""

743

Document split into chunks.

744

745

Properties:

746

- chunks (list[Chunk]): List of text chunks

747

- source_document (str): Original document content

748

"""

749

```

750

751

### Embeddings

752

753

Embedding configuration and data structures.

754

755

```python { .api }

756

class EmbeddingConfig:

757

"""

758

Configuration for embeddings.

759

760

Properties:

761

- model_id (str): Embedding model identifier

762

- provider (str): Embedding provider name

763

- dimensions (int): Embedding vector dimensions

764

"""

765

766

class Embedding:

767

"""

768

Single embedding vector.

769

770

Properties:

771

- vector (list[float]): Embedding vector values

772

- metadata (dict): Additional embedding metadata

773

"""

774

775

class ChunkEmbeddings:

776

"""

777

Embeddings for document chunks.

778

779

Properties:

780

- embeddings (list[Embedding]): List of embedding vectors

781

- chunk_ids (list[str]): Corresponding chunk identifiers

782

"""

783

```

784

785

### Evaluation Data Models

786

787

Data models for evaluation configurations and results.

788

789

```python { .api }

790

class Eval:

791

"""

792

Evaluation configuration.

793

794

Properties:

795

- id (str): Unique identifier

796

- name (str): Evaluation name

797

- eval_type (str): Type of evaluation

798

- config (EvalConfig): Evaluation configuration

799

- parent (Task): Parent task

800

"""

801

802

@staticmethod

803

def load_from_file(path: str) -> 'Eval':

804

"""

805

Load evaluation from .kiln file.

806

807

Parameters:

808

- path (str): Path to eval.kiln file

809

810

Returns:

811

Eval instance

812

"""

813

814

def save_to_file(self) -> None:

815

"""Save evaluation to .kiln file."""

816

817

class EvalConfig:

818

"""

819

Configuration for specific evaluation type.

820

821

Properties:

822

- type (EvalConfigType): Type of evaluation configuration

823

- parameters (dict): Evaluation-specific parameters

824

"""

825

826

class EvalRun:

827

"""

828

Single evaluation run.

829

830

Properties:

831

- eval_id (str): Evaluation identifier

832

- task_run_id (str): Task run being evaluated

833

- score (EvalOutputScore): Evaluation score

834

- id (str): Unique run identifier

835

"""

836

837

@staticmethod

838

def load_from_file(path: str) -> 'EvalRun':

839

"""

840

Load evaluation run from .kiln file.

841

842

Parameters:

843

- path (str): Path to eval_run.kiln file

844

845

Returns:

846

EvalRun instance

847

"""

848

849

def save_to_file(self) -> None:

850

"""Save evaluation run to .kiln file."""

851

852

class EvalOutputScore:

853

"""

854

Score from evaluation.

855

856

Properties:

857

- value (float | int | bool): Score value

858

- reasoning (str | None): Explanation for the score

859

"""

860

861

class EvalTemplateId:

862

"""

863

Built-in evaluation templates.

864

865

Values:

866

- g_eval: G-Eval assessment

867

- llm_as_judge: LLM-based evaluation

868

"""

869

g_eval = "g_eval"

870

llm_as_judge = "llm_as_judge"

871

872

class EvalConfigType:

873

"""

874

Types of evaluation configs.

875

876

Values:

877

- g_eval: G-Eval configuration

878

- custom: Custom evaluation configuration

879

"""

880

g_eval = "g_eval"

881

custom = "custom"

882

```

883

884

### Document Extraction

885

886

Data models for document extraction and processing.

887

888

```python { .api }

889

class Document:

890

"""

891

Document with extracted content.

892

893

Properties:

894

- id (str): Unique identifier

895

- content (str): Extracted content

896

- metadata (dict): Document metadata

897

- kind (Kind): Type of document

898

"""

899

900

@staticmethod

901

def load_from_file(path: str) -> 'Document':

902

"""

903

Load document from .kiln file.

904

905

Parameters:

906

- path (str): Path to document.kiln file

907

908

Returns:

909

Document instance

910

"""

911

912

def save_to_file(self) -> None:

913

"""Save document to .kiln file."""

914

915

class Extraction:

916

"""

917

Result of document extraction.

918

919

Properties:

920

- document (Document): Extracted document

921

- extractor_config (ExtractorConfig): Configuration used for extraction

922

"""

923

924

class ExtractorConfig:

925

"""

926

Configuration for document extraction.

927

928

Properties:

929

- extractor_type (ExtractorType): Type of extractor

930

- options (dict): Extractor-specific options

931

"""

932

933

class FileInfo:

934

"""

935

Metadata about source file.

936

937

Properties:

938

- filename (str): Name of file

939

- path (str): File system path

940

- size (int): File size in bytes

941

- mime_type (str): MIME type

942

"""

943

944

class Kind:

945

"""

946

Type of document.

947

948

Values:

949

- text: Plain text document

950

- pdf: PDF document

951

- image: Image file

952

- html: HTML document

953

"""

954

text = "text"

955

pdf = "pdf"

956

image = "image"

957

html = "html"

958

959

class OutputFormat:

960

"""

961

Format for extracted output.

962

963

Values:

964

- markdown: Markdown format

965

- plain_text: Plain text format

966

- structured: Structured data format

967

"""

968

markdown = "markdown"

969

plain_text = "plain_text"

970

structured = "structured"

971

972

class ExtractorType:

973

"""

974

Type of extractor to use.

975

976

Values:

977

- litellm: LiteLLM-based extraction

978

- custom: Custom extractor

979

"""

980

litellm = "litellm"

981

custom = "custom"

982

983

class ExtractionSource:

984

"""

985

Source type for extraction.

986

987

Values:

988

- file: Extract from file

989

- url: Extract from URL

990

- text: Extract from text

991

"""

992

file = "file"

993

url = "url"

994

text = "text"

995

```

996

997

### RAG Configuration

998

999

Configuration for Retrieval-Augmented Generation.

1000

1001

```python { .api }

1002

class RagConfig:

1003

"""

1004

Configuration for RAG (Retrieval-Augmented Generation).

1005

1006

Properties:

1007

- vector_store_config (VectorStoreConfig): Vector database configuration

1008

- embedding_config (EmbeddingConfig): Embedding model configuration

1009

- chunker_config (ChunkerConfig): Text chunking configuration

1010

- top_k (int): Number of results to retrieve

1011

"""

1012

```

1013

1014

### Vector Store Configuration

1015

1016

Configuration for vector database integration.

1017

1018

```python { .api }

1019

class VectorStoreConfig:

1020

"""

1021

Configuration for vector database.

1022

1023

Properties:

1024

- vector_store_type (VectorStoreType): Type of vector store

1025

- connection_params (dict): Connection parameters

1026

"""

1027

1028

class VectorStoreType:

1029

"""

1030

Type of vector store.

1031

1032

Values:

1033

- lancedb: LanceDB vector database

1034

"""

1035

lancedb = "lancedb"

1036

1037

class LanceDBConfigBaseProperties:

1038

"""

1039

LanceDB-specific configuration.

1040

1041

Properties:

1042

- uri (str): Database URI

1043

- table_name (str): Table name for storage

1044

"""

1045

```

1046

1047

### Strict Mode

1048

1049

Validation mode control for data models.

1050

1051

```python { .api }

1052

def strict_mode() -> bool:

1053

"""

1054

Get current strict validation mode status.

1055

1056

Returns:

1057

bool: True if strict mode is enabled, False otherwise

1058

"""

1059

1060

def set_strict_mode(enabled: bool) -> None:

1061

"""

1062

Enable or disable strict validation mode.

1063

1064

Parameters:

1065

- enabled (bool): True to enable strict mode, False to disable

1066

"""

1067

```

1068

1069

## Usage Example

1070

1071

```python

1072

from kiln_ai.datamodel import (

1073

Project, Task, TaskRun, TaskOutput, TaskOutputRating,

1074

DataSource, DataSourceType, Priority, TaskRequirement

1075

)

1076

1077

# Create a new task

1078

task = Task(

1079

name="sentiment_analysis",

1080

description="Analyze sentiment of text",

1081

instruction="Classify the sentiment as positive, negative, or neutral."

1082

)

1083

1084

# Add a requirement

1085

requirement = TaskRequirement(

1086

name="valid_sentiment",

1087

instruction="Output must be one of: positive, negative, neutral",

1088

priority=Priority.p0 # p0 = highest priority

1089

)

1090

task.requirements.append(requirement)

1091

1092

# Save task

1093

task.save_to_file()

1094

1095

# Create a task run with data source

1096

run = TaskRun(

1097

parent=task,

1098

input="This product is amazing!",

1099

input_source=DataSource(

1100

type=DataSourceType.human,

1101

properties={"created_by": "annotator@example.com"}

1102

),

1103

output=TaskOutput(

1104

output="positive",

1105

source=DataSource(

1106

type=DataSourceType.human,

1107

properties={"created_by": "annotator@example.com"}

1108

),

1109

rating=TaskOutputRating(value=5, type="five_star")

1110

),

1111

tags=["training", "verified"]

1112

)

1113

1114

# Save run

1115

run.save_to_file()

1116

1117

# Load and work with the data

1118

loaded_task = Task.load_from_file(task.path)

1119

all_runs = loaded_task.runs()

1120

print(f"Task has {len(all_runs)} runs")

1121

1122

# Filter runs by tag

1123

training_runs = [r for r in all_runs if "training" in r.tags]

1124

```

1125