CtrlK
BlogDocsLog inGet started
Tessl Logo

tessl/pypi-kiln-ai

Kiln AI is a comprehensive platform for building, evaluating, and deploying AI systems with dataset management, model fine-tuning, RAG, and evaluation capabilities.

Overview
Eval results
Files

docs/datamodel.md

Data Models

Core data models for projects, tasks, runs, and configurations. These models represent the fundamental structures for organizing AI work in Kiln, including project management, task definitions, execution tracking, and data source metadata.

Capabilities

Project Management

Project class represents a Kiln project containing related tasks, organized in a file-based directory structure.

class Project:
    """
    Represents a Kiln project containing related tasks.

    Properties:
    - name (str): Project name
    - description (str): Project description
    - path (str): File system path to project directory
    - id (str): Unique project identifier
    """

    def tasks(self) -> list:
        """
        Get all tasks in the project.

        Returns:
        list: List of Task instances
        """

    def documents(self, readonly: bool = False) -> list:
        """
        Get all documents in the project.

        Parameters:
        - readonly (bool): Whether to load in read-only mode

        Returns:
        list: List of Document instances
        """

    def extractor_configs(self, readonly: bool = False) -> list:
        """
        Get all extractor configurations in the project.

        Parameters:
        - readonly (bool): Whether to load in read-only mode

        Returns:
        list: List of ExtractorConfig instances
        """

    def chunker_configs(self, readonly: bool = False) -> list:
        """
        Get all chunker configurations in the project.

        Parameters:
        - readonly (bool): Whether to load in read-only mode

        Returns:
        list: List of ChunkerConfig instances
        """

    def embedding_configs(self, readonly: bool = False) -> list:
        """
        Get all embedding configurations in the project.

        Parameters:
        - readonly (bool): Whether to load in read-only mode

        Returns:
        list: List of EmbeddingConfig instances
        """

    def rag_configs(self, readonly: bool = False) -> list:
        """
        Get all RAG configurations in the project.

        Parameters:
        - readonly (bool): Whether to load in read-only mode

        Returns:
        list: List of RagConfig instances
        """

    def vector_store_configs(self, readonly: bool = False) -> list:
        """
        Get all vector store configurations in the project.

        Parameters:
        - readonly (bool): Whether to load in read-only mode

        Returns:
        list: List of VectorStoreConfig instances
        """

    def external_tool_servers(self, readonly: bool = False) -> list:
        """
        Get all external tool servers in the project.

        Parameters:
        - readonly (bool): Whether to load in read-only mode

        Returns:
        list: List of ExternalToolServer instances
        """

    @staticmethod
    def load_from_file(path: str) -> 'Project':
        """
        Load project from .kiln file.

        Parameters:
        - path (str): Path to project.kiln file

        Returns:
        Project instance
        """

    def save_to_file(self) -> None:
        # Fix: the original docstring was unterminated (opened with """ but
        # closed with a single "), which breaks parsing of everything after it.
        """Save project to .kiln file."""

Task Definition

Task class defines an AI task with instructions, schemas, and requirements.

class Task:
    """
    Represents an AI task with instructions and schemas.

    Properties:
    - name (str): Task name
    - description (str | None): Task description
    - instruction (str): Instructions for completing the task
    - input_json_schema (str | None): JSON schema for validating inputs
    - output_json_schema (str | None): JSON schema for validating outputs
    - requirements (list[TaskRequirement]): Requirements that outputs must satisfy (default: [])
    - thinking_instruction (str | None): Instructions for model thinking/reasoning before answering
    - default_run_config_id (str | None): ID of default run config to use for this task
    - path (str): File system path to task directory
    - id (str): Unique task identifier
    - parent (Project | None): Parent project
    """

    def runs(self, readonly: bool = False) -> list[TaskRun]:
        """
        Get all runs for this task.

        Parameters:
        - readonly (bool): Whether to load in read-only mode

        Returns:
        list[TaskRun]: List of task run instances
        """

    def dataset_splits(self, readonly: bool = False) -> list:
        """
        Get all dataset splits for this task.

        Parameters:
        - readonly (bool): Whether to load in read-only mode

        Returns:
        list[DatasetSplit]: List of dataset split instances
        """

    def finetunes(self, readonly: bool = False) -> list:
        """
        Get all fine-tunes for this task.

        Parameters:
        - readonly (bool): Whether to load in read-only mode

        Returns:
        list[Finetune]: List of fine-tune instances
        """

    def prompts(self, readonly: bool = False) -> list:
        """
        Get all prompts for this task.

        Parameters:
        - readonly (bool): Whether to load in read-only mode

        Returns:
        list[Prompt]: List of prompt instances
        """

    def evals(self, readonly: bool = False) -> list:
        """
        Get all evaluations for this task.

        Parameters:
        - readonly (bool): Whether to load in read-only mode

        Returns:
        list[Eval]: List of evaluation instances
        """

    def run_configs(self, readonly: bool = False) -> list:
        """
        Get all run configurations for this task.

        Parameters:
        - readonly (bool): Whether to load in read-only mode

        Returns:
        list[TaskRunConfig]: List of run config instances
        """

    def output_schema(self) -> dict | None:
        """
        Get parsed output JSON schema as dictionary.

        Returns:
        dict | None: Parsed schema or None if not set
        """

    def input_schema(self) -> dict | None:
        """
        Get parsed input JSON schema as dictionary.

        Returns:
        dict | None: Parsed schema or None if not set
        """

    def parent_project(self) -> 'Project' | None:
        """
        Get parent project of this task.

        Returns:
        Project | None: Parent project or None
        """

    @staticmethod
    def load_from_file(path: str) -> 'Task':
        """
        Load task from .kiln file.

        Parameters:
        - path (str): Path to task.kiln file

        Returns:
        Task instance
        """

    def save_to_file(self) -> None:
        """Save task to .kiln file."""

class TaskRequirement:
    """
    Requirements that task outputs must satisfy.

    Properties:
    - id (str): Unique requirement identifier
    - name (str): Requirement name
    - description (str | None): Optional description
    - instruction (str): Instructions for meeting the requirement
    - priority (Priority): Requirement priority level (p0-p3, default: p2)
    - type (TaskOutputRatingType): Type of rating (five_star, pass_fail, pass_fail_critical, custom; default: five_star)
    """

Task Run Configuration

TaskRunConfig defines a complete configuration for running a task, including model, provider, prompt, and parameters.

class TaskRunConfig:
    """
    Configuration for running a task (persisted in Kiln Project under a task).

    A run config includes everything needed to run a task except the input. Running the same
    RunConfig with the same input should make identical calls to the model (output may vary
    as models are non-deterministic).

    Properties:
    - name (str): Run config name
    - description (str | None): Optional description
    - run_config_properties (RunConfigProperties): Complete run configuration properties
    - prompt (BasePrompt | None): Frozen prompt to use (for consistency with dynamic prompts)
    - id (str): Unique identifier
    - parent (Task): Parent task
    """

    # Fix: the original annotation 'Task' | None evaluates str | None at
    # definition time and raises TypeError; quote the whole union instead.
    def parent_task(self) -> "Task | None":
        """
        Get parent task of this run config.

        Returns:
        Task | None: Parent task or None
        """

    @staticmethod
    def load_from_file(path: str) -> 'TaskRunConfig':
        """
        Load run config from .kiln file.

        Parameters:
        - path (str): Path to run_config.kiln file

        Returns:
        TaskRunConfig instance
        """

    def save_to_file(self) -> None:
        """Save run config to .kiln file."""

class RunConfigProperties:
    """
    Properties defining how to run a task (model, provider, parameters, etc.).

    Running the same RunConfigProperties with the same input should make identical
    calls to the model (output may vary as models are non-deterministic).

    Properties:
    - model_name (str): Model identifier to use
    - model_provider_name (ModelProviderName): Provider to use
    - prompt_id (PromptId): Prompt type to use (defaults to simple if not provided)
    - top_p (float): Top-p sampling parameter (0-1, default: 1.0)
    - temperature (float): Temperature sampling parameter (0-2, default: 1.0)
    - structured_output_mode (StructuredOutputMode): How to handle structured JSON output
    - tools_config (ToolsRunConfig | None): Tools available to the model
    """

Task Execution

TaskRun represents a single execution or sample of a task with input and output data.

class TaskRun:
    """
    Single execution/sample of a task.

    Properties:
    - input (str): Input data for the run (JSON string or plaintext)
    - output (TaskOutput): Output from the run
    - input_source (DataSource | None): Metadata about input data origin
    - tags (list[str]): Tags for categorization and filtering
    - prompt_id (str | None): Associated prompt identifier
    - id (str): Unique run identifier
    - parent (Task): Parent task
    - path (str): File system path to run directory
    """

    @staticmethod
    def load_from_file(path: str) -> 'TaskRun':
        """
        Load task run from .kiln file.

        Parameters:
        - path (str): Path to task_run.kiln file

        Returns:
        TaskRun instance
        """

    def save_to_file(self) -> None:
        """Save task run to .kiln file."""

class TaskOutput:
    """
    Output from a task execution.

    Properties:
    - output (str): Output data (JSON string or plaintext)
    - source (DataSource): Source of the output data
    - rating (TaskOutputRating | None): Quality rating
    - requirement_ratings (list[RequirementRating]): Ratings for each requirement
    """

class TaskOutputRating:
    """
    Rating for task output quality.

    Properties:
    - value (int | bool): Rating value (1-5 for five_star, True/False for pass_fail)
    - type (TaskOutputRatingType): Type of rating (five_star, pass_fail,
      pass_fail_critical, or custom — see TaskOutputRatingType)
    """

class RequirementRating:
    """
    Rating for specific requirement satisfaction.

    Properties:
    - requirement_id (str): ID of the requirement being rated
    - passed (bool): Whether requirement was satisfied
    - reason (str | None): Explanation for the rating
    """

Token Usage Tracking

Usage class tracks token consumption for API calls.

class Usage:
    """
    Token usage tracking for API calls.

    Properties:
    - prompt_tokens (int): Number of tokens in the prompt
    - completion_tokens (int): Number of tokens in the completion
    - total_tokens (int): Total tokens used (prompt + completion)
    """

Data Sources

Data source metadata tracks the origin and properties of data.

class DataSource:
    """
    Metadata about data origin.

    Properties:
    - type (DataSourceType): Type of data source (human or synthetic)
    - properties (dict): Custom properties like created_by, created_at
    """

class DataSourceType:
    """
    Type of data source.

    Values:
    - human: Data created by humans
    - synthetic: Data generated synthetically
    """
    human = "human"
    synthetic = "synthetic"

class DataSourceProperty:
    """
    Custom properties for data sources.

    Common properties:
    - created_by (str): Creator identifier
    - created_at (str): Creation timestamp
    """

Prompts

Prompt management for saved prompt configurations.

class Prompt:
    """
    Saved prompt configuration.

    Properties:
    - id (str): Unique prompt identifier
    - name (str): Prompt name
    - content (str): Prompt content/template
    - parent (Task): Parent task
    """

    @staticmethod
    def load_from_file(path: str) -> 'Prompt':
        """
        Load prompt from .kiln file.

        Parameters:
        - path (str): Path to prompt.kiln file

        Returns:
        Prompt instance
        """

    def save_to_file(self) -> None:
        """Save prompt to .kiln file."""

class BasePrompt:
    """Base interface for prompts."""

class PromptId:
    """
    Validated prompt identifier type.

    Valid format examples:
    - "simple"
    - "few_shot"
    - "cot"
    - "saved::prompt_id"
    - "fine_tune::model_id"
    """

class PromptGenerators:
    """
    Built-in prompt generator types.

    Values:
    - simple: Simple prompt construction
    - short: Concise prompt construction
    - multi_shot: Multiple examples
    - few_shot: Few-shot learning
    - cot: Chain-of-thought reasoning
    - few_shot_cot: Few-shot with chain-of-thought
    - multi_shot_cot: Multi-shot with chain-of-thought
    - saved: Use saved/custom prompts
    - fine_tune: Fine-tune formatted prompts
    """
    simple = "simple"
    short = "short"
    multi_shot = "multi_shot"
    few_shot = "few_shot"
    cot = "cot"
    few_shot_cot = "few_shot_cot"
    multi_shot_cot = "multi_shot_cot"
    saved = "saved"
    fine_tune = "fine_tune"

# All valid prompt generator identifiers, mirroring PromptGenerators.
prompt_generator_values = [
    "simple",
    "short",
    "multi_shot",
    "few_shot",
    "cot",
    "few_shot_cot",
    "multi_shot_cot",
    "saved",
    "fine_tune",
]

Fine-tuning

Fine-tuning job configuration and status tracking.

class Finetune:
    """
    Fine-tuning job configuration and tracking.

    Properties:
    - id (str): Unique identifier
    - status (FineTuneStatusType): Current job status
    - model_id (str): Base model identifier
    - provider (str): Fine-tuning provider name
    - parent (Task): Parent task
    """

    @staticmethod
    def load_from_file(path: str) -> 'Finetune':
        """
        Load fine-tune from .kiln file.

        Parameters:
        - path (str): Path to finetune.kiln file

        Returns:
        Finetune instance
        """

    def save_to_file(self) -> None:
        """Save fine-tune to .kiln file."""

    def start(self) -> None:
        """Start the fine-tuning job."""

    def check_status(self) -> dict:
        """
        Check current status of fine-tuning job.

        Returns:
        dict: Status information including progress and errors
        """

Dataset Management

Dataset splitting for train/test/validation sets.

class DatasetSplit:
    """
    Frozen dataset split (train/test/validation).

    Properties:
    - definition (DatasetSplitDefinition): Split configuration
    - created_at (str): Timestamp of split creation
    - id (str): Unique split identifier
    - parent (Task): Parent task
    """

    @staticmethod
    def load_from_file(path: str) -> 'DatasetSplit':
        """
        Load dataset split from .kiln file.

        Parameters:
        - path (str): Path to dataset_split.kiln file

        Returns:
        DatasetSplit instance
        """

    def save_to_file(self) -> None:
        """Save dataset split to .kiln file."""

class DatasetSplitDefinition:
    """
    Definition for splitting dataset.

    Properties:
    - train_ratio (float): Ratio of data for training (0-1)
    - test_ratio (float): Ratio of data for testing (0-1)
    - validation_ratio (float): Ratio of data for validation (0-1)

    Note: train_ratio + test_ratio + validation_ratio should equal 1.0
    """

External Tools

MCP (Model Context Protocol) tool server configuration.

class ExternalToolServer:
    """
    MCP tool server configuration.

    Properties:
    - name (str): Server name
    - server_url (str): Server URL endpoint
    - api_key (str | None): API key for authentication
    """

Enumerations

Core enumerations used throughout the data model.

class Priority:
    """
    Task requirement priority levels (IntEnum where lower number = higher priority).

    Values:
    - p0: Priority 0 (highest/critical priority)
    - p1: Priority 1 (high priority)
    - p2: Priority 2 (medium priority)
    - p3: Priority 3 (lower priority)
    """
    p0 = 0
    p1 = 1
    p2 = 2
    p3 = 3

class TaskOutputRatingType:
    """
    Type of rating system.

    Values:
    - five_star: 1-5 star rating
    - pass_fail: Binary pass/fail rating
    - pass_fail_critical: Critical pass/fail rating
    - custom: Custom rating type
    """
    five_star = "five_star"
    pass_fail = "pass_fail"
    pass_fail_critical = "pass_fail_critical"
    custom = "custom"

class StructuredOutputMode:
    """
    Enumeration of supported structured output modes for model API calls.

    Values:
    - default: Let the adapter decide (legacy, do not use for new use cases)
    - json_schema: Request JSON using API capabilities for json_schema
    - function_calling_weak: Weak function calling mode
    - function_calling: Request JSON using API capabilities for function calling
    - json_mode: Request JSON using API's JSON mode (valid JSON but no schema validation)
    - json_instructions: Append instructions to prompt for JSON output (no API capabilities)
    - json_instruction_and_object: Instructions + json_mode API capabilities
    - json_custom_instructions: Model outputs JSON with custom system prompt instructions
    - unknown: Mode not known (on old models), lookup best option at runtime
    """
    default = "default"
    json_schema = "json_schema"
    function_calling_weak = "function_calling_weak"
    function_calling = "function_calling"
    json_mode = "json_mode"
    json_instructions = "json_instructions"
    json_instruction_and_object = "json_instruction_and_object"
    json_custom_instructions = "json_custom_instructions"
    unknown = "unknown"

class FineTuneStatusType:
    """
    Status of fine-tuning job.

    Values:
    - unknown: Unknown status (server error)
    - pending: Waiting to start
    - running: Currently running
    - completed: Completed successfully
    - failed: Failed with error
    """
    unknown = "unknown"
    pending = "pending"
    running = "running"
    completed = "completed"
    failed = "failed"

Text Chunking

Configuration and data structures for text chunking.

class ChunkerConfig:
    """
    Configuration for text chunking.

    Properties:
    - chunker_type (ChunkerType): Type of chunker to use
    - chunk_size (int): Size of each chunk in characters
    - chunk_overlap (int): Overlap between chunks in characters
    """

class ChunkerType:
    """
    Available chunker types.

    Values:
    - fixed_window: Fixed-size window chunking
    """
    fixed_window = "fixed_window"

class Chunk:
    """
    Single text chunk with metadata.

    Properties:
    - text (str): Chunk content
    - start_index (int): Start position in source document
    - end_index (int): End position in source document
    - metadata (dict): Additional chunk metadata
    """

class ChunkedDocument:
    """
    Document split into chunks.

    Properties:
    - chunks (list[Chunk]): List of text chunks
    - source_document (str): Original document content
    """

Embeddings

Embedding configuration and data structures.

class EmbeddingConfig:
    """
    Configuration for embeddings.

    Properties:
    - model_id (str): Embedding model identifier
    - provider (str): Embedding provider name
    - dimensions (int): Embedding vector dimensions
    """

class Embedding:
    """
    Single embedding vector.

    Properties:
    - vector (list[float]): Embedding vector values
    - metadata (dict): Additional embedding metadata
    """

class ChunkEmbeddings:
    """
    Embeddings for document chunks.

    Properties:
    - embeddings (list[Embedding]): List of embedding vectors
    - chunk_ids (list[str]): Corresponding chunk identifiers
    """

Evaluation Data Models

Data models for evaluation configurations and results.

class Eval:
    """
    Evaluation configuration.

    Properties:
    - id (str): Unique identifier
    - name (str): Evaluation name
    - eval_type (str): Type of evaluation
    - config (EvalConfig): Evaluation configuration
    - parent (Task): Parent task
    """

    @staticmethod
    def load_from_file(path: str) -> 'Eval':
        """
        Load evaluation from .kiln file.

        Parameters:
        - path (str): Path to eval.kiln file

        Returns:
        Eval instance
        """

    def save_to_file(self) -> None:
        """Save evaluation to .kiln file."""

class EvalConfig:
    """
    Configuration for specific evaluation type.

    Properties:
    - type (EvalConfigType): Type of evaluation configuration
    - parameters (dict): Evaluation-specific parameters
    """

class EvalRun:
    """
    Single evaluation run.

    Properties:
    - eval_id (str): Evaluation identifier
    - task_run_id (str): Task run being evaluated
    - score (EvalOutputScore): Evaluation score
    - id (str): Unique run identifier
    """

    @staticmethod
    def load_from_file(path: str) -> 'EvalRun':
        """
        Load evaluation run from .kiln file.

        Parameters:
        - path (str): Path to eval_run.kiln file

        Returns:
        EvalRun instance
        """

    def save_to_file(self) -> None:
        """Save evaluation run to .kiln file."""

class EvalOutputScore:
    """
    Score from evaluation.

    Properties:
    - value (float | int | bool): Score value
    - reasoning (str | None): Explanation for the score
    """

class EvalTemplateId:
    """
    Built-in evaluation templates.

    Values:
    - g_eval: G-Eval assessment
    - llm_as_judge: LLM-based evaluation
    """
    g_eval = "g_eval"
    llm_as_judge = "llm_as_judge"

class EvalConfigType:
    """
    Types of evaluation configs.

    Values:
    - g_eval: G-Eval configuration
    - custom: Custom evaluation configuration
    """
    g_eval = "g_eval"
    custom = "custom"

Document Extraction

Data models for document extraction and processing.

class Document:
    """
    Document with extracted content.

    Properties:
    - id (str): Unique identifier
    - content (str): Extracted content
    - metadata (dict): Document metadata
    - kind (Kind): Type of document
    """

    @staticmethod
    def load_from_file(path: str) -> 'Document':
        """
        Load document from .kiln file.

        Parameters:
        - path (str): Path to document.kiln file

        Returns:
        Document instance
        """

    def save_to_file(self) -> None:
        """Save document to .kiln file."""

class Extraction:
    """
    Result of document extraction.

    Properties:
    - document (Document): Extracted document
    - extractor_config (ExtractorConfig): Configuration used for extraction
    """

class ExtractorConfig:
    """
    Configuration for document extraction.

    Properties:
    - extractor_type (ExtractorType): Type of extractor
    - options (dict): Extractor-specific options
    """

class FileInfo:
    """
    Metadata about source file.

    Properties:
    - filename (str): Name of file
    - path (str): File system path
    - size (int): File size in bytes
    - mime_type (str): MIME type
    """

class Kind:
    """
    Type of document.

    Values:
    - text: Plain text document
    - pdf: PDF document
    - image: Image file
    - html: HTML document
    """
    text = "text"
    pdf = "pdf"
    image = "image"
    html = "html"

class OutputFormat:
    """
    Format for extracted output.

    Values:
    - markdown: Markdown format
    - plain_text: Plain text format
    - structured: Structured data format
    """
    markdown = "markdown"
    plain_text = "plain_text"
    structured = "structured"

class ExtractorType:
    """
    Type of extractor to use.

    Values:
    - litellm: LiteLLM-based extraction
    - custom: Custom extractor
    """
    litellm = "litellm"
    custom = "custom"

class ExtractionSource:
    """
    Source type for extraction.

    Values:
    - file: Extract from file
    - url: Extract from URL
    - text: Extract from text
    """
    file = "file"
    url = "url"
    text = "text"

RAG Configuration

Configuration for Retrieval-Augmented Generation.

class RagConfig:
    """
    Configuration for RAG (Retrieval-Augmented Generation).

    Properties:
    - vector_store_config (VectorStoreConfig): Vector database configuration
    - embedding_config (EmbeddingConfig): Embedding model configuration
    - chunker_config (ChunkerConfig): Text chunking configuration
    - top_k (int): Number of results to retrieve
    """

Vector Store Configuration

Configuration for vector database integration.

class VectorStoreConfig:
    """
    Configuration for vector database.

    Properties:
    - vector_store_type (VectorStoreType): Type of vector store
    - connection_params (dict): Connection parameters
    """

class VectorStoreType:
    """
    Type of vector store.

    Values:
    - lancedb: LanceDB vector database
    """
    lancedb = "lancedb"

class LanceDBConfigBaseProperties:
    """
    LanceDB-specific configuration.

    Properties:
    - uri (str): Database URI
    - table_name (str): Table name for storage
    """

Strict Mode

Validation mode control for data models.

def strict_mode() -> bool:
    """
    Get current strict validation mode status.

    Returns:
    bool: True if strict mode is enabled, False otherwise
    """

def set_strict_mode(enabled: bool) -> None:
    """
    Enable or disable strict validation mode.

    Parameters:
    - enabled (bool): True to enable strict mode, False to disable
    """

Usage Example

from kiln_ai.datamodel import (
    Project, Task, TaskRun, TaskOutput, TaskOutputRating,
    DataSource, DataSourceType, Priority, TaskRequirement
)

# Create a new task
task = Task(
    name="sentiment_analysis",
    description="Analyze sentiment of text",
    instruction="Classify the sentiment as positive, negative, or neutral."
)

# Add a requirement
requirement = TaskRequirement(
    name="valid_sentiment",
    instruction="Output must be one of: positive, negative, neutral",
    priority=Priority.p0  # p0 = highest priority
)
task.requirements.append(requirement)

# Save task
task.save_to_file()

# Create a task run with data source
run = TaskRun(
    parent=task,
    input="This product is amazing!",
    input_source=DataSource(
        type=DataSourceType.human,
        properties={"created_by": "annotator@example.com"}
    ),
    output=TaskOutput(
        output="positive",
        source=DataSource(
            type=DataSourceType.human,
            properties={"created_by": "annotator@example.com"}
        ),
        rating=TaskOutputRating(value=5, type="five_star")
    ),
    tags=["training", "verified"]
)

# Save run
run.save_to_file()

# Load and work with the data
loaded_task = Task.load_from_file(task.path)
all_runs = loaded_task.runs()
print(f"Task has {len(all_runs)} runs")

# Filter runs by tag
training_runs = [r for r in all_runs if "training" in r.tags]

Install with Tessl CLI

npx tessl i tessl/pypi-kiln-ai

docs

configuration.md

datamodel.md

evaluation.md

fine-tuning.md

index.md

models.md

prompts.md

rag-embeddings.md

task-execution.md

tools.md

tile.json