"""
Kiln AI is a comprehensive platform for building, evaluating, and deploying AI
systems with dataset management, model fine-tuning, RAG, and evaluation
capabilities.

This module documents the fine-tuning capabilities: training custom models on
task datasets with provider integrations, dataset formatting, and status
tracking. It covers core fine-tuning configuration and lifecycle management.
"""
from kiln_ai.datamodel import Finetune, FineTuneStatusType
class Finetune:
    """
    Fine-tuning job configuration and tracking.

    Properties:
    - id (str): Unique identifier
    - status (FineTuneStatusType): Current job status
    - model_id (str): Base model identifier
    - provider (str): Fine-tuning provider name
    - parent (Task): Parent task
    - path (str): File system path
    - created_at (str): Creation timestamp
    - provider_id (str | None): Provider-specific job ID
    """

    @staticmethod
    def load_from_file(path: str) -> 'Finetune':
        """
        Load fine-tune from .kiln file.

        Parameters:
        - path (str): Path to finetune.kiln file

        Returns:
        Finetune instance
        """

    def save_to_file(self) -> None:
        """Save fine-tune to .kiln file."""

    def start(self) -> None:
        """
        Start the fine-tuning job.

        Uploads training data and initiates fine-tuning with the provider.
        Updates status to 'queued' or 'running'.
        """

    def check_status(self) -> dict:
        """
        Check current status of the fine-tuning job.

        Returns:
        dict: Status information including progress, errors, and completion
        """
class FineTuneStatusType:
    """
    Status of a fine-tuning job.

    Values:
    - queued: Waiting to start
    - running: Currently training
    - succeeded: Completed successfully
    - failed: Failed with error
    - cancelled: Manually cancelled
    """
    queued = "queued"
    running = "running"
    succeeded = "succeeded"
    failed = "failed"
    cancelled = "cancelled"

# Abstract interface for fine-tuning adapters.
from kiln_ai.adapters.fine_tune import BaseFinetuneAdapter, FineTuneStatus, FineTuneParameter
class BaseFinetuneAdapter:
"""
Abstract fine-tune adapter interface.
Methods:
- start(): Start fine-tuning job
- check_status(): Check job status
- cancel(): Cancel running job
"""
async def start(self, training_data: list, validation_data: list = None) -> str:
"""
Start fine-tuning job.
Parameters:
- training_data (list): Training dataset
- validation_data (list | None): Optional validation dataset
Returns:
str: Provider job ID
"""
async def check_status(self, job_id: str) -> 'FineTuneStatus':
"""
Check fine-tuning job status.
Parameters:
- job_id (str): Provider job identifier
Returns:
FineTuneStatus: Current status with progress info
"""
async def cancel(self, job_id: str) -> None:
"""
Cancel running fine-tuning job.
Parameters:
- job_id (str): Provider job identifier
"""
class FineTuneStatus:
    """
    Status snapshot of a fine-tune job.

    Properties:
    - status (FineTuneStatusType): Current status
    - progress (float | None): Training progress percentage (0-100)
    - error (str | None): Error message if failed
    - completed_at (str | None): Completion timestamp
    - model_id (str | None): Fine-tuned model ID when succeeded
    """
class FineTuneParameter:
    """
    Fine-tuning parameter configuration.

    Properties:
    - name (str): Parameter name (e.g., "learning_rate", "epochs")
    - value: Parameter value
    - description (str): Parameter description
    """

# OpenAI-specific fine-tuning adapter.
from kiln_ai.adapters.fine_tune import OpenAIFinetune
class OpenAIFinetune(BaseFinetuneAdapter):
    """
    OpenAI fine-tuning adapter.

    Supports:
    - GPT-3.5-turbo
    - GPT-4
    - GPT-4o-mini

    Methods:
    - start(): Upload training data and start job
    - check_status(): Poll OpenAI API for status
    - cancel(): Cancel running job
    """

    def __init__(self, model_id: str, config: dict | None = None):
        """
        Initialize OpenAI fine-tune adapter.

        Parameters:
        - model_id (str): Base model to fine-tune
        - config (dict | None): Fine-tuning parameters
          - n_epochs (int): Number of training epochs
          - learning_rate_multiplier (float): Learning rate scaling
          - batch_size (int): Training batch size
        """

    async def start(self, training_data: list, validation_data: list | None = None) -> str:
        """
        Start an OpenAI fine-tuning job.

        Parameters:
        - training_data (list): Chat format training examples
        - validation_data (list | None): Optional validation examples

        Returns:
        str: OpenAI fine-tuning job ID
        """

    async def check_status(self, job_id: str) -> 'FineTuneStatus':
        """
        Check OpenAI fine-tuning status.

        Parameters:
        - job_id (str): OpenAI job ID

        Returns:
        FineTuneStatus: Current job status
        """

    async def cancel(self, job_id: str) -> None:
        """
        Cancel an OpenAI fine-tuning job.

        Parameters:
        - job_id (str): OpenAI job ID
        """

# Format datasets for fine-tuning across different providers.
from kiln_ai.adapters.fine_tune import DatasetFormatter, DatasetFormat
class DatasetFormatter:
    """
    Format datasets for fine-tuning.

    Methods:
    - format(): Format dataset to a specific format
    - format_to_file(): Format and write to file
    """

    def __init__(self, format_type: 'DatasetFormat'):
        """
        Initialize dataset formatter.

        Parameters:
        - format_type (DatasetFormat): Target format
        """

    def format(self, task_runs: list) -> list:
        """
        Format task runs to the target format.

        Parameters:
        - task_runs (list[TaskRun]): Task runs to format

        Returns:
        list: Formatted dataset
        """

    def format_to_file(self, task_runs: list, output_path: str) -> None:
        """
        Format the dataset and write it to a file.

        Parameters:
        - task_runs (list[TaskRun]): Task runs to format
        - output_path (str): Output file path
        """
class DatasetFormat:
    """
    Dataset format types.

    Values:
    - openai_chat: OpenAI chat completion format (JSONL)
    - jsonl: Generic JSONL format
    - csv: CSV format
    """
    openai_chat = "openai_chat"
    jsonl = "jsonl"
    csv = "csv"

# Get fine-tune adapters by provider.
from kiln_ai.adapters.fine_tune.finetune_registry import finetune_adapter_from_provider
def finetune_adapter_from_provider(
    provider: str,
    model_id: str,
    config: dict | None = None,
):
    """
    Get the fine-tune adapter for a provider.

    Parameters:
    - provider (str): Provider name (e.g., "openai")
    - model_id (str): Base model identifier
    - config (dict | None): Fine-tuning configuration

    Returns:
    BaseFinetuneAdapter: Fine-tune adapter instance
    """

from kiln_ai.datamodel import Task, Finetune, FineTuneStatusType
# Example: create and start a fine-tune from a task's dataset.
from kiln_ai.datamodel import Task, Finetune, FineTuneStatusType

# Load task with training data
task = Task.load_from_file("path/to/task.kiln")
runs = task.runs()
print(f"Task has {len(runs)} training examples")

# Create fine-tune configuration
finetune = Finetune(
    parent=task,
    model_id="gpt-3.5-turbo",
    provider="openai",
    status=FineTuneStatusType.queued,
)
finetune.save_to_file()

# Start fine-tuning
finetune.start()
print(f"Fine-tune job started: {finetune.id}")
print(f"Status: {finetune.status}")

from kiln_ai.datamodel import Finetune, FineTuneStatusType
# Example: poll a fine-tune job until it leaves the queued/running states.
from kiln_ai.datamodel import Finetune, FineTuneStatusType
import asyncio

# Load fine-tune
finetune = Finetune.load_from_file("path/to/finetune.kiln")

# Poll for status updates
async def monitor_finetune(finetune):
    # Initialize so the final error lookup is safe even when the job is
    # already in a terminal state and the loop body never runs.
    status_info = {}
    while finetune.status in [FineTuneStatusType.queued, FineTuneStatusType.running]:
        status_info = finetune.check_status()
        print(f"Status: {status_info['status']}")
        if status_info.get('progress'):
            print(f"Progress: {status_info['progress']:.1f}%")
        # Save updated status
        finetune.save_to_file()
        # Wait before next check
        await asyncio.sleep(60)  # Check every minute
    print(f"Fine-tune completed with status: {finetune.status}")
    if finetune.status == FineTuneStatusType.succeeded:
        print(f"Fine-tuned model ready!")
    elif finetune.status == FineTuneStatusType.failed:
        print(f"Fine-tune failed: {status_info.get('error')}")

# Run monitoring. A bare top-level `await` is invalid in a module,
# so drive the coroutine with asyncio.run.
asyncio.run(monitor_finetune(finetune))

from kiln_ai.datamodel import Task, Finetune
# Example: run inference with a completed fine-tuned model.
# NOTE(review): the original snippet used FineTuneStatusType without importing it.
from kiln_ai.datamodel import Task, Finetune, FineTuneStatusType
from kiln_ai.adapters import adapter_for_task
import asyncio

# Load task and fine-tune
task = Task.load_from_file("path/to/task.kiln")
finetune = Finetune.load_from_file("path/to/finetune.kiln")

# Check if fine-tune succeeded
if finetune.status != FineTuneStatusType.succeeded:
    print(f"Fine-tune not ready: {finetune.status}")
else:
    # Create adapter with fine-tuned model
    adapter = adapter_for_task(
        task,
        model_name=None,
        provider=None,
        config={"finetune_id": finetune.id},
    )
    # Use fine-tuned model. `invoke` is async and top-level await is
    # invalid in a module, so run it on an event loop.
    result = asyncio.run(adapter.invoke("test input"))
    print(f"Output: {result.output}")

from kiln_ai.datamodel import Task, DatasetSplit, DatasetSplitDefinition
# Example: split a task's runs into training and validation sets.
from kiln_ai.datamodel import Task, DatasetSplit, DatasetSplitDefinition

# Load task
task = Task.load_from_file("path/to/task.kiln")
all_runs = task.runs()
print(f"Total runs: {len(all_runs)}")

# Create train/validation split
split_definition = DatasetSplitDefinition(
    train_ratio=0.8,
    test_ratio=0.0,
    validation_ratio=0.2,
)
dataset_split = DatasetSplit(
    parent=task,
    definition=split_definition,
)
dataset_split.save_to_file()

# Get training and validation sets
train_runs = [r for r in all_runs if r.id in dataset_split.train_ids]
val_runs = [r for r in all_runs if r.id in dataset_split.validation_ids]
print(f"Training runs: {len(train_runs)}")
print(f"Validation runs: {len(val_runs)}")

from kiln_ai.datamodel import Task
# Example: format a task's runs into an OpenAI chat JSONL dataset.
from kiln_ai.datamodel import Task
from kiln_ai.adapters.fine_tune import DatasetFormatter, DatasetFormat

# Load task
task = Task.load_from_file("path/to/task.kiln")
runs = task.runs()

# Create formatter for OpenAI chat format
formatter = DatasetFormatter(DatasetFormat.openai_chat)

# Format and save dataset
output_path = "/tmp/training_data.jsonl"
formatter.format_to_file(runs, output_path)
print(f"Dataset saved to {output_path}")

# The file will contain JSONL with format:
# {"messages": [{"role": "system", "content": "..."}, {"role": "user", "content": "..."}, {"role": "assistant", "content": "..."}]}

from kiln_ai.adapters.fine_tune import OpenAIFinetune
# Example: configure OpenAI fine-tuning hyperparameters and start a job.
from kiln_ai.adapters.fine_tune import OpenAIFinetune, DatasetFormatter, DatasetFormat
from kiln_ai.datamodel import Task
import asyncio

task = Task.load_from_file("path/to/task.kiln")

# Configure fine-tuning parameters
config = {
    "n_epochs": 3,
    "learning_rate_multiplier": 0.1,
    "batch_size": 4,
}

# Create adapter with custom config
adapter = OpenAIFinetune(
    model_id="gpt-3.5-turbo",
    config=config,
)

# Format training data
formatter = DatasetFormatter(DatasetFormat.openai_chat)
training_data = formatter.format(task.runs())

# Start fine-tuning. `start` is async and a bare top-level `await`
# is invalid in a module, so drive it with asyncio.run.
job_id = asyncio.run(adapter.start(training_data))
print(f"Job started: {job_id}")

from kiln_ai.datamodel import Task, Finetune
# Example: compare a base model against its fine-tuned counterpart.
from kiln_ai.datamodel import Task, Finetune
from kiln_ai.adapters import adapter_for_task
import asyncio

task = Task.load_from_file("path/to/task.kiln")
finetune = Finetune.load_from_file("path/to/finetune.kiln")

# Create adapters for both models
base_adapter = adapter_for_task(
    task,
    model_name="gpt-3.5-turbo",
    provider="openai",
)
finetuned_adapter = adapter_for_task(
    task,
    model_name=None,
    provider=None,
    config={"finetune_id": finetune.id},
)

# Test both on the same inputs. `invoke` is async, so the loop runs
# inside a coroutine driven by asyncio.run (top-level await is invalid).
async def compare(inputs):
    print("Comparing base vs fine-tuned model:\n")
    for input_data in inputs:
        base_result = await base_adapter.invoke(input_data)
        ft_result = await finetuned_adapter.invoke(input_data)
        print(f"Input: {input_data}")
        print(f"Base model: {base_result.output}")
        print(f"Fine-tuned: {ft_result.output}")
        print()

test_inputs = ["input1", "input2", "input3"]
asyncio.run(compare(test_inputs))

from kiln_ai.datamodel import Task, Finetune, FineTuneStatusType
# Example: iterative fine-tuning — train a second job on top of the first.
from kiln_ai.datamodel import Task, Finetune, FineTuneStatusType

# Load task
task = Task.load_from_file("path/to/task.kiln")

# First fine-tune
finetune_v1 = Finetune(
    parent=task,
    model_id="gpt-3.5-turbo",
    provider="openai",
    status=FineTuneStatusType.queued,
)
finetune_v1.save_to_file()
finetune_v1.start()

# Wait for completion...
# (monitoring code here)

# Second fine-tune on top of first
if finetune_v1.status == FineTuneStatusType.succeeded:
    finetune_v2 = Finetune(
        parent=task,
        model_id=finetune_v1.provider_id,  # Use fine-tuned model as base
        provider="openai",
        status=FineTuneStatusType.queued,
    )
    finetune_v2.save_to_file()
    finetune_v2.start()

from kiln_ai.datamodel import Finetune, FineTuneStatusType
# Example: handle failures when starting and monitoring a fine-tune.
from kiln_ai.datamodel import Finetune, FineTuneStatusType

finetune = Finetune.load_from_file("path/to/finetune.kiln")

try:
    # Start fine-tuning
    finetune.start()
except Exception as e:
    # Persist the failed state so it can be inspected later
    print(f"Failed to start fine-tune: {e}")
    finetune.status = FineTuneStatusType.failed
    finetune.save_to_file()

# Check for failures during training
status = finetune.check_status()
if finetune.status == FineTuneStatusType.failed:
    print(f"Fine-tune failed: {status.get('error')}")
    # Retry or adjust parameters

from kiln_ai.datamodel import Task, Finetune
# Example: start fine-tunes across multiple providers.
# NOTE(review): the original snippet used FineTuneStatusType without importing it.
from kiln_ai.datamodel import Task, Finetune, FineTuneStatusType
from kiln_ai.adapters.fine_tune.finetune_registry import finetune_adapter_from_provider

task = Task.load_from_file("path/to/task.kiln")

# Fine-tune on multiple providers
providers = [
    ("openai", "gpt-3.5-turbo"),
    # Could add more providers here
]
finetunes = []
for provider, model_id in providers:
    # Create fine-tune
    finetune = Finetune(
        parent=task,
        model_id=model_id,
        provider=provider,
        status=FineTuneStatusType.queued,
    )
    finetune.save_to_file()
    # Get adapter
    adapter = finetune_adapter_from_provider(provider, model_id)
    # Start fine-tuning
    finetune.start()
    finetunes.append(finetune)
    print(f"Started fine-tune on {provider}: {model_id}")

# Monitor all jobs
print("\nMonitoring fine-tune jobs...")
# (monitoring code for all jobs)

from kiln_ai.datamodel import Task
# Example: validate dataset quality before fine-tuning.
from kiln_ai.datamodel import Task

task = Task.load_from_file("path/to/task.kiln")
runs = task.runs()

# Check data quality before fine-tuning
print("Validating training data...")

# Check minimum dataset size
min_size = 50
if len(runs) < min_size:
    print(f"Warning: Dataset has {len(runs)} examples, recommended minimum is {min_size}")

# Check for high-quality ratings. Guard the division so an empty
# dataset does not raise ZeroDivisionError.
high_quality = [r for r in runs if r.output.rating and r.output.rating.value >= 4]
quality_ratio = len(high_quality) / len(runs) if runs else 0.0
print(f"High quality examples: {len(high_quality)} ({quality_ratio*100:.1f}%)")
if quality_ratio < 0.7:
    print("Warning: Less than 70% of examples are high quality")

# Check schema validation
from kiln_ai.datamodel import strict_mode, set_strict_mode
set_strict_mode(True)
valid_runs = []
for run in runs:
    try:
        # Validation happens on load with strict mode
        valid_runs.append(run)
    except Exception as e:
        print(f"Invalid run {run.id}: {e}")
print(f"Valid runs: {len(valid_runs)}/{len(runs)}")
if len(valid_runs) >= min_size:
    print("Dataset ready for fine-tuning")
else:
    print("Dataset needs more valid examples")

# Install with Tessl CLI
# npx tessl i tessl/pypi-kiln-ai