# Fine-tuning

Fine-tuning capabilities for training custom models on task datasets with provider integrations, dataset formatting, and status tracking.

## Capabilities

### Fine-tune Management

Core fine-tuning configuration and lifecycle management.

```python { .api }
from kiln_ai.datamodel import Finetune, FineTuneStatusType

class Finetune:
    """
    Fine-tuning job configuration and tracking.

    Properties:
    - id (str): Unique identifier
    - status (FineTuneStatusType): Current job status
    - model_id (str): Base model identifier
    - provider (str): Fine-tuning provider name
    - parent (Task): Parent task
    - path (str): File system path
    - created_at (str): Creation timestamp
    - provider_id (str | None): Provider-specific job ID
    """

    @staticmethod
    def load_from_file(path: str) -> 'Finetune':
        """
        Load fine-tune from .kiln file.

        Parameters:
        - path (str): Path to finetune.kiln file

        Returns:
            Finetune instance
        """

    def save_to_file(self) -> None:
        """Save fine-tune to .kiln file."""

    def start(self) -> None:
        """
        Start the fine-tuning job.

        Uploads training data and initiates fine-tuning with provider.
        Updates status to 'queued' or 'running'.
        """

    def check_status(self) -> dict:
        """
        Check current status of fine-tuning job.

        Returns:
            dict: Status information including progress, errors, and completion
        """

class FineTuneStatusType:
    """
    Status of fine-tuning job.

    Values:
    - queued: Waiting to start
    - running: Currently training
    - succeeded: Completed successfully
    - failed: Failed with error
    - cancelled: Manually cancelled
    """
    queued = "queued"
    running = "running"
    succeeded = "succeeded"
    failed = "failed"
    cancelled = "cancelled"
```

### Base Fine-tune Adapter

Abstract interface for fine-tuning adapters.

```python { .api }
from kiln_ai.adapters.fine_tune import BaseFinetuneAdapter, FineTuneStatus, FineTuneParameter

class BaseFinetuneAdapter:
    """
    Abstract fine-tune adapter interface.

    Methods:
    - start(): Start fine-tuning job
    - check_status(): Check job status
    - cancel(): Cancel running job
    """

    async def start(self, training_data: list, validation_data: list = None) -> str:
        """
        Start fine-tuning job.

        Parameters:
        - training_data (list): Training dataset
        - validation_data (list | None): Optional validation dataset

        Returns:
            str: Provider job ID
        """

    async def check_status(self, job_id: str) -> 'FineTuneStatus':
        """
        Check fine-tuning job status.

        Parameters:
        - job_id (str): Provider job identifier

        Returns:
            FineTuneStatus: Current status with progress info
        """

    async def cancel(self, job_id: str) -> None:
        """
        Cancel running fine-tuning job.

        Parameters:
        - job_id (str): Provider job identifier
        """

class FineTuneStatus:
    """
    Status of fine-tune job.

    Properties:
    - status (FineTuneStatusType): Current status
    - progress (float | None): Training progress percentage (0-100)
    - error (str | None): Error message if failed
    - completed_at (str | None): Completion timestamp
    - model_id (str | None): Fine-tuned model ID when succeeded
    """

class FineTuneParameter:
    """
    Fine-tuning parameter configuration.

    Properties:
    - name (str): Parameter name (e.g., "learning_rate", "epochs")
    - value: Parameter value
    - description (str): Parameter description
    """
```

### OpenAI Fine-tuning

OpenAI-specific fine-tuning adapter.

```python { .api }
from kiln_ai.adapters.fine_tune import OpenAIFinetune

class OpenAIFinetune(BaseFinetuneAdapter):
    """
    OpenAI fine-tuning adapter.

    Supports:
    - GPT-3.5-turbo
    - GPT-4
    - GPT-4o-mini

    Methods:
    - start(): Upload training data and start job
    - check_status(): Poll OpenAI API for status
    - cancel(): Cancel running job
    """

    def __init__(self, model_id: str, config: dict = None):
        """
        Initialize OpenAI fine-tune adapter.

        Parameters:
        - model_id (str): Base model to fine-tune
        - config (dict | None): Fine-tuning parameters
          - n_epochs (int): Number of training epochs
          - learning_rate_multiplier (float): Learning rate scaling
          - batch_size (int): Training batch size
        """

    async def start(self, training_data: list, validation_data: list = None) -> str:
        """
        Start OpenAI fine-tuning job.

        Parameters:
        - training_data (list): Chat format training examples
        - validation_data (list | None): Optional validation examples

        Returns:
            str: OpenAI fine-tuning job ID
        """

    async def check_status(self, job_id: str) -> 'FineTuneStatus':
        """
        Check OpenAI fine-tuning status.

        Parameters:
        - job_id (str): OpenAI job ID

        Returns:
            FineTuneStatus: Current job status
        """

    async def cancel(self, job_id: str) -> None:
        """
        Cancel OpenAI fine-tuning job.

        Parameters:
        - job_id (str): OpenAI job ID
        """
```

### Dataset Formatting

Format datasets for fine-tuning across different providers.

```python { .api }
from kiln_ai.adapters.fine_tune import DatasetFormatter, DatasetFormat

class DatasetFormatter:
    """
    Format datasets for fine-tuning.

    Methods:
    - format(): Format dataset to specific format
    - format_to_file(): Format and write to file
    """

    def __init__(self, format_type: 'DatasetFormat'):
        """
        Initialize dataset formatter.

        Parameters:
        - format_type (DatasetFormat): Target format
        """

    def format(self, task_runs: list) -> list:
        """
        Format task runs to target format.

        Parameters:
        - task_runs (list[TaskRun]): Task runs to format

        Returns:
            list: Formatted dataset
        """

    def format_to_file(self, task_runs: list, output_path: str) -> None:
        """
        Format dataset and write to file.

        Parameters:
        - task_runs (list[TaskRun]): Task runs to format
        - output_path (str): Output file path
        """

class DatasetFormat:
    """
    Dataset format types.

    Values:
    - openai_chat: OpenAI chat completion format (JSONL)
    - jsonl: Generic JSONL format
    - csv: CSV format
    """
    openai_chat = "openai_chat"
    jsonl = "jsonl"
    csv = "csv"
```

### Fine-tune Registry

Get fine-tune adapters by provider.

```python { .api }
from kiln_ai.adapters.fine_tune.finetune_registry import finetune_adapter_from_provider

def finetune_adapter_from_provider(
    provider: str,
    model_id: str,
    config: dict = None
):
    """
    Get fine-tune adapter for provider.

    Parameters:
    - provider (str): Provider name (e.g., "openai")
    - model_id (str): Base model identifier
    - config (dict | None): Fine-tuning configuration

    Returns:
        BaseFinetuneAdapter: Fine-tune adapter instance
    """
```

## Usage Examples

### Creating a Fine-tune Job

```python
from kiln_ai.datamodel import Task, Finetune, FineTuneStatusType

# Load task with training data
task = Task.load_from_file("path/to/task.kiln")
runs = task.runs()
print(f"Task has {len(runs)} training examples")

# Create fine-tune configuration
finetune = Finetune(
    parent=task,
    model_id="gpt-3.5-turbo",
    provider="openai",
    status=FineTuneStatusType.queued
)
finetune.save_to_file()

# Start fine-tuning
finetune.start()
print(f"Fine-tune job started: {finetune.id}")
print(f"Status: {finetune.status}")
```

### Monitoring Fine-tune Progress

```python
from kiln_ai.datamodel import Finetune, FineTuneStatusType
import asyncio

# Load fine-tune
finetune = Finetune.load_from_file("path/to/finetune.kiln")

# Poll for status updates
async def monitor_finetune(finetune):
    while finetune.status in [FineTuneStatusType.queued, FineTuneStatusType.running]:
        status_info = finetune.check_status()

        print(f"Status: {status_info['status']}")
        if 'progress' in status_info and status_info['progress']:
            print(f"Progress: {status_info['progress']:.1f}%")

        # Save updated status
        finetune.save_to_file()

        # Wait before next check
        await asyncio.sleep(60)  # Check every minute

    print(f"Fine-tune completed with status: {finetune.status}")
    if finetune.status == FineTuneStatusType.succeeded:
        print("Fine-tuned model ready!")
    elif finetune.status == FineTuneStatusType.failed:
        print(f"Fine-tune failed: {status_info.get('error')}")

# Run monitoring
asyncio.run(monitor_finetune(finetune))
```

### Using Fine-tuned Model

```python
from kiln_ai.datamodel import Task, Finetune, FineTuneStatusType
from kiln_ai.adapters import adapter_for_task

# Load task and fine-tune
task = Task.load_from_file("path/to/task.kiln")
finetune = Finetune.load_from_file("path/to/finetune.kiln")

# Check if fine-tune succeeded
if finetune.status != FineTuneStatusType.succeeded:
    print(f"Fine-tune not ready: {finetune.status}")
else:
    # Create adapter with fine-tuned model
    adapter = adapter_for_task(
        task,
        model_name=None,
        provider=None,
        config={"finetune_id": finetune.id}
    )

    # Use fine-tuned model
    result = await adapter.invoke("test input")
    print(f"Output: {result.output}")
```

### Dataset Splitting for Fine-tuning

```python
from kiln_ai.datamodel import Task, DatasetSplit, DatasetSplitDefinition

# Load task
task = Task.load_from_file("path/to/task.kiln")
all_runs = task.runs()
print(f"Total runs: {len(all_runs)}")

# Create train/validation split
split_definition = DatasetSplitDefinition(
    train_ratio=0.8,
    test_ratio=0.0,
    validation_ratio=0.2
)

dataset_split = DatasetSplit(
    parent=task,
    definition=split_definition
)
dataset_split.save_to_file()

# Get training and validation sets
train_runs = [r for r in all_runs if r.id in dataset_split.train_ids]
val_runs = [r for r in all_runs if r.id in dataset_split.validation_ids]

print(f"Training runs: {len(train_runs)}")
print(f"Validation runs: {len(val_runs)}")
```

### Formatting Dataset for OpenAI

```python
from kiln_ai.datamodel import Task
from kiln_ai.adapters.fine_tune import DatasetFormatter, DatasetFormat

# Load task
task = Task.load_from_file("path/to/task.kiln")
runs = task.runs()

# Create formatter for OpenAI chat format
formatter = DatasetFormatter(DatasetFormat.openai_chat)

# Format and save dataset
output_path = "/tmp/training_data.jsonl"
formatter.format_to_file(runs, output_path)
print(f"Dataset saved to {output_path}")

# The file will contain JSONL with format:
# {"messages": [{"role": "system", "content": "..."}, {"role": "user", "content": "..."}, {"role": "assistant", "content": "..."}]}
```

### Custom Fine-tune Parameters

```python
from kiln_ai.adapters.fine_tune import OpenAIFinetune
from kiln_ai.datamodel import Task

task = Task.load_from_file("path/to/task.kiln")

# Configure fine-tuning parameters
config = {
    "n_epochs": 3,
    "learning_rate_multiplier": 0.1,
    "batch_size": 4
}

# Create adapter with custom config
adapter = OpenAIFinetune(
    model_id="gpt-3.5-turbo",
    config=config
)

# Format training data
from kiln_ai.adapters.fine_tune import DatasetFormatter, DatasetFormat

formatter = DatasetFormatter(DatasetFormat.openai_chat)
training_data = formatter.format(task.runs())

# Start fine-tuning
job_id = await adapter.start(training_data)
print(f"Job started: {job_id}")
```

### Comparing Base vs Fine-tuned Model

```python
from kiln_ai.datamodel import Task, Finetune
from kiln_ai.adapters import adapter_for_task

task = Task.load_from_file("path/to/task.kiln")
finetune = Finetune.load_from_file("path/to/finetune.kiln")

# Create adapters for both models
base_adapter = adapter_for_task(
    task,
    model_name="gpt-3.5-turbo",
    provider="openai"
)

finetuned_adapter = adapter_for_task(
    task,
    model_name=None,
    provider=None,
    config={"finetune_id": finetune.id}
)

# Test both on same inputs
test_inputs = ["input1", "input2", "input3"]

print("Comparing base vs fine-tuned model:\n")
for input_data in test_inputs:
    base_result = await base_adapter.invoke(input_data)
    ft_result = await finetuned_adapter.invoke(input_data)

    print(f"Input: {input_data}")
    print(f"Base model: {base_result.output}")
    print(f"Fine-tuned: {ft_result.output}")
    print()
```

### Incremental Fine-tuning

```python
from kiln_ai.datamodel import Task, Finetune, FineTuneStatusType

# Load task
task = Task.load_from_file("path/to/task.kiln")

# First fine-tune
finetune_v1 = Finetune(
    parent=task,
    model_id="gpt-3.5-turbo",
    provider="openai",
    status=FineTuneStatusType.queued
)
finetune_v1.save_to_file()
finetune_v1.start()

# Wait for completion...
# (monitoring code here)

# Second fine-tune on top of first
if finetune_v1.status == FineTuneStatusType.succeeded:
    finetune_v2 = Finetune(
        parent=task,
        model_id=finetune_v1.provider_id,  # Use fine-tuned model as base
        provider="openai",
        status=FineTuneStatusType.queued
    )
    finetune_v2.save_to_file()
    finetune_v2.start()
```

### Error Handling

```python
from kiln_ai.datamodel import Finetune, FineTuneStatusType

finetune = Finetune.load_from_file("path/to/finetune.kiln")

try:
    # Start fine-tuning
    finetune.start()
except Exception as e:
    print(f"Failed to start fine-tune: {e}")
    finetune.status = FineTuneStatusType.failed
    finetune.save_to_file()

# Check for failures during training
status = finetune.check_status()
if finetune.status == FineTuneStatusType.failed:
    print(f"Fine-tune failed: {status.get('error')}")
    # Retry or adjust parameters
```

### Multi-provider Fine-tuning

```python
from kiln_ai.datamodel import Task, Finetune, FineTuneStatusType
from kiln_ai.adapters.fine_tune.finetune_registry import finetune_adapter_from_provider

task = Task.load_from_file("path/to/task.kiln")

# Fine-tune on multiple providers
providers = [
    ("openai", "gpt-3.5-turbo"),
    # Could add more providers here
]

finetunes = []

for provider, model_id in providers:
    # Create fine-tune
    finetune = Finetune(
        parent=task,
        model_id=model_id,
        provider=provider,
        status=FineTuneStatusType.queued
    )
    finetune.save_to_file()

    # Get adapter
    adapter = finetune_adapter_from_provider(provider, model_id)

    # Start fine-tuning
    finetune.start()
    finetunes.append(finetune)
    print(f"Started fine-tune on {provider}: {model_id}")

# Monitor all jobs
print("\nMonitoring fine-tune jobs...")
# (monitoring code for all jobs)
```

### Validating Training Data Quality

```python
from kiln_ai.datamodel import Task

task = Task.load_from_file("path/to/task.kiln")
runs = task.runs()

# Check data quality before fine-tuning
print("Validating training data...")

# Check minimum dataset size
min_size = 50
if len(runs) < min_size:
    print(f"Warning: Dataset has {len(runs)} examples, recommended minimum is {min_size}")

# Check for high-quality ratings
high_quality = [r for r in runs if r.output.rating and r.output.rating.value >= 4]
quality_ratio = len(high_quality) / len(runs)
print(f"High quality examples: {len(high_quality)} ({quality_ratio*100:.1f}%)")

if quality_ratio < 0.7:
    print("Warning: Less than 70% of examples are high quality")

# Check schema validation
from kiln_ai.datamodel import strict_mode, set_strict_mode

set_strict_mode(True)
valid_runs = []
for run in runs:
    try:
        # Validation happens on load with strict mode
        valid_runs.append(run)
    except Exception as e:
        print(f"Invalid run {run.id}: {e}")

print(f"Valid runs: {len(valid_runs)}/{len(runs)}")

if len(valid_runs) >= min_size:
    print("Dataset ready for fine-tuning")
else:
    print("Dataset needs more valid examples")
```