pypi-openai

Description
Official Python library for the OpenAI API providing chat completions, embeddings, audio, images, and more
Author
tessl
How to use

npx @tessl/cli registry install tessl/pypi-openai@1.106.0

docs/fine-tuning.md

# Fine-tuning

Create and manage custom model training jobs to adapt OpenAI models to specific use cases and domains with your own data.

## Capabilities

### Fine-tuning Jobs

Create and manage fine-tuning jobs to customize models for specific tasks and domains.

```python { .api }
def create(
    self,
    *,
    model: Union[str, FineTuningModel],
    training_file: str,
    hyperparameters: HyperparametersParam | NotGiven = NOT_GIVEN,
    suffix: str | NotGiven = NOT_GIVEN,
    validation_file: str | NotGiven = NOT_GIVEN,
    integrations: List[IntegrationParam] | NotGiven = NOT_GIVEN,
    seed: int | NotGiven = NOT_GIVEN,
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN
) -> FineTuningJob: ...

def list(
    self,
    *,
    after: str | NotGiven = NOT_GIVEN,
    limit: int | NotGiven = NOT_GIVEN,
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN
) -> SyncPage[FineTuningJob]: ...

def retrieve(
    self,
    fine_tuning_job_id: str,
    *,
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN
) -> FineTuningJob: ...

def cancel(
    self,
    fine_tuning_job_id: str,
    *,
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN
) -> FineTuningJob: ...
```

Usage examples:

```python
from openai import OpenAI

client = OpenAI()

# Basic fine-tuning job
fine_tuning_job = client.fine_tuning.jobs.create(
    training_file="file-abc123",
    model="gpt-3.5-turbo-0125"
)

print(f"Fine-tuning job created: {fine_tuning_job.id}")
print(f"Status: {fine_tuning_job.status}")
print(f"Model: {fine_tuning_job.model}")

# Fine-tuning with custom hyperparameters
fine_tuning_job = client.fine_tuning.jobs.create(
    training_file="file-abc123",
    validation_file="file-def456",
    model="gpt-3.5-turbo-0125",
    hyperparameters={
        "n_epochs": 3,
        "batch_size": "auto",
        "learning_rate_multiplier": "auto"
    },
    suffix="my-custom-model"
)

print(f"Custom fine-tuning job: {fine_tuning_job.id}")

# List all fine-tuning jobs
jobs = client.fine_tuning.jobs.list(limit=10)

print("Recent fine-tuning jobs:")
for job in jobs:
    print(f"  {job.id}: {job.status} - {job.fine_tuned_model or 'In progress'}")

# Retrieve a specific job
job_id = "ftjob-abc123"
job = client.fine_tuning.jobs.retrieve(job_id)

print("Job details:")
print(f"  ID: {job.id}")
print(f"  Status: {job.status}")
print(f"  Model: {job.model}")
print(f"  Fine-tuned model: {job.fine_tuned_model}")
print(f"  Created: {job.created_at}")
print(f"  Training file: {job.training_file}")

# Cancel a running job
if job.status in ["running", "queued"]:
    cancelled_job = client.fine_tuning.jobs.cancel(job_id)
    print(f"Job cancelled: {cancelled_job.status}")
```

### Job Events and Monitoring

Track fine-tuning progress through job events and status monitoring.

```python { .api }
def list_events(
    self,
    fine_tuning_job_id: str,
    *,
    after: str | NotGiven = NOT_GIVEN,
    limit: int | NotGiven = NOT_GIVEN,
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN
) -> SyncPage[FineTuningJobEvent]: ...
```

Usage examples:

```python
import time

# Monitor fine-tuning job progress
def monitor_fine_tuning_job(job_id: str):
    """Monitor a fine-tuning job until completion"""

    print(f"Monitoring job {job_id}...")

    while True:
        job = client.fine_tuning.jobs.retrieve(job_id)
        print(f"Status: {job.status}")

        if job.status in ["succeeded", "failed", "cancelled"]:
            print(f"Job finished with status: {job.status}")
            if job.status == "succeeded":
                print(f"Fine-tuned model: {job.fine_tuned_model}")
            break

        # Wait before checking again
        time.sleep(30)

    return job

# Monitor job
job_id = "ftjob-abc123"
final_job = monitor_fine_tuning_job(job_id)

# Get job events for detailed progress
events = client.fine_tuning.jobs.list_events(job_id, limit=50)

print("\nJob events:")
for event in events:
    print(f"  [{event.created_at}] {event.level}: {event.message}")

# Get recent events only
recent_events = client.fine_tuning.jobs.list_events(
    job_id,
    limit=10
)

print("\nRecent events:")
for event in recent_events:
    print(f"  {event.message}")

# Real-time event streaming (polling)
def stream_job_events(job_id: str, poll_interval: int = 10):
    """Stream job events in real time by polling"""

    last_event_id = None

    while True:
        job = client.fine_tuning.jobs.retrieve(job_id)

        # Get new events
        if last_event_id:
            events = client.fine_tuning.jobs.list_events(
                job_id,
                after=last_event_id,
                limit=100
            )
        else:
            events = client.fine_tuning.jobs.list_events(
                job_id,
                limit=10
            )

        # Process new events (oldest first)
        for event in reversed(list(events)):
            print(f"[{job.id}] {event.level}: {event.message}")
            last_event_id = event.id

        # Check if job is complete
        if job.status in ["succeeded", "failed", "cancelled"]:
            print(f"Job completed with status: {job.status}")
            break

        time.sleep(poll_interval)

# Stream events
# stream_job_events("ftjob-abc123")
```

### Training Data Preparation

Prepare and validate training data for optimal fine-tuning results.

Usage examples:

```python
import json
from typing import List, Dict, Any

def prepare_chat_training_data(conversations: List[Dict]) -> str:
    """Convert conversations to JSONL format for fine-tuning"""

    jsonl_data = []

    for conversation in conversations:
        # Ensure proper chat format
        if "messages" not in conversation:
            # Convert simple Q&A to chat format
            if "input" in conversation and "output" in conversation:
                messages = [
                    {"role": "user", "content": conversation["input"]},
                    {"role": "assistant", "content": conversation["output"]}
                ]
            else:
                continue
        else:
            messages = conversation["messages"]

        # Validate message format
        valid_conversation = {"messages": []}

        for message in messages:
            if "role" in message and "content" in message:
                if message["role"] in ["system", "user", "assistant"]:
                    valid_conversation["messages"].append({
                        "role": message["role"],
                        "content": str(message["content"])
                    })

        # Only include conversations with at least user + assistant
        if len(valid_conversation["messages"]) >= 2:
            jsonl_data.append(valid_conversation)

    # Write to JSONL file
    filename = "training_data.jsonl"
    with open(filename, 'w') as f:
        for item in jsonl_data:
            f.write(json.dumps(item) + '\n')

    print(f"Prepared {len(jsonl_data)} training examples in {filename}")
    return filename

# Example conversations
conversations = [
    {
        "input": "What is machine learning?",
        "output": "Machine learning is a subset of artificial intelligence that enables computers to learn and improve from experience without being explicitly programmed."
    },
    {
        "messages": [
            {"role": "system", "content": "You are a helpful coding assistant."},
            {"role": "user", "content": "How do I reverse a string in Python?"},
            {"role": "assistant", "content": "You can reverse a string in Python using slicing: `reversed_string = original_string[::-1]`"}
        ]
    },
    {
        "input": "Explain neural networks",
        "output": "Neural networks are computing systems inspired by biological neural networks. They consist of interconnected nodes (neurons) that process information and learn patterns from data."
    }
]

# Prepare training data
training_file_path = prepare_chat_training_data(conversations)

# Upload training file
with open(training_file_path, "rb") as f:
    training_file = client.files.create(
        file=f,
        purpose="fine-tune"
    )

print(f"Training file uploaded: {training_file.id}")

# Create validation data (optional)
def create_validation_split(jsonl_file: str, split_ratio: float = 0.2):
    """Split training data into training and validation sets"""
    import random

    with open(jsonl_file, 'r') as f:
        lines = f.readlines()

    # Shuffle and split
    random.shuffle(lines)

    split_point = int(len(lines) * (1 - split_ratio))
    train_lines = lines[:split_point]
    val_lines = lines[split_point:]

    # Write training set
    train_file = "train_data.jsonl"
    with open(train_file, 'w') as f:
        f.writelines(train_lines)

    # Write validation set
    val_file = "val_data.jsonl"
    with open(val_file, 'w') as f:
        f.writelines(val_lines)

    print(f"Split into {len(train_lines)} training and {len(val_lines)} validation examples")
    return train_file, val_file

# Create train/validation split
train_file, val_file = create_validation_split("training_data.jsonl")

# Upload both files
with open(train_file, "rb") as f:
    train_file_obj = client.files.create(file=f, purpose="fine-tune")

with open(val_file, "rb") as f:
    val_file_obj = client.files.create(file=f, purpose="fine-tune")

# Start fine-tuning with validation
job = client.fine_tuning.jobs.create(
    training_file=train_file_obj.id,
    validation_file=val_file_obj.id,
    model="gpt-3.5-turbo-0125",
    hyperparameters={
        "n_epochs": 3
    }
)

print(f"Fine-tuning job with validation: {job.id}")
```

### Model Checkpoints

Access and manage model checkpoints created during fine-tuning.

```python { .api }
def list(
    self,
    fine_tuning_job_id: str,
    *,
    after: str | NotGiven = NOT_GIVEN,
    limit: int | NotGiven = NOT_GIVEN,
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN
) -> SyncPage[FineTuningJobCheckpoint]: ...
```

Usage examples:

```python
# List checkpoints for a job
job_id = "ftjob-abc123"
checkpoints = client.fine_tuning.jobs.checkpoints.list(job_id)

print(f"Checkpoints for job {job_id}:")
for checkpoint in checkpoints:
    print(f"  Step {checkpoint.step_number}: {checkpoint.fine_tuned_model_checkpoint}")
    print(f"  Metrics: {checkpoint.metrics}")

# Use a checkpoint for inference
if checkpoints.data:
    checkpoint_model = checkpoints.data[0].fine_tuned_model_checkpoint

    # Test the checkpoint model
    response = client.chat.completions.create(
        model=checkpoint_model,
        messages=[
            {"role": "user", "content": "Test the fine-tuned model"}
        ]
    )

    print(f"Checkpoint model response: {response.choices[0].message.content}")

# Compare checkpoint performance
def evaluate_checkpoint(checkpoint_model: str, test_cases: List[Dict]):
    """Evaluate a checkpoint model on test cases"""

    results = []

    for test_case in test_cases:
        response = client.chat.completions.create(
            model=checkpoint_model,
            messages=test_case["messages"],
            max_tokens=100
        )

        result = {
            "input": test_case["messages"][-1]["content"],
            "expected": test_case.get("expected", ""),
            "actual": response.choices[0].message.content,
            "model": checkpoint_model
        }

        results.append(result)

    return results

# Test cases for evaluation
test_cases = [
    {
        "messages": [{"role": "user", "content": "What is AI?"}],
        "expected": "AI explanation"
    },
    {
        "messages": [{"role": "user", "content": "How does ML work?"}],
        "expected": "ML explanation"
    }
]

# Evaluate each checkpoint
checkpoint_results = {}
for checkpoint in checkpoints.data[:3]:  # Test the first 3 checkpoints
    model_id = checkpoint.fine_tuned_model_checkpoint
    results = evaluate_checkpoint(model_id, test_cases)
    checkpoint_results[checkpoint.step_number] = results

    print(f"Checkpoint {checkpoint.step_number} evaluation completed")
```

### Production Deployment

Deploy and use fine-tuned models in production applications.

Usage examples:

```python
# Get completed fine-tuning job
job = client.fine_tuning.jobs.retrieve("ftjob-abc123")

if job.status == "succeeded":
    fine_tuned_model = job.fine_tuned_model
    print(f"Using fine-tuned model: {fine_tuned_model}")

    # Production usage
    def generate_response(user_input: str, model: str = fine_tuned_model):
        """Generate a response using the fine-tuned model"""

        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "user", "content": user_input}
            ],
            temperature=0.7,
            max_tokens=150
        )

        return response.choices[0].message.content

    # Test production deployment
    test_inputs = [
        "Explain quantum computing",
        "What are the benefits of renewable energy?",
        "How does blockchain technology work?"
    ]

    for input_text in test_inputs:
        response = generate_response(input_text)
        print(f"Input: {input_text}")
        print(f"Response: {response}\n")

    # A/B testing between base and fine-tuned model
    def compare_models(input_text: str, base_model: str = "gpt-3.5-turbo"):
        """Compare base model vs fine-tuned model responses"""

        # Base model response
        base_response = client.chat.completions.create(
            model=base_model,
            messages=[{"role": "user", "content": input_text}]
        )

        # Fine-tuned model response
        ft_response = client.chat.completions.create(
            model=fine_tuned_model,
            messages=[{"role": "user", "content": input_text}]
        )

        return {
            "input": input_text,
            "base_model": {
                "model": base_model,
                "response": base_response.choices[0].message.content
            },
            "fine_tuned_model": {
                "model": fine_tuned_model,
                "response": ft_response.choices[0].message.content
            }
        }

    # Compare models
    comparison = compare_models("Explain machine learning")
    print("Model comparison:")
    print(f"Base: {comparison['base_model']['response']}")
    print(f"Fine-tuned: {comparison['fine_tuned_model']['response']}")

else:
    print(f"Job not ready: {job.status}")

# Model management
def manage_fine_tuned_models():
    """List and manage fine-tuned models"""

    # List models to find fine-tuned ones
    models = client.models.list()

    fine_tuned_models = []
    for model in models:
        if "ft:" in model.id:  # Fine-tuned models have an "ft:" prefix
            fine_tuned_models.append(model)

    print(f"Found {len(fine_tuned_models)} fine-tuned models:")
    for model in fine_tuned_models:
        print(f"  {model.id} (created: {model.created})")

    return fine_tuned_models

# List fine-tuned models
ft_models = manage_fine_tuned_models()

# Delete an old fine-tuned model (if needed)
# Note: deleting a model requires appropriate permissions in your organization
# for model in ft_models:
#     if "old-model-suffix" in model.id:
#         try:
#             client.models.delete(model.id)
#             print(f"Deleted model: {model.id}")
#         except Exception as e:
#             print(f"Could not delete {model.id}: {e}")
```

## Types

### Core Response Types

```python { .api }
class FineTuningJob(BaseModel):
    id: str
    created_at: int
    error: Optional[FineTuningJobError]
    fine_tuned_model: Optional[str]
    finished_at: Optional[int]
    hyperparameters: FineTuningJobHyperparameters
    model: str
    object: Literal["fine_tuning.job"]
    organization_id: str
    result_files: List[str]
    seed: int
    status: Literal["validating_files", "queued", "running", "succeeded", "failed", "cancelled"]
    trained_tokens: Optional[int]
    training_file: str
    validation_file: Optional[str]
    user_provided_suffix: Optional[str]
    integrations: Optional[List[FineTuningJobIntegration]]

class FineTuningJobEvent(BaseModel):
    id: str
    created_at: int
    level: Literal["info", "warn", "error"]
    message: str
    object: Literal["fine_tuning.job.event"]

class FineTuningJobCheckpoint(BaseModel):
    id: str
    created_at: int
    fine_tuned_model_checkpoint: str
    fine_tuning_job_id: str
    metrics: Dict[str, float]
    object: Literal["fine_tuning.job.checkpoint"]
    step_number: int
```

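As a quick illustration of these fields, the sketch below inspects a retrieved job and surfaces the structured `error` when a job fails. The job ID is a placeholder:

```python
# A minimal sketch: inspect a retrieved FineTuningJob and report failures.
# "ftjob-abc123" is a placeholder job ID.
job = client.fine_tuning.jobs.retrieve("ftjob-abc123")

if job.status == "failed" and job.error:
    # FineTuningJobError carries a machine-readable code plus a message;
    # `param` names the offending request parameter when one is known
    print(f"Job failed [{job.error.code}]: {job.error.message}")
    if job.error.param:
        print(f"Offending parameter: {job.error.param}")
elif job.status == "succeeded":
    print(f"Trained {job.trained_tokens} tokens -> {job.fine_tuned_model}")
```
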
### Parameter Types

```python { .api }
# Fine-tuning job creation parameters
FineTuningJobCreateParams = TypedDict('FineTuningJobCreateParams', {
    'model': Required[Union[str, FineTuningModel]],
    'training_file': Required[str],
    'hyperparameters': NotRequired[HyperparametersParam],
    'suffix': NotRequired[str],
    'validation_file': NotRequired[str],
    'integrations': NotRequired[List[IntegrationParam]],
    'seed': NotRequired[int],
}, total=False)

# Hyperparameters configuration
class HyperparametersParam(TypedDict, total=False):
    batch_size: Union[Literal["auto"], int]
    learning_rate_multiplier: Union[Literal["auto"], float]
    n_epochs: Union[Literal["auto"], int]

# Integration parameters
class IntegrationParam(TypedDict, total=False):
    type: Required[Literal["wandb"]]
    wandb: Required[WandbIntegrationParam]

class WandbIntegrationParam(TypedDict, total=False):
    project: Required[str]
    name: str
    entity: str
    tags: List[str]
```

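The `integrations` and `seed` parameters accept these types directly in `client.fine_tuning.jobs.create`. A minimal sketch of a create call with a Weights & Biases integration, based on the parameter types above; the project name, tags, and seed are placeholder values:

```python
# A minimal sketch: create a job with a W&B integration and a fixed seed
# for reproducibility. Project and tag values are placeholders.
job = client.fine_tuning.jobs.create(
    model="gpt-3.5-turbo-0125",
    training_file="file-abc123",
    seed=42,  # fixed seed so reruns are comparable
    integrations=[
        {
            "type": "wandb",
            "wandb": {
                "project": "my-finetune-project",
                "tags": ["experiment-1"],
            },
        }
    ],
)

print(f"Job {job.id} created with W&B logging enabled")
```
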
### Model and Status Types

```python { .api }
# Supported fine-tuning models
FineTuningModel = Literal[
    "babbage-002",
    "davinci-002",
    "gpt-3.5-turbo-0125",
    "gpt-3.5-turbo-1106",
    "gpt-4-0613",
    "gpt-4o-mini-2024-07-18",
    "gpt-4o-2024-08-06"
]

# Job status enumeration
FineTuningJobStatus = Literal[
    "validating_files",
    "queued",
    "running",
    "succeeded",
    "failed",
    "cancelled"
]

# Event levels
EventLevel = Literal["info", "warn", "error"]

# Error information
class FineTuningJobError(BaseModel):
    code: str
    message: str
    param: Optional[str]
```

### Training Configuration

```python { .api }
# Hyperparameter ranges and defaults
class HyperparameterDefaults:
    n_epochs: Union[Literal["auto"], int] = "auto"  # Typically 1-50
    batch_size: Union[Literal["auto"], int] = "auto"  # Powers of 2, up to 256
    learning_rate_multiplier: Union[Literal["auto"], float] = "auto"  # 0.02 to 2.0

# Training data requirements
class TrainingDataRequirements:
    min_examples: int = 10
    recommended_examples: int = 50
    format: str = "jsonl"
    required_fields: List[str] = ["messages"]

    # Message format requirements
    message_roles: List[str] = ["system", "user", "assistant"]
    min_messages_per_example: int = 2

    # File size limits
    max_file_size_mb: int = 100
    max_tokens_per_example: int = 4096

# Cost and timing estimates
class FineTuningEstimates:
    # Approximate costs (varies by model and usage)
    cost_per_1k_tokens_training: Dict[str, float] = {
        "gpt-3.5-turbo": 0.008,
        "gpt-4": 0.030,
        "babbage-002": 0.0004,
        "davinci-002": 0.0060
    }

    # Typical training times
    typical_duration_hours: Dict[str, Tuple[float, float]] = {
        "small_dataset": (0.1, 1.0),   # < 1,000 examples
        "medium_dataset": (1.0, 6.0),  # 1,000-10,000 examples
        "large_dataset": (6.0, 24.0)   # > 10,000 examples
    }
```

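A back-of-the-envelope training cost follows directly from the table above: total training tokens x number of epochs x cost per 1K tokens. A small sketch using the illustrative prices from `FineTuningEstimates` (actual pricing varies; check current rates):

```python
# A minimal sketch: estimate training cost from the illustrative price table.
# estimate = tokens * epochs * cost_per_1k / 1000
def estimate_training_cost(total_tokens: int, n_epochs: int, model: str) -> float:
    cost_per_1k = {
        "gpt-3.5-turbo": 0.008,
        "gpt-4": 0.030,
        "babbage-002": 0.0004,
        "davinci-002": 0.0060,
    }[model]
    return total_tokens * n_epochs * cost_per_1k / 1000

# e.g. 500K training tokens, 3 epochs on gpt-3.5-turbo:
# 500_000 * 3 * 0.008 / 1000 = $12.00
print(f"${estimate_training_cost(500_000, 3, 'gpt-3.5-turbo'):.2f}")
```
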
## Best Practices

### Data Preparation

- Use high-quality, diverse training examples
- Ensure consistent formatting across all examples (a quick format check is sketched below)
- Include system messages for context and behavior
- Balance your dataset to avoid bias
- Use validation data to monitor overfitting

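A lightweight pre-upload check can enforce the requirements listed under Training Configuration. The following is a minimal sketch using only the standard library; the thresholds come from the illustrative `TrainingDataRequirements` values above:

```python
import json

# A minimal sketch: validate a JSONL training file against the illustrative
# requirements from "Training Configuration" before uploading it.
def validate_training_file(path: str, min_examples: int = 10) -> bool:
    valid_roles = {"system", "user", "assistant"}
    n_examples = 0

    with open(path) as f:
        for line_no, line in enumerate(f, start=1):
            if not line.strip():
                continue
            try:
                example = json.loads(line)
            except json.JSONDecodeError:
                print(f"Line {line_no}: not valid JSON")
                return False
            messages = example.get("messages")
            if not isinstance(messages, list) or len(messages) < 2:
                print(f"Line {line_no}: needs a 'messages' list with >= 2 entries")
                return False
            for m in messages:
                if m.get("role") not in valid_roles or "content" not in m:
                    print(f"Line {line_no}: bad message {m}")
                    return False
            n_examples += 1

    if n_examples < min_examples:
        print(f"Only {n_examples} examples; at least {min_examples} required")
        return False
    return True

# validate_training_file("training_data.jsonl")
```
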
### Model Selection

- Start with `gpt-3.5-turbo-0125` for most use cases
- Use `gpt-4` models for complex reasoning tasks
- Consider `babbage-002` or `davinci-002` for simple tasks with cost constraints
- Test different base models to find the best fit

### Hyperparameter Tuning

- Start with "auto" settings for initial experiments
- Use 1-3 epochs to avoid overfitting
- Monitor validation loss if using validation data (see the sketch below)
- Experiment with the learning rate if the auto settings don't work well

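One way to monitor validation loss after a job finishes is to read the job's result file, which holds per-step metrics. A minimal sketch, assuming the first result file is the standard step-metrics CSV (the exact column names may vary):

```python
import csv
import io

# A minimal sketch: read per-step metrics from a finished job's result file.
# Assumes the first result file is the standard step-metrics CSV; the column
# names used here ("step", "train_loss", "valid_loss") are assumptions.
job = client.fine_tuning.jobs.retrieve("ftjob-abc123")

if job.status == "succeeded" and job.result_files:
    metrics_csv = client.files.content(job.result_files[0]).text
    for row in csv.DictReader(io.StringIO(metrics_csv)):
        print(row.get("step"), row.get("train_loss"), row.get("valid_loss"))
```
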
### Monitoring and Evaluation

- Track job events for training progress
- Use checkpoints to evaluate intermediate models
- Compare against base model performance
- Implement A/B testing for production deployment

### Production Considerations

- Test thoroughly before deploying fine-tuned models
- Monitor model performance in production
- Keep training data and models organized
- Plan for model updates and retraining
- Consider the cost implications of fine-tuned model usage