pypi-openai

Description
Official Python library for the OpenAI API, providing chat completions, embeddings, audio, images, and more
Author
tessl
Last updated

How to use

npx @tessl/cli registry install tessl/pypi-openai@1.106.0

docs/other-apis.md

# Other APIs

Additional functionality including model management, content moderation, vector stores, webhooks, and experimental features.

## Capabilities

### Models API

List, retrieve, and manage available OpenAI models, including base models and fine-tuned variants.

```python { .api }
def list(
    self,
    *,
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN
) -> SyncPage[Model]: ...

def retrieve(
    self,
    model: str,
    *,
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN
) -> Model: ...

def delete(
    self,
    model: str,
    *,
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN
) -> ModelDeleted: ...
```

Usage examples:

```python
from openai import OpenAI

client = OpenAI()

# List all available models
models = client.models.list()

print("Available models:")
for model in models:
    print(f"  {model.id}: owned by {model.owned_by}")

# Filter models by type
base_models = []
fine_tuned_models = []

for model in models:
    if model.id.startswith("ft:"):
        fine_tuned_models.append(model)
    else:
        base_models.append(model)

print(f"\nBase models: {len(base_models)}")
print(f"Fine-tuned models: {len(fine_tuned_models)}")

# Get specific model details
model_info = client.models.retrieve("gpt-3.5-turbo")

print("\nModel details for gpt-3.5-turbo:")
print(f"  ID: {model_info.id}")
print(f"  Created: {model_info.created}")
print(f"  Owned by: {model_info.owned_by}")

# List fine-tuned models only
print("\nYour fine-tuned models:")
for model in fine_tuned_models:
    print(f"  {model.id} (created: {model.created})")

# Delete a fine-tuned model (if needed)
# Note: only fine-tuned models can be deleted
# if fine_tuned_models:
#     model_to_delete = fine_tuned_models[0].id
#     deletion_result = client.models.delete(model_to_delete)
#     print(f"Deleted model: {deletion_result.deleted}")

# Model capabilities lookup
def get_model_capabilities(model_id: str):
    """Get model capabilities and specifications"""

    capabilities = {
        # Chat models
        "gpt-4": {"type": "chat", "context": 8192, "training_data": "Sep 2021"},
        "gpt-4-turbo": {"type": "chat", "context": 128000, "training_data": "Dec 2023"},
        "gpt-4o": {"type": "chat", "context": 128000, "training_data": "Oct 2023"},
        "gpt-3.5-turbo": {"type": "chat", "context": 16385, "training_data": "Sep 2021"},

        # Embedding models
        "text-embedding-3-small": {"type": "embedding", "dimensions": 1536, "max_input": 8191},
        "text-embedding-3-large": {"type": "embedding", "dimensions": 3072, "max_input": 8191},
        "text-embedding-ada-002": {"type": "embedding", "dimensions": 1536, "max_input": 8191},

        # Image models
        "dall-e-3": {"type": "image", "max_size": "1792x1024", "styles": ["vivid", "natural"]},
        "dall-e-2": {"type": "image", "max_size": "1024x1024", "variations": True},

        # Audio models
        "whisper-1": {"type": "audio", "capabilities": ["transcription", "translation"]},
        "tts-1": {"type": "tts", "voices": 6, "formats": ["mp3", "opus", "aac", "flac"]},
        "tts-1-hd": {"type": "tts", "voices": 6, "formats": ["mp3", "opus", "aac", "flac"], "quality": "hd"},

        # Legacy models
        "gpt-3.5-turbo-instruct": {"type": "completion", "context": 4097, "training_data": "Sep 2021"},
    }

    return capabilities.get(model_id, {"type": "unknown"})

# Check model capabilities
test_models = ["gpt-4", "text-embedding-3-small", "dall-e-3"]

for model_id in test_models:
    caps = get_model_capabilities(model_id)
    print(f"\n{model_id} capabilities:")
    for key, value in caps.items():
        print(f"  {key}: {value}")
```

### Moderations API

Analyze content for policy violations and safety concerns using OpenAI's moderation models.

```python { .api }
def create(
    self,
    *,
    input: Union[str, List[str], List[ModerationMultiModalInputParam]],
    model: Union[str, ModerationModel] | NotGiven = NOT_GIVEN,
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN
) -> ModerationCreateResponse: ...
```

Usage examples:

```python
# Basic content moderation
text_to_check = "This is a sample text to check for policy violations."

moderation_result = client.moderations.create(
    input=text_to_check
)

result = moderation_result.results[0]

print(f"Flagged: {result.flagged}")
print(f"Categories: {result.categories}")
print(f"Category scores: {result.category_scores}")

# Check multiple texts
texts_to_check = [
    "Hello, how are you today?",
    "This is normal conversation.",
    "I love programming and AI technology."
]

batch_moderation = client.moderations.create(
    input=texts_to_check
)

for i, result in enumerate(batch_moderation.results):
    text = texts_to_check[i]
    flagged = result.flagged

    print(f"Text {i+1}: {'⚠️ FLAGGED' if flagged else '✅ CLEAN'}")
    print(f"  Content: {text[:50]}...")

    if flagged:
        # Show which categories were flagged
        flagged_categories = [cat for cat, flagged in result.categories.model_dump().items() if flagged]
        print(f"  Flagged for: {', '.join(flagged_categories)}")

# Advanced moderation with multimodal input
multimodal_input = [
    {
        "type": "text",
        "text": "Please review this content for safety"
    },
    {
        "type": "image_url",
        "image_url": {"url": "https://example.com/image.jpg"}
    }
]

# Note: multimodal moderation may require a specific model
multimodal_result = client.moderations.create(
    input=multimodal_input,
    model="omni-moderation-latest"
)

# Content filtering function
def content_filter(text: str, threshold: float = 0.5):
    """Filter content based on moderation scores"""

    moderation = client.moderations.create(input=text)
    result = moderation.results[0]

    if result.flagged:
        return {
            "allowed": False,
            "reason": "Content flagged by moderation",
            "categories": [cat for cat, flagged in result.categories.model_dump().items() if flagged]
        }

    # Check individual category scores against the threshold
    high_risk_categories = []
    for category, score in result.category_scores.model_dump().items():
        if score > threshold:
            high_risk_categories.append(category)

    if high_risk_categories:
        return {
            "allowed": False,
            "reason": f"High risk scores (>{threshold})",
            "categories": high_risk_categories
        }

    return {"allowed": True, "reason": "Content passed moderation"}

# Test the content filter
test_content = "This is educational content about AI safety."
filter_result = content_filter(test_content)

print(f"Content filter result: {filter_result}")

# Batch content moderation for user-generated content
def moderate_user_content(contents: list):
    """Moderate multiple pieces of user content"""

    # Process in batches to stay within API limits
    batch_size = 20
    all_results = []

    for i in range(0, len(contents), batch_size):
        batch = contents[i:i + batch_size]

        moderation = client.moderations.create(input=batch)

        for j, result in enumerate(moderation.results):
            content_idx = i + j
            all_results.append({
                "content_id": content_idx,
                "content": batch[j][:100] + "..." if len(batch[j]) > 100 else batch[j],
                "flagged": result.flagged,
                "categories": result.categories.model_dump(),
                "scores": result.category_scores.model_dump()
            })

    return all_results

# Example user content moderation
user_posts = [
    "Just had an amazing coffee this morning!",
    "Check out this cool AI project I'm working on.",
    "Programming is such a fun hobby.",
] * 10  # 30 posts

moderation_results = moderate_user_content(user_posts)

flagged_count = sum(1 for r in moderation_results if r["flagged"])
print(f"Moderated {len(moderation_results)} posts, {flagged_count} flagged")
```

### Vector Stores API

Create and manage vector stores for efficient similarity search and retrieval operations.

```python { .api }
def create(
    self,
    *,
    file_ids: List[str] | NotGiven = NOT_GIVEN,
    name: str | NotGiven = NOT_GIVEN,
    expires_after: VectorStoreExpiresAfter | NotGiven = NOT_GIVEN,
    chunking_strategy: ChunkingStrategyParam | NotGiven = NOT_GIVEN,
    metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN
) -> VectorStore: ...

def list(
    self,
    *,
    after: str | NotGiven = NOT_GIVEN,
    before: str | NotGiven = NOT_GIVEN,
    limit: int | NotGiven = NOT_GIVEN,
    order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN
) -> SyncCursorPage[VectorStore]: ...

def retrieve(
    self,
    vector_store_id: str
) -> VectorStore: ...

def update(
    self,
    vector_store_id: str,
    *,
    name: str | NotGiven = NOT_GIVEN,
    expires_after: VectorStoreExpiresAfter | NotGiven = NOT_GIVEN,
    metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN
) -> VectorStore: ...

def delete(
    self,
    vector_store_id: str
) -> VectorStoreDeleted: ...
```

Usage examples:

```python
# Upload files for the vector store
documents = ["doc1.txt", "doc2.pdf", "doc3.md"]
file_ids = []

for doc in documents:
    with open(doc, "rb") as f:
        file_obj = client.files.create(
            file=f,
            purpose="assistants"
        )
        file_ids.append(file_obj.id)

# Create vector store
vector_store = client.vector_stores.create(
    name="Knowledge Base",
    file_ids=file_ids,
    expires_after={
        "anchor": "last_active_at",
        "days": 30
    },
    metadata={"project": "documentation", "version": "1.0"}
)

print(f"Created vector store: {vector_store.id}")
print(f"Status: {vector_store.status}")
print(f"File counts: {vector_store.file_counts}")

# List vector stores
vector_stores = client.vector_stores.list()

print("Your vector stores:")
for vs in vector_stores:
    print(f"  {vs.id}: {vs.name} ({vs.file_counts.total} files)")

# Update vector store
updated_store = client.vector_stores.update(
    vector_store.id,
    name="Updated Knowledge Base",
    metadata={"project": "documentation", "version": "2.0", "updated": "true"}
)

# Vector store file management
# Add files to an existing vector store
additional_files = ["doc4.txt", "doc5.pdf"]

for doc in additional_files:
    with open(doc, "rb") as f:
        file_obj = client.files.create(file=f, purpose="assistants")

    # Add to vector store
    vs_file = client.vector_stores.files.create(
        vector_store_id=vector_store.id,
        file_id=file_obj.id
    )

    print(f"Added file {file_obj.id} to vector store")

# List files in the vector store
vs_files = client.vector_stores.files.list(vector_store.id)

print(f"Files in vector store {vector_store.id}:")
for vs_file in vs_files:
    print(f"  {vs_file.id}: {vs_file.status}")

# Delete a file from the vector store
if vs_files.data:
    file_to_remove = vs_files.data[0].id

    deletion_result = client.vector_stores.files.delete(
        vector_store_id=vector_store.id,
        file_id=file_to_remove
    )

    print(f"Removed file {file_to_remove}: {deletion_result.deleted}")

# Batch file operations
batch_files = ["batch1.txt", "batch2.txt", "batch3.txt"]
batch_file_ids = []

for doc in batch_files:
    with open(doc, "rb") as f:
        file_obj = client.files.create(file=f, purpose="assistants")
        batch_file_ids.append(file_obj.id)

# Create a file batch for the vector store
file_batch = client.vector_stores.file_batches.create(
    vector_store_id=vector_store.id,
    file_ids=batch_file_ids
)

print(f"Created file batch: {file_batch.id}")
print(f"Status: {file_batch.status}")

# Monitor batch progress
import time

def monitor_file_batch(vector_store_id: str, batch_id: str):
    """Monitor file batch processing"""

    while True:
        batch = client.vector_stores.file_batches.retrieve(
            batch_id,
            vector_store_id=vector_store_id
        )

        print(f"Batch status: {batch.status}")
        print(f"File counts: {batch.file_counts}")

        if batch.status in ["completed", "failed", "cancelled"]:
            return batch

        time.sleep(5)

# Monitor the batch
# final_batch = monitor_file_batch(vector_store.id, file_batch.id)

# Delete vector store
# deletion_result = client.vector_stores.delete(vector_store.id)
# print(f"Vector store deleted: {deletion_result.deleted}")
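
Once files are processed, the store can be queried for relevant passages. A minimal sketch, assuming your installed SDK version exposes the vector store search endpoint (`client.vector_stores.search`) and that result items carry `filename` and `score` fields:

```python
# Assumes the SDK exposes vector store search (available in recent
# releases) and that results expose `filename` and `score`.
results = client.vector_stores.search(
    vector_store_id=vector_store.id,
    query="How do I configure the project?",
)

for item in results:
    print(item.filename, item.score)
```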

### Webhooks API

Verify and handle webhook notifications for real-time updates on API events such as fine-tuning and batch job completions.

```python { .api }
# Webhook verification utilities
def verify_webhook_signature(
    payload: bytes,
    signature: str,
    secret: str
) -> bool: ...

def construct_webhook_payload(
    timestamp: int,
    payload: str,
    secret: str
) -> str: ...
```

Usage examples:

```python
import hmac
import hashlib
import time

# Webhook signature verification
def verify_webhook_signature(payload: bytes, signature: str, secret: str) -> bool:
    """Verify a webhook signature for security"""

    # Extract the timestamp and signature from the header
    elements = signature.split(',')
    timestamp = None
    sig = None

    for element in elements:
        if element.startswith('t='):
            timestamp = element[2:]
        elif element.startswith('v1='):
            sig = element[3:]

    if not timestamp or not sig:
        return False

    # Create the expected signature
    signed_payload = f"{timestamp}.{payload.decode()}"
    expected_sig = hmac.new(
        secret.encode(),
        signed_payload.encode(),
        hashlib.sha256
    ).hexdigest()

    return hmac.compare_digest(sig, expected_sig)

# Webhook event handler example
class WebhookHandler:
    """Handle OpenAI webhook events"""

    def __init__(self, webhook_secret: str):
        self.secret = webhook_secret
        self.handlers = {}

    def register_handler(self, event_type: str, handler_func):
        """Register a handler for a specific event type"""
        self.handlers[event_type] = handler_func

    def handle_webhook(self, payload: bytes, signature: str):
        """Process an incoming webhook"""

        # Verify signature
        if not verify_webhook_signature(payload, signature, self.secret):
            raise ValueError("Invalid webhook signature")

        # Parse event
        import json
        event_data = json.loads(payload)

        event_type = event_data.get('type')

        if event_type in self.handlers:
            self.handlers[event_type](event_data)
        else:
            print(f"No handler for event type: {event_type}")

    def handle_fine_tuning_job_completed(self, event):
        """Handle fine-tuning job completion"""
        job_id = event['data']['id']
        status = event['data']['status']
        model = event['data'].get('fine_tuned_model')

        print(f"Fine-tuning job {job_id} completed with status: {status}")
        if model:
            print(f"New model available: {model}")

    def handle_batch_completed(self, event):
        """Handle batch job completion"""
        batch_id = event['data']['id']
        status = event['data']['status']

        print(f"Batch {batch_id} completed with status: {status}")

        # Download results if successful
        if status == 'completed':
            output_file_id = event['data'].get('output_file_id')
            if output_file_id:
                print(f"Results available in file: {output_file_id}")

# Set up the webhook handler
webhook_handler = WebhookHandler("your_webhook_secret")

# Register event handlers
webhook_handler.register_handler(
    "fine_tuning.job.completed",
    webhook_handler.handle_fine_tuning_job_completed
)

webhook_handler.register_handler(
    "batch.completed",
    webhook_handler.handle_batch_completed
)

# Example Flask webhook endpoint
"""
from flask import Flask, request

app = Flask(__name__)

@app.route('/webhooks/openai', methods=['POST'])
def handle_openai_webhook():
    payload = request.get_data()
    signature = request.headers.get('OpenAI-Signature')

    try:
        webhook_handler.handle_webhook(payload, signature)
        return {'status': 'success'}, 200
    except ValueError as e:
        return {'error': str(e)}, 400
    except Exception as e:
        return {'error': 'Internal server error'}, 500

if __name__ == '__main__':
    app.run(port=8000)
"""

# Webhook testing utility
def create_test_webhook_payload(event_type: str, data: dict, secret: str):
    """Create a signed test webhook payload"""

    import json

    timestamp = int(time.time())

    event = {
        "type": event_type,
        "data": data,
        "created_at": timestamp
    }

    payload = json.dumps(event)

    # Create signature
    signed_payload = f"{timestamp}.{payload}"
    signature = hmac.new(
        secret.encode(),
        signed_payload.encode(),
        hashlib.sha256
    ).hexdigest()

    webhook_signature = f"t={timestamp},v1={signature}"

    return payload.encode(), webhook_signature

# Test webhook handling
test_event_data = {
    "id": "ftjob-test123",
    "status": "succeeded",
    "fine_tuned_model": "ft:gpt-3.5-turbo-0125:org:model:abc123"
}

test_payload, test_signature = create_test_webhook_payload(
    "fine_tuning.job.completed",
    test_event_data,
    "your_webhook_secret"
)

# Process the test webhook
webhook_handler.handle_webhook(test_payload, test_signature)
```

### Upload Management

Handle large file uploads with chunked upload support for efficient data transfer.

```python { .api }
def create(
    self,
    *,
    bytes: int,
    filename: str,
    mime_type: str,
    purpose: Literal["assistants", "batch", "fine-tune", "vision"]
) -> Upload: ...

def cancel(
    self,
    upload_id: str
) -> Upload: ...

def complete(
    self,
    upload_id: str,
    *,
    part_ids: List[str],
    md5: str | NotGiven = NOT_GIVEN
) -> Upload: ...
```

Usage examples:

```python
import os
import time
import hashlib

# Large file upload with chunking
def upload_large_file(file_path: str, purpose: str, chunk_size: int = 8 * 1024 * 1024):
    """Upload a large file using chunked upload"""

    file_size = os.path.getsize(file_path)
    filename = os.path.basename(file_path)

    print(f"Uploading {filename} ({file_size} bytes)")

    # Create upload
    upload = client.uploads.create(
        bytes=file_size,
        filename=filename,
        mime_type="application/octet-stream",
        purpose=purpose
    )

    print(f"Created upload: {upload.id}")

    # Upload parts
    part_ids = []

    with open(file_path, 'rb') as f:
        part_number = 0

        while True:
            chunk = f.read(chunk_size)
            if not chunk:
                break

            part_number += 1

            # Upload part
            part = client.uploads.parts.create(
                upload_id=upload.id,
                data=chunk
            )

            part_ids.append(part.id)

            progress = (part_number * chunk_size) / file_size * 100
            print(f"Uploaded part {part_number}: {min(progress, 100):.1f}%")

    # Calculate MD5 hash
    with open(file_path, 'rb') as f:
        file_hash = hashlib.md5(f.read()).hexdigest()

    # Complete upload
    completed_upload = client.uploads.complete(
        upload_id=upload.id,
        part_ids=part_ids,
        md5=file_hash
    )

    print(f"Upload completed: {completed_upload.file.id}")
    return completed_upload.file

# Example large file upload
# large_file = upload_large_file("large_dataset.jsonl", "fine-tune")

# Upload with error handling
def robust_file_upload(file_path: str, purpose: str, max_retries: int = 3):
    """Upload a file with retry logic"""

    for attempt in range(max_retries):
        try:
            if os.path.getsize(file_path) > 100 * 1024 * 1024:  # > 100 MB
                # Use chunked upload
                return upload_large_file(file_path, purpose)
            else:
                # Use regular upload
                with open(file_path, "rb") as f:
                    return client.files.create(file=f, purpose=purpose)

        except Exception as e:
            print(f"Upload attempt {attempt + 1} failed: {e}")

            if attempt == max_retries - 1:
                raise

            time.sleep(2 ** attempt)  # Exponential backoff

# Upload with progress tracking
class UploadProgressTracker:
    """Track upload progress for multiple files"""

    def __init__(self):
        self.uploads = {}

    def track_upload(self, file_path: str, purpose: str):
        """Track a file upload with progress"""

        filename = os.path.basename(file_path)
        file_size = os.path.getsize(file_path)

        self.uploads[filename] = {
            "size": file_size,
            "uploaded": 0,
            "status": "starting"
        }

        try:
            if file_size > 100 * 1024 * 1024:  # Large file
                file_obj = self._upload_large_with_progress(file_path, purpose)
            else:
                file_obj = self._upload_regular_with_progress(file_path, purpose)

            self.uploads[filename]["status"] = "completed"
            self.uploads[filename]["file_id"] = file_obj.id

            return file_obj

        except Exception as e:
            self.uploads[filename]["status"] = "failed"
            self.uploads[filename]["error"] = str(e)
            raise

    def _upload_regular_with_progress(self, file_path: str, purpose: str):
        """Upload a regular file with progress tracking"""

        filename = os.path.basename(file_path)

        with open(file_path, "rb") as f:
            self.uploads[filename]["status"] = "uploading"

            file_obj = client.files.create(file=f, purpose=purpose)

        self.uploads[filename]["uploaded"] = self.uploads[filename]["size"]

        return file_obj

    def _upload_large_with_progress(self, file_path: str, purpose: str):
        """Upload a large file with detailed progress tracking"""

        # A full implementation would track chunk-by-chunk progress,
        # similar to upload_large_file but with progress updates
        return upload_large_file(file_path, purpose)

    def get_progress(self):
        """Get an upload progress summary"""

        total_files = len(self.uploads)
        completed_files = sum(1 for u in self.uploads.values() if u["status"] == "completed")
        failed_files = sum(1 for u in self.uploads.values() if u["status"] == "failed")

        return {
            "total": total_files,
            "completed": completed_files,
            "failed": failed_files,
            "in_progress": total_files - completed_files - failed_files
        }

# Example usage
tracker = UploadProgressTracker()

files_to_upload = ["dataset1.jsonl", "dataset2.jsonl", "dataset3.jsonl"]

for file_path in files_to_upload:
    try:
        file_obj = tracker.track_upload(file_path, "fine-tune")
        print(f"✅ Uploaded {file_path}: {file_obj.id}")
    except Exception as e:
        print(f"❌ Failed to upload {file_path}: {e}")

progress = tracker.get_progress()
print(f"Upload summary: {progress}")
```

## Types

### Models API Types

```python { .api }
class Model(BaseModel):
    id: str
    created: int
    object: Literal["model"]
    owned_by: str

class ModelDeleted(BaseModel):
    id: str
    deleted: bool
    object: Literal["model"]
```

### Moderations API Types

```python { .api }
class ModerationCreateResponse(BaseModel):
    id: str
    model: str
    results: List[ModerationResult]

class ModerationResult(BaseModel):
    categories: ModerationCategories
    category_scores: ModerationCategoryScores
    flagged: bool

class ModerationCategories(BaseModel):
    harassment: bool
    harassment_threatening: bool
    hate: bool
    hate_threatening: bool
    illicit: bool
    illicit_violent: bool
    self_harm: bool
    self_harm_instructions: bool
    self_harm_intent: bool
    sexual: bool
    sexual_minors: bool
    violence: bool
    violence_graphic: bool

class ModerationCategoryScores(BaseModel):
    harassment: float
    harassment_threatening: float
    hate: float
    hate_threatening: float
    illicit: float
    illicit_violent: float
    self_harm: float
    self_harm_instructions: float
    self_harm_intent: float
    sexual: float
    sexual_minors: float
    violence: float
    violence_graphic: float

ModerationModel = Literal[
    "omni-moderation-latest",
    "omni-moderation-2024-09-26",
    "text-moderation-latest",
    "text-moderation-stable"
]

# Multimodal input types
ModerationMultiModalInputParam = Union[
    ModerationImageURLInputParam,
    ModerationTextInputParam
]

class ModerationTextInputParam(TypedDict, total=False):
    type: Required[Literal["text"]]
    text: Required[str]

class ModerationImageURLInputParam(TypedDict, total=False):
    type: Required[Literal["image_url"]]
    image_url: Required[ModerationImageURL]
```

### Vector Stores Types

```python { .api }
class VectorStore(BaseModel):
    id: str
    created_at: int
    file_counts: VectorStoreFileCounts
    last_active_at: Optional[int]
    metadata: Optional[Dict[str, str]]
    name: str
    object: Literal["vector_store"]
    status: Literal["expired", "in_progress", "completed"]
    usage_bytes: int
    expires_after: Optional[VectorStoreExpiresAfter]
    expires_at: Optional[int]

class VectorStoreFileCounts(BaseModel):
    cancelled: int
    completed: int
    failed: int
    in_progress: int
    total: int

class VectorStoreExpiresAfter(BaseModel):
    anchor: Literal["last_active_at"]
    days: int

class VectorStoreDeleted(BaseModel):
    id: str
    deleted: bool
    object: Literal["vector_store.deleted"]

# Chunking strategy types
ChunkingStrategyParam = Union[
    AutoChunkingStrategyParam,
    StaticChunkingStrategyParam
]

class AutoChunkingStrategyParam(TypedDict, total=False):
    type: Required[Literal["auto"]]

class StaticChunkingStrategyParam(TypedDict, total=False):
    type: Required[Literal["static"]]
    static: Required[StaticChunkingStrategy]

class StaticChunkingStrategy(TypedDict, total=False):
    max_chunk_size_tokens: Required[int]
    chunk_overlap_tokens: Required[int]
```

### Upload Types

```python { .api }
class Upload(BaseModel):
    id: str
    bytes: int
    created_at: int
    expires_at: int
    filename: str
    object: Literal["upload"]
    purpose: str
    status: Literal["pending", "completed", "cancelled", "expired"]
    file: Optional[FileObject]

class UploadPart(BaseModel):
    id: str
    created_at: int
    object: Literal["upload.part"]
    upload_id: str

# Upload parameters
class UploadCreateParams(TypedDict, total=False):
    bytes: Required[int]
    filename: Required[str]
    mime_type: Required[str]
    purpose: Required[Literal["assistants", "batch", "fine-tune", "vision"]]

class UploadCompleteParams(TypedDict, total=False):
    part_ids: Required[List[str]]
    md5: NotRequired[str]
```

## Best Practices

### Models Management

- Regularly check for new model releases and capabilities
- Monitor fine-tuned model performance and update as needed
- Keep track of model deprecation schedules (see the sketch below)
- Use appropriate models for specific tasks to optimize costs
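
A minimal deprecation check, assuming you maintain your own list of deprecated model IDs; the `DEPRECATED_MODELS` set below is a hypothetical placeholder, since the Models API does not expose deprecation dates:

```python
from openai import OpenAI

client = OpenAI()

# Hypothetical list; keep it in sync with OpenAI's published
# deprecation schedule.
DEPRECATED_MODELS = {"text-davinci-003", "text-embedding-ada-002"}

for model in client.models.list():
    if model.id in DEPRECATED_MODELS:
        print(f"⚠️ {model.id} is on the deprecation list; plan a migration")
```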

### Content Moderation

- Implement moderation for all user-generated content
- Use appropriate moderation models for your content type
- Set reasonable thresholds for different risk categories (see the sketch below)
- Regularly review and update moderation policies
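
A sketch of per-category thresholds; the cutoff values below are illustrative assumptions, not recommended settings:

```python
from openai import OpenAI

client = OpenAI()

# Illustrative thresholds only; tune these for your own risk tolerance.
THRESHOLDS = {"violence": 0.3, "harassment": 0.5}
DEFAULT_THRESHOLD = 0.7

def risky_categories(result) -> list:
    """Return categories whose score exceeds its configured threshold."""
    scores = result.category_scores.model_dump()
    return [
        category for category, score in scores.items()
        if score > THRESHOLDS.get(category, DEFAULT_THRESHOLD)
    ]

result = client.moderations.create(input="Some user-generated text").results[0]
print(risky_categories(result))
```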

### Vector Stores

- Organize documents logically within vector stores
- Use meaningful names and metadata for easy management
- Monitor storage usage and implement cleanup procedures (see the sketch below)
- Keep vector stores updated when source documents change
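
A minimal cleanup sketch that deletes stores the API reports as expired, using the `list` and `delete` calls and the `status` values shown above:

```python
from openai import OpenAI

client = OpenAI()

# Delete vector stores whose expiration policy has already triggered.
for store in client.vector_stores.list():
    if store.status == "expired":
        result = client.vector_stores.delete(store.id)
        print(f"Deleted expired store {store.id}: {result.deleted}")
```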

### Webhooks

- Always verify webhook signatures for security
- Implement proper error handling for webhook events (see the idempotency sketch below)
- Use webhooks to trigger automated workflows
- Monitor webhook delivery and implement retry logic for failures
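
One practical piece of error handling is idempotency: a sketch that skips duplicate deliveries, assuming each event payload carries a unique `id` field:

```python
import json

# Events may be redelivered if your endpoint fails, so track which
# event IDs have already been handled.
processed_ids = set()

def handle_event_once(payload: bytes) -> None:
    event = json.loads(payload)
    event_id = event.get("id")
    if event_id in processed_ids:
        print(f"Skipping duplicate delivery of {event_id}")
        return
    processed_ids.add(event_id)
    print(f"Processing event {event_id}: {event.get('type')}")

handle_event_once(b'{"id": "evt_123", "type": "batch.completed"}')
handle_event_once(b'{"id": "evt_123", "type": "batch.completed"}')  # skipped
```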

### File Uploads

- Use chunked uploads for large files (>100MB)
- Implement progress tracking for better user experience
- Add retry logic for upload failures
- Validate file integrity with checksums when possible (see the sketch below)
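
A streaming checksum sketch, so large files are hashed without being read into memory at once; the digest can be passed as `md5` to `uploads.complete()`:

```python
import hashlib

def file_md5(path: str, chunk_size: int = 8 * 1024 * 1024) -> str:
    """Compute an MD5 digest chunk by chunk."""
    digest = hashlib.md5()
    with open(path, "rb") as f:
        while chunk := f.read(chunk_size):
            digest.update(chunk)
    return digest.hexdigest()

# Example (hypothetical file name):
# print(file_md5("large_dataset.jsonl"))
```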