pypi-openai

Description
Official Python library for the OpenAI API, providing chat completions, embeddings, audio, images, and more
Author
tessl
Last updated

How to use

npx @tessl/cli registry install tessl/pypi-openai@1.106.0

docs/other-apis.md

# Other APIs

Additional functionality including model management, content moderation, vector stores, webhooks, and experimental features.

## Capabilities

### Models API

List, retrieve, and manage available OpenAI models, including base models and fine-tuned variants.

```python { .api }
def list(
    self,
    *,
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN
) -> SyncPage[Model]: ...

def retrieve(
    self,
    model: str,
    *,
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN
) -> Model: ...

def delete(
    self,
    model: str,
    *,
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN
) -> ModelDeleted: ...
```

Usage examples:

```python
from openai import OpenAI

client = OpenAI()

# List all available models
models = client.models.list()

print("Available models:")
for model in models:
    print(f"  {model.id}: owned by {model.owned_by}")

# Filter models by type
base_models = []
fine_tuned_models = []

for model in models:
    if model.id.startswith("ft:"):
        fine_tuned_models.append(model)
    else:
        base_models.append(model)

print(f"\nBase models: {len(base_models)}")
print(f"Fine-tuned models: {len(fine_tuned_models)}")

# Get specific model details
model_info = client.models.retrieve("gpt-3.5-turbo")

print("\nModel details for gpt-3.5-turbo:")
print(f"  ID: {model_info.id}")
print(f"  Created: {model_info.created}")
print(f"  Owned by: {model_info.owned_by}")

# List fine-tuned models only
print("\nYour fine-tuned models:")
for model in fine_tuned_models:
    print(f"  {model.id} (created: {model.created})")

# Delete a fine-tuned model (if needed)
# Note: only fine-tuned models can be deleted
# if fine_tuned_models:
#     model_to_delete = fine_tuned_models[0].id
#     deletion_result = client.models.delete(model_to_delete)
#     print(f"Deleted model: {deletion_result.deleted}")

# Model capabilities lookup
def get_model_capabilities(model_id: str):
    """Get model capabilities and specifications"""

    capabilities = {
        # Chat models
        "gpt-4": {"type": "chat", "context": 8192, "training_data": "Sep 2021"},
        "gpt-4-turbo": {"type": "chat", "context": 128000, "training_data": "Dec 2023"},
        "gpt-4o": {"type": "chat", "context": 128000, "training_data": "Oct 2023"},
        "gpt-3.5-turbo": {"type": "chat", "context": 16385, "training_data": "Sep 2021"},

        # Embedding models
        "text-embedding-3-small": {"type": "embedding", "dimensions": 1536, "max_input": 8191},
        "text-embedding-3-large": {"type": "embedding", "dimensions": 3072, "max_input": 8191},
        "text-embedding-ada-002": {"type": "embedding", "dimensions": 1536, "max_input": 8191},

        # Image models
        "dall-e-3": {"type": "image", "max_size": "1792x1024", "styles": ["vivid", "natural"]},
        "dall-e-2": {"type": "image", "max_size": "1024x1024", "variations": True},

        # Audio models
        "whisper-1": {"type": "audio", "capabilities": ["transcription", "translation"]},
        "tts-1": {"type": "tts", "voices": 6, "formats": ["mp3", "opus", "aac", "flac"]},
        "tts-1-hd": {"type": "tts", "voices": 6, "formats": ["mp3", "opus", "aac", "flac"], "quality": "hd"},

        # Legacy models
        "gpt-3.5-turbo-instruct": {"type": "completion", "context": 4097, "training_data": "Sep 2021"},
    }

    return capabilities.get(model_id, {"type": "unknown"})

# Check model capabilities
test_models = ["gpt-4", "text-embedding-3-small", "dall-e-3"]

for model_id in test_models:
    caps = get_model_capabilities(model_id)
    print(f"\n{model_id} capabilities:")
    for key, value in caps.items():
        print(f"  {key}: {value}")
```

### Moderations API

Analyze content for policy violations and safety concerns using OpenAI's moderation models.

```python { .api }
def create(
    self,
    *,
    input: Union[str, List[str], List[ModerationMultiModalInputParam]],
    model: Union[str, ModerationModel] | NotGiven = NOT_GIVEN,
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN
) -> ModerationCreateResponse: ...
```

Usage examples:

```python
# Basic content moderation
text_to_check = "This is a sample text to check for policy violations."

moderation_result = client.moderations.create(
    input=text_to_check
)

result = moderation_result.results[0]

print(f"Flagged: {result.flagged}")
print(f"Categories: {result.categories}")
print(f"Category scores: {result.category_scores}")

# Check multiple texts
texts_to_check = [
    "Hello, how are you today?",
    "This is normal conversation.",
    "I love programming and AI technology."
]

batch_moderation = client.moderations.create(
    input=texts_to_check
)

for i, result in enumerate(batch_moderation.results):
    text = texts_to_check[i]
    flagged = result.flagged

    print(f"Text {i+1}: {'⚠️ FLAGGED' if flagged else '✅ CLEAN'}")
    print(f"  Content: {text[:50]}...")

    if flagged:
        # Show which categories were flagged
        flagged_categories = [cat for cat, flagged in result.categories.model_dump().items() if flagged]
        print(f"  Flagged for: {', '.join(flagged_categories)}")

# Advanced moderation with multimodal input
multimodal_input = [
    {
        "type": "text",
        "text": "Please review this content for safety"
    },
    {
        "type": "image_url",
        "image_url": {"url": "https://example.com/image.jpg"}
    }
]

# Note: multimodal moderation may require a specific model
multimodal_result = client.moderations.create(
    input=multimodal_input,
    model="omni-moderation-latest"
)

# Content filtering function
def content_filter(text: str, threshold: float = 0.5):
    """Filter content based on moderation scores"""

    moderation = client.moderations.create(input=text)
    result = moderation.results[0]

    if result.flagged:
        return {
            "allowed": False,
            "reason": "Content flagged by moderation",
            "categories": [cat for cat, flagged in result.categories.model_dump().items() if flagged]
        }

    # Check individual category scores against the threshold
    high_risk_categories = []
    for category, score in result.category_scores.model_dump().items():
        if score > threshold:
            high_risk_categories.append(category)

    if high_risk_categories:
        return {
            "allowed": False,
            "reason": f"High risk scores (>{threshold})",
            "categories": high_risk_categories
        }

    return {"allowed": True, "reason": "Content passed moderation"}

# Test the content filter
test_content = "This is educational content about AI safety."
filter_result = content_filter(test_content)

print(f"Content filter result: {filter_result}")

# Batch content moderation for user-generated content
def moderate_user_content(contents: list):
    """Moderate multiple pieces of user content"""

    # Process in batches to stay within API limits
    batch_size = 20
    all_results = []

    for i in range(0, len(contents), batch_size):
        batch = contents[i:i + batch_size]

        moderation = client.moderations.create(input=batch)

        for j, result in enumerate(moderation.results):
            content_idx = i + j
            all_results.append({
                "content_id": content_idx,
                "content": batch[j][:100] + "..." if len(batch[j]) > 100 else batch[j],
                "flagged": result.flagged,
                "categories": result.categories.model_dump(),
                "scores": result.category_scores.model_dump()
            })

    return all_results

# Example user content moderation
user_posts = [
    "Just had an amazing coffee this morning!",
    "Check out this cool AI project I'm working on.",
    "Programming is such a fun hobby.",
] * 10  # 30 posts

moderation_results = moderate_user_content(user_posts)

flagged_count = sum(1 for r in moderation_results if r["flagged"])
print(f"Moderated {len(moderation_results)} posts, {flagged_count} flagged")
```

### Vector Stores API

Create and manage vector stores for efficient similarity search and retrieval operations.

```python { .api }
def create(
    self,
    *,
    file_ids: List[str] | NotGiven = NOT_GIVEN,
    name: str | NotGiven = NOT_GIVEN,
    expires_after: VectorStoreExpiresAfter | NotGiven = NOT_GIVEN,
    chunking_strategy: ChunkingStrategyParam | NotGiven = NOT_GIVEN,
    metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN
) -> VectorStore: ...

def list(
    self,
    *,
    after: str | NotGiven = NOT_GIVEN,
    before: str | NotGiven = NOT_GIVEN,
    limit: int | NotGiven = NOT_GIVEN,
    order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN
) -> SyncCursorPage[VectorStore]: ...

def retrieve(
    self,
    vector_store_id: str
) -> VectorStore: ...

def update(
    self,
    vector_store_id: str,
    *,
    name: str | NotGiven = NOT_GIVEN,
    expires_after: VectorStoreExpiresAfter | NotGiven = NOT_GIVEN,
    metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN
) -> VectorStore: ...

def delete(
    self,
    vector_store_id: str
) -> VectorStoreDeleted: ...
```

Usage examples:

```python
# Upload files for the vector store
documents = ["doc1.txt", "doc2.pdf", "doc3.md"]
file_ids = []

for doc in documents:
    with open(doc, "rb") as f:
        file_obj = client.files.create(
            file=f,
            purpose="assistants"
        )
        file_ids.append(file_obj.id)

# Create vector store
vector_store = client.vector_stores.create(
    name="Knowledge Base",
    file_ids=file_ids,
    expires_after={
        "anchor": "last_active_at",
        "days": 30
    },
    metadata={"project": "documentation", "version": "1.0"}
)

print(f"Created vector store: {vector_store.id}")
print(f"Status: {vector_store.status}")
print(f"File counts: {vector_store.file_counts}")

# List vector stores
vector_stores = client.vector_stores.list()

print("Your vector stores:")
for vs in vector_stores:
    print(f"  {vs.id}: {vs.name} ({vs.file_counts.total} files)")

# Update vector store
updated_store = client.vector_stores.update(
    vector_store.id,
    name="Updated Knowledge Base",
    metadata={"project": "documentation", "version": "2.0", "updated": "true"}
)

# Vector store file management
# Add files to an existing vector store
additional_files = ["doc4.txt", "doc5.pdf"]

for doc in additional_files:
    with open(doc, "rb") as f:
        file_obj = client.files.create(file=f, purpose="assistants")

    # Add to vector store
    vs_file = client.vector_stores.files.create(
        vector_store_id=vector_store.id,
        file_id=file_obj.id
    )

    print(f"Added file {file_obj.id} to vector store")

# List files in the vector store
vs_files = client.vector_stores.files.list(vector_store.id)

print(f"Files in vector store {vector_store.id}:")
for vs_file in vs_files:
    print(f"  {vs_file.id}: {vs_file.status}")

# Delete a file from the vector store
if vs_files.data:
    file_to_remove = vs_files.data[0].id

    deletion_result = client.vector_stores.files.delete(
        vector_store_id=vector_store.id,
        file_id=file_to_remove
    )

    print(f"Removed file {file_to_remove}: {deletion_result.deleted}")

# Batch file operations
batch_files = ["batch1.txt", "batch2.txt", "batch3.txt"]
batch_file_ids = []

for doc in batch_files:
    with open(doc, "rb") as f:
        file_obj = client.files.create(file=f, purpose="assistants")
        batch_file_ids.append(file_obj.id)

# Create a file batch for the vector store
file_batch = client.vector_stores.file_batches.create(
    vector_store_id=vector_store.id,
    file_ids=batch_file_ids
)

print(f"Created file batch: {file_batch.id}")
print(f"Status: {file_batch.status}")

# Monitor batch progress
import time

def monitor_file_batch(vector_store_id: str, batch_id: str):
    """Monitor file batch processing"""

    while True:
        batch = client.vector_stores.file_batches.retrieve(
            batch_id,
            vector_store_id=vector_store_id
        )

        print(f"Batch status: {batch.status}")
        print(f"File counts: {batch.file_counts}")

        if batch.status in ["completed", "failed", "cancelled"]:
            return batch

        time.sleep(5)

# Monitor the batch
# final_batch = monitor_file_batch(vector_store.id, file_batch.id)

# Delete vector store
# deletion_result = client.vector_stores.delete(vector_store.id)
# print(f"Vector store deleted: {deletion_result.deleted}")
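
Once files are processed, the store can be queried for relevant passages. A minimal sketch, assuming your installed SDK version exposes the vector store search endpoint (`client.vector_stores.search`) and that result items carry `filename` and `score` fields:

```python
# Assumes the SDK exposes vector store search (available in recent
# releases) and that results expose `filename` and `score`.
results = client.vector_stores.search(
    vector_store_id=vector_store.id,
    query="How do I configure the project?",
)

for item in results:
    print(item.filename, item.score)
```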

### Webhooks API

Verify and handle webhook notifications for real-time updates on API events such as fine-tuning and batch job completions.

```python { .api }
# Webhook verification utilities
def verify_webhook_signature(
    payload: bytes,
    signature: str,
    secret: str
) -> bool: ...

def construct_webhook_payload(
    timestamp: int,
    payload: str,
    secret: str
) -> str: ...
```

Usage examples:

```python
import hmac
import hashlib
import time

# Webhook signature verification
def verify_webhook_signature(payload: bytes, signature: str, secret: str) -> bool:
    """Verify a webhook signature for security"""

    # Extract the timestamp and signature from the header
    elements = signature.split(',')
    timestamp = None
    sig = None

    for element in elements:
        if element.startswith('t='):
            timestamp = element[2:]
        elif element.startswith('v1='):
            sig = element[3:]

    if not timestamp or not sig:
        return False

    # Create the expected signature
    signed_payload = f"{timestamp}.{payload.decode()}"
    expected_sig = hmac.new(
        secret.encode(),
        signed_payload.encode(),
        hashlib.sha256
    ).hexdigest()

    return hmac.compare_digest(sig, expected_sig)

# Webhook event handler example
class WebhookHandler:
    """Handle OpenAI webhook events"""

    def __init__(self, webhook_secret: str):
        self.secret = webhook_secret
        self.handlers = {}

    def register_handler(self, event_type: str, handler_func):
        """Register a handler for a specific event type"""
        self.handlers[event_type] = handler_func

    def handle_webhook(self, payload: bytes, signature: str):
        """Process an incoming webhook"""

        # Verify signature
        if not verify_webhook_signature(payload, signature, self.secret):
            raise ValueError("Invalid webhook signature")

        # Parse event
        import json
        event_data = json.loads(payload)

        event_type = event_data.get('type')

        if event_type in self.handlers:
            self.handlers[event_type](event_data)
        else:
            print(f"No handler for event type: {event_type}")

    def handle_fine_tuning_job_completed(self, event):
        """Handle fine-tuning job completion"""
        job_id = event['data']['id']
        status = event['data']['status']
        model = event['data'].get('fine_tuned_model')

        print(f"Fine-tuning job {job_id} completed with status: {status}")
        if model:
            print(f"New model available: {model}")

    def handle_batch_completed(self, event):
        """Handle batch job completion"""
        batch_id = event['data']['id']
        status = event['data']['status']

        print(f"Batch {batch_id} completed with status: {status}")

        # Download results if successful
        if status == 'completed':
            output_file_id = event['data'].get('output_file_id')
            if output_file_id:
                print(f"Results available in file: {output_file_id}")

# Set up the webhook handler
webhook_handler = WebhookHandler("your_webhook_secret")

# Register event handlers
webhook_handler.register_handler(
    "fine_tuning.job.completed",
    webhook_handler.handle_fine_tuning_job_completed
)

webhook_handler.register_handler(
    "batch.completed",
    webhook_handler.handle_batch_completed
)

# Example Flask webhook endpoint
"""
from flask import Flask, request

app = Flask(__name__)

@app.route('/webhooks/openai', methods=['POST'])
def handle_openai_webhook():
    payload = request.get_data()
    signature = request.headers.get('OpenAI-Signature')

    try:
        webhook_handler.handle_webhook(payload, signature)
        return {'status': 'success'}, 200
    except ValueError as e:
        return {'error': str(e)}, 400
    except Exception as e:
        return {'error': 'Internal server error'}, 500

if __name__ == '__main__':
    app.run(port=8000)
"""

# Webhook testing utility
def create_test_webhook_payload(event_type: str, data: dict, secret: str):
    """Create a signed test webhook payload"""

    import json

    timestamp = int(time.time())

    event = {
        "type": event_type,
        "data": data,
        "created_at": timestamp
    }

    payload = json.dumps(event)

    # Create signature
    signed_payload = f"{timestamp}.{payload}"
    signature = hmac.new(
        secret.encode(),
        signed_payload.encode(),
        hashlib.sha256
    ).hexdigest()

    webhook_signature = f"t={timestamp},v1={signature}"

    return payload.encode(), webhook_signature

# Test webhook handling
test_event_data = {
    "id": "ftjob-test123",
    "status": "succeeded",
    "fine_tuned_model": "ft:gpt-3.5-turbo-0125:org:model:abc123"
}

test_payload, test_signature = create_test_webhook_payload(
    "fine_tuning.job.completed",
    test_event_data,
    "your_webhook_secret"
)

# Process the test webhook
webhook_handler.handle_webhook(test_payload, test_signature)
```

### Upload Management

Handle large file uploads with chunked upload support for efficient data transfer.

```python { .api }
def create(
    self,
    *,
    bytes: int,
    filename: str,
    mime_type: str,
    purpose: Literal["assistants", "batch", "fine-tune", "vision"]
) -> Upload: ...

def cancel(
    self,
    upload_id: str
) -> Upload: ...

def complete(
    self,
    upload_id: str,
    *,
    part_ids: List[str],
    md5: str | NotGiven = NOT_GIVEN
) -> Upload: ...
```

Usage examples:

```python
import os
import time
import hashlib

# Large file upload with chunking
def upload_large_file(file_path: str, purpose: str, chunk_size: int = 8 * 1024 * 1024):
    """Upload a large file using chunked upload"""

    file_size = os.path.getsize(file_path)
    filename = os.path.basename(file_path)

    print(f"Uploading {filename} ({file_size} bytes)")

    # Create upload
    upload = client.uploads.create(
        bytes=file_size,
        filename=filename,
        mime_type="application/octet-stream",
        purpose=purpose
    )

    print(f"Created upload: {upload.id}")

    # Upload parts
    part_ids = []

    with open(file_path, 'rb') as f:
        part_number = 0

        while True:
            chunk = f.read(chunk_size)
            if not chunk:
                break

            part_number += 1

            # Upload part
            part = client.uploads.parts.create(
                upload_id=upload.id,
                data=chunk
            )

            part_ids.append(part.id)

            progress = (part_number * chunk_size) / file_size * 100
            print(f"Uploaded part {part_number}: {min(progress, 100):.1f}%")

    # Calculate MD5 hash
    with open(file_path, 'rb') as f:
        file_hash = hashlib.md5(f.read()).hexdigest()

    # Complete upload
    completed_upload = client.uploads.complete(
        upload_id=upload.id,
        part_ids=part_ids,
        md5=file_hash
    )

    print(f"Upload completed: {completed_upload.file.id}")
    return completed_upload.file

# Example large file upload
# large_file = upload_large_file("large_dataset.jsonl", "fine-tune")

# Upload with error handling
def robust_file_upload(file_path: str, purpose: str, max_retries: int = 3):
    """Upload a file with retry logic"""

    for attempt in range(max_retries):
        try:
            if os.path.getsize(file_path) > 100 * 1024 * 1024:  # > 100 MB
                # Use chunked upload
                return upload_large_file(file_path, purpose)
            else:
                # Use regular upload
                with open(file_path, "rb") as f:
                    return client.files.create(file=f, purpose=purpose)

        except Exception as e:
            print(f"Upload attempt {attempt + 1} failed: {e}")

            if attempt == max_retries - 1:
                raise

            time.sleep(2 ** attempt)  # Exponential backoff

# Upload with progress tracking
class UploadProgressTracker:
    """Track upload progress for multiple files"""

    def __init__(self):
        self.uploads = {}

    def track_upload(self, file_path: str, purpose: str):
        """Track a file upload with progress"""

        filename = os.path.basename(file_path)
        file_size = os.path.getsize(file_path)

        self.uploads[filename] = {
            "size": file_size,
            "uploaded": 0,
            "status": "starting"
        }

        try:
            if file_size > 100 * 1024 * 1024:  # Large file
                file_obj = self._upload_large_with_progress(file_path, purpose)
            else:
                file_obj = self._upload_regular_with_progress(file_path, purpose)

            self.uploads[filename]["status"] = "completed"
            self.uploads[filename]["file_id"] = file_obj.id

            return file_obj

        except Exception as e:
            self.uploads[filename]["status"] = "failed"
            self.uploads[filename]["error"] = str(e)
            raise

    def _upload_regular_with_progress(self, file_path: str, purpose: str):
        """Upload a regular file with progress tracking"""

        filename = os.path.basename(file_path)

        with open(file_path, "rb") as f:
            self.uploads[filename]["status"] = "uploading"

            file_obj = client.files.create(file=f, purpose=purpose)

        self.uploads[filename]["uploaded"] = self.uploads[filename]["size"]

        return file_obj

    def _upload_large_with_progress(self, file_path: str, purpose: str):
        """Upload a large file with detailed progress tracking"""

        # A full implementation would track chunk-by-chunk progress,
        # similar to upload_large_file but with progress updates
        return upload_large_file(file_path, purpose)

    def get_progress(self):
        """Get an upload progress summary"""

        total_files = len(self.uploads)
        completed_files = sum(1 for u in self.uploads.values() if u["status"] == "completed")
        failed_files = sum(1 for u in self.uploads.values() if u["status"] == "failed")

        return {
            "total": total_files,
            "completed": completed_files,
            "failed": failed_files,
            "in_progress": total_files - completed_files - failed_files
        }

# Example usage
tracker = UploadProgressTracker()

files_to_upload = ["dataset1.jsonl", "dataset2.jsonl", "dataset3.jsonl"]

for file_path in files_to_upload:
    try:
        file_obj = tracker.track_upload(file_path, "fine-tune")
        print(f"✅ Uploaded {file_path}: {file_obj.id}")
    except Exception as e:
        print(f"❌ Failed to upload {file_path}: {e}")

progress = tracker.get_progress()
print(f"Upload summary: {progress}")
```

## Types

### Models API Types

```python { .api }
class Model(BaseModel):
    id: str
    created: int
    object: Literal["model"]
    owned_by: str

class ModelDeleted(BaseModel):
    id: str
    deleted: bool
    object: Literal["model"]
```

### Moderations API Types

```python { .api }
class ModerationCreateResponse(BaseModel):
    id: str
    model: str
    results: List[ModerationResult]

class ModerationResult(BaseModel):
    categories: ModerationCategories
    category_scores: ModerationCategoryScores
    flagged: bool

class ModerationCategories(BaseModel):
    harassment: bool
    harassment_threatening: bool
    hate: bool
    hate_threatening: bool
    illicit: bool
    illicit_violent: bool
    self_harm: bool
    self_harm_instructions: bool
    self_harm_intent: bool
    sexual: bool
    sexual_minors: bool
    violence: bool
    violence_graphic: bool

class ModerationCategoryScores(BaseModel):
    harassment: float
    harassment_threatening: float
    hate: float
    hate_threatening: float
    illicit: float
    illicit_violent: float
    self_harm: float
    self_harm_instructions: float
    self_harm_intent: float
    sexual: float
    sexual_minors: float
    violence: float
    violence_graphic: float

ModerationModel = Literal[
    "omni-moderation-latest",
    "omni-moderation-2024-09-26",
    "text-moderation-latest",
    "text-moderation-stable"
]

# Multimodal input types
ModerationMultiModalInputParam = Union[
    ModerationImageURLInputParam,
    ModerationTextInputParam
]

class ModerationTextInputParam(TypedDict, total=False):
    type: Required[Literal["text"]]
    text: Required[str]

class ModerationImageURLInputParam(TypedDict, total=False):
    type: Required[Literal["image_url"]]
    image_url: Required[ModerationImageURL]
```

### Vector Stores Types

```python { .api }
class VectorStore(BaseModel):
    id: str
    created_at: int
    file_counts: VectorStoreFileCounts
    last_active_at: Optional[int]
    metadata: Optional[Dict[str, str]]
    name: str
    object: Literal["vector_store"]
    status: Literal["expired", "in_progress", "completed"]
    usage_bytes: int
    expires_after: Optional[VectorStoreExpiresAfter]
    expires_at: Optional[int]

class VectorStoreFileCounts(BaseModel):
    cancelled: int
    completed: int
    failed: int
    in_progress: int
    total: int

class VectorStoreExpiresAfter(BaseModel):
    anchor: Literal["last_active_at"]
    days: int

class VectorStoreDeleted(BaseModel):
    id: str
    deleted: bool
    object: Literal["vector_store.deleted"]

# Chunking strategy types
ChunkingStrategyParam = Union[
    AutoChunkingStrategyParam,
    StaticChunkingStrategyParam
]

class AutoChunkingStrategyParam(TypedDict, total=False):
    type: Required[Literal["auto"]]

class StaticChunkingStrategyParam(TypedDict, total=False):
    type: Required[Literal["static"]]
    static: Required[StaticChunkingStrategy]

class StaticChunkingStrategy(TypedDict, total=False):
    max_chunk_size_tokens: Required[int]
    chunk_overlap_tokens: Required[int]
```

### Upload Types

```python { .api }
class Upload(BaseModel):
    id: str
    bytes: int
    created_at: int
    expires_at: int
    filename: str
    object: Literal["upload"]
    purpose: str
    status: Literal["pending", "completed", "cancelled", "expired"]
    file: Optional[FileObject]

class UploadPart(BaseModel):
    id: str
    created_at: int
    object: Literal["upload.part"]
    upload_id: str

# Upload parameters
class UploadCreateParams(TypedDict, total=False):
    bytes: Required[int]
    filename: Required[str]
    mime_type: Required[str]
    purpose: Required[Literal["assistants", "batch", "fine-tune", "vision"]]

class UploadCompleteParams(TypedDict, total=False):
    part_ids: Required[List[str]]
    md5: NotRequired[str]
```

## Best Practices

### Models Management

- Regularly check for new model releases and capabilities
- Monitor fine-tuned model performance and update as needed
- Keep track of model deprecation schedules (see the sketch below)
- Use appropriate models for specific tasks to optimize costs
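
A minimal deprecation check, assuming you maintain your own list of deprecated model IDs; the `DEPRECATED_MODELS` set below is a hypothetical placeholder, since the Models API does not expose deprecation dates:

```python
from openai import OpenAI

client = OpenAI()

# Hypothetical list; keep it in sync with OpenAI's published
# deprecation schedule.
DEPRECATED_MODELS = {"text-davinci-003", "text-embedding-ada-002"}

for model in client.models.list():
    if model.id in DEPRECATED_MODELS:
        print(f"⚠️ {model.id} is on the deprecation list; plan a migration")
```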

### Content Moderation

- Implement moderation for all user-generated content
- Use appropriate moderation models for your content type
- Set reasonable thresholds for different risk categories (see the sketch below)
- Regularly review and update moderation policies
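
A sketch of per-category thresholds; the cutoff values below are illustrative assumptions, not recommended settings:

```python
from openai import OpenAI

client = OpenAI()

# Illustrative thresholds only; tune these for your own risk tolerance.
THRESHOLDS = {"violence": 0.3, "harassment": 0.5}
DEFAULT_THRESHOLD = 0.7

def risky_categories(result) -> list:
    """Return categories whose score exceeds its configured threshold."""
    scores = result.category_scores.model_dump()
    return [
        category for category, score in scores.items()
        if score > THRESHOLDS.get(category, DEFAULT_THRESHOLD)
    ]

result = client.moderations.create(input="Some user-generated text").results[0]
print(risky_categories(result))
```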

### Vector Stores

- Organize documents logically within vector stores
- Use meaningful names and metadata for easy management
- Monitor storage usage and implement cleanup procedures (see the sketch below)
- Keep vector stores updated when source documents change
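
A minimal cleanup sketch that deletes stores the API reports as expired, using the `list` and `delete` calls and the `status` values shown above:

```python
from openai import OpenAI

client = OpenAI()

# Delete vector stores whose expiration policy has already triggered.
for store in client.vector_stores.list():
    if store.status == "expired":
        result = client.vector_stores.delete(store.id)
        print(f"Deleted expired store {store.id}: {result.deleted}")
```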

### Webhooks

- Always verify webhook signatures for security
- Implement proper error handling for webhook events (see the idempotency sketch below)
- Use webhooks to trigger automated workflows
- Monitor webhook delivery and implement retry logic for failures
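
One practical piece of error handling is idempotency: a sketch that skips duplicate deliveries, assuming each event payload carries a unique `id` field:

```python
import json

# Events may be redelivered if your endpoint fails, so track which
# event IDs have already been handled.
processed_ids = set()

def handle_event_once(payload: bytes) -> None:
    event = json.loads(payload)
    event_id = event.get("id")
    if event_id in processed_ids:
        print(f"Skipping duplicate delivery of {event_id}")
        return
    processed_ids.add(event_id)
    print(f"Processing event {event_id}: {event.get('type')}")

handle_event_once(b'{"id": "evt_123", "type": "batch.completed"}')
handle_event_once(b'{"id": "evt_123", "type": "batch.completed"}')  # skipped
```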

### File Uploads

- Use chunked uploads for large files (>100MB)
- Implement progress tracking for better user experience
- Add retry logic for upload failures
- Validate file integrity with checksums when possible (see the sketch below)
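
A streaming checksum sketch, so large files are hashed without being read into memory at once; the digest can be passed as `md5` to `uploads.complete()`:

```python
import hashlib

def file_md5(path: str, chunk_size: int = 8 * 1024 * 1024) -> str:
    """Compute an MD5 digest chunk by chunk."""
    digest = hashlib.md5()
    with open(path, "rb") as f:
        while chunk := f.read(chunk_size):
            digest.update(chunk)
    return digest.hexdigest()

# Example (hypothetical file name):
# print(file_md5("large_dataset.jsonl"))
```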