docs
0
# Files
1
2
Upload and manage files for use with OpenAI features such as Assistants, Fine-tuning, Batch processing, and Vision. The Files API provides file storage with purpose-specific handling.
3
4
## Capabilities
5
6
### Upload File
7
8
Upload a file for use with OpenAI services.
9
10
```python { .api }
11
def create(
12
self,
13
*,
14
file: FileTypes,
15
purpose: FilePurpose,
16
expires_after: dict | Omit = omit,
17
extra_headers: dict[str, str] | None = None,
18
extra_query: dict[str, object] | None = None,
19
extra_body: dict[str, object] | None = None,
20
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
21
) -> FileObject:
22
"""
23
Upload a file for use with OpenAI services.
24
25
Args:
26
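file: File to upload. Can be raw bytes, a binary file object, a path-like object (e.g. pathlib.Path; a plain str path is not accepted), or a (filename, content[, content_type]) tuple.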
27
Maximum file size varies by purpose.
28
29
purpose: Intended purpose of the file. Options:
30
- "assistants": For Assistants API and file_search tool
31
- "batch": For Batch API operations
32
- "fine-tune": For fine-tuning jobs
33
- "vision": For vision model inputs
34
- "user_data": Flexible file type for any purpose
35
- "evals": For evaluation data sets
36
37
expires_after: Expiration policy for the file (ExpiresAfter type). By default,
38
files with purpose="batch" expire after 30 days and all other files persist
39
until manually deleted. Structure:
40
- anchor: "created_at" (file creation time)
41
- seconds: int (3600-2592000, time in seconds until expiration)
42
43
extra_headers: Additional HTTP headers.
44
extra_query: Additional query parameters.
45
extra_body: Additional JSON fields.
46
timeout: Request timeout in seconds.
47
48
Returns:
49
FileObject: Uploaded file metadata including ID.
50
51
Raises:
52
BadRequestError: Invalid file format, size, or purpose
53
AuthenticationError: Invalid API key
54
"""
55
```
56
57
Usage examples:
58
59
```python
60
from openai import OpenAI
61
62
client = OpenAI()
63
64
# Upload file for assistants
65
with open("document.pdf", "rb") as file:
66
response = client.files.create(
67
file=file,
68
purpose="assistants"
69
)
70
71
file_id = response.id
72
print(f"File ID: {file_id}")
73
74
# Upload for fine-tuning
75
with open("training_data.jsonl", "rb") as file:
76
response = client.files.create(
77
file=file,
78
purpose="fine-tune"
79
)
80
81
# Upload for batch processing
82
with open("batch_requests.jsonl", "rb") as file:
83
response = client.files.create(
84
file=file,
85
purpose="batch"
86
)
87
88
# Using file_from_path helper
89
from openai import file_from_path
90
91
response = client.files.create(
92
file=file_from_path("data.csv"),
93
purpose="assistants"
94
)
95
96
# Check upload details
97
print(f"Filename: {response.filename}")
98
print(f"Size: {response.bytes} bytes")
99
print(f"Purpose: {response.purpose}")
100
print(f"Status: {response.status}")
101
```
102
103
### Retrieve File Metadata
104
105
Get information about a specific file.
106
107
```python { .api }
108
def retrieve(
109
self,
110
file_id: str,
111
*,
112
extra_headers: dict[str, str] | None = None,
113
extra_query: dict[str, object] | None = None,
114
extra_body: dict[str, object] | None = None,
115
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
116
) -> FileObject:
117
"""
118
Retrieve file metadata.
119
120
Args:
121
file_id: The ID of the file to retrieve.
122
extra_headers: Additional HTTP headers.
123
extra_query: Additional query parameters.
124
extra_body: Additional JSON fields.
125
timeout: Request timeout in seconds.
126
127
Returns:
128
FileObject: File metadata.
129
130
Raises:
131
NotFoundError: File not found
132
"""
133
```
134
135
Usage example:
136
137
```python
138
file = client.files.retrieve("file-abc123")
139
140
print(f"Filename: {file.filename}")
141
print(f"Purpose: {file.purpose}")
142
print(f"Size: {file.bytes} bytes")
143
print(f"Created: {file.created_at}")
144
```
145
146
### List Files
147
148
List all uploaded files with optional filtering.
149
150
```python { .api }
151
def list(
152
self,
153
*,
154
purpose: str | Omit = omit,
155
limit: int | Omit = omit,
156
order: Literal["asc", "desc"] | Omit = omit,
157
after: str | Omit = omit,
158
extra_headers: dict[str, str] | None = None,
159
extra_query: dict[str, object] | None = None,
160
extra_body: dict[str, object] | None = None,
161
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
162
) -> SyncCursorPage[FileObject]:
163
"""
164
List uploaded files with optional filtering and pagination.
165
166
Args:
167
purpose: Filter by file purpose (e.g., "assistants", "fine-tune").
168
limit: Number of files to retrieve (max 10000). Default 10000.
169
order: Sort order. "asc" for ascending, "desc" for descending. Default "desc".
170
after: Cursor for pagination. Return files after this file ID.
171
extra_headers: Additional HTTP headers.
172
extra_query: Additional query parameters.
173
extra_body: Additional JSON fields.
174
timeout: Request timeout in seconds.
175
176
Returns:
177
SyncCursorPage[FileObject]: Cursor-paginated list of files.
178
"""
179
```
180
181
Usage examples:
182
183
```python
184
# List all files
185
files = client.files.list()
186
187
for file in files.data:
188
print(f"{file.filename} ({file.id})")
189
190
# Filter by purpose
191
assistant_files = client.files.list(purpose="assistants")
192
193
# Pagination
194
page1 = client.files.list(limit=10)
195
page2 = client.files.list(limit=10, after=page1.data[-1].id)
196
197
# Iterate through all files
198
for file in client.files.list():
199
print(file.filename)
200
```
201
202
### Delete File
203
204
Delete a file from OpenAI storage.
205
206
```python { .api }
207
def delete(
208
self,
209
file_id: str,
210
*,
211
extra_headers: dict[str, str] | None = None,
212
extra_query: dict[str, object] | None = None,
213
extra_body: dict[str, object] | None = None,
214
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
215
) -> FileDeleted:
216
"""
217
Delete a file.
218
219
Args:
220
file_id: The ID of the file to delete.
221
extra_headers: Additional HTTP headers.
222
extra_query: Additional query parameters.
223
extra_body: Additional JSON fields.
224
timeout: Request timeout in seconds.
225
226
Returns:
227
FileDeleted: Deletion confirmation.
228
229
Raises:
230
NotFoundError: File not found
231
"""
232
```
233
234
Usage example:
235
236
```python
237
# Delete file
238
result = client.files.delete("file-abc123")
239
240
print(f"Deleted: {result.id}")
241
print(f"Success: {result.deleted}")
242
```
243
244
### Download File Content
245
246
Retrieve the binary content of a file.
247
248
```python { .api }
249
def content(
250
self,
251
file_id: str,
252
*,
253
extra_headers: dict[str, str] | None = None,
254
extra_query: dict[str, object] | None = None,
255
extra_body: dict[str, object] | None = None,
256
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
257
) -> HttpxBinaryResponseContent:
258
"""
259
Retrieve file content.
260
261
Args:
262
file_id: The ID of the file to download.
263
extra_headers: Additional HTTP headers.
264
extra_query: Additional query parameters.
265
extra_body: Additional JSON fields.
266
timeout: Request timeout in seconds.
267
268
Returns:
269
HttpxBinaryResponseContent: File content as binary data.
270
271
Raises:
272
NotFoundError: File not found
273
"""
274
```
275
276
Usage example:
277
278
```python
279
from pathlib import Path
280
281
# Download file content
282
content = client.files.content("file-abc123")
283
284
# Save to file
285
Path("downloaded_file.txt").write_bytes(content.content)
286
287
# Or use read()
288
file_bytes = content.read()
289
290
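# Write to file (note: stream_to_file is deprecated and buffers the whole response; use client.files.with_streaming_response.content(...) for true streaming)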
291
content.stream_to_file("output.txt")
292
```
293
294
### Wait for Processing
295
296
Poll until file processing is complete (helper method).
297
298
```python { .api }
299
def wait_for_processing(
300
self,
301
file_id: str,
302
*,
303
poll_interval: float = 5.0,
304
max_wait_seconds: float = 1800,
305
) -> FileObject:
306
"""
307
Wait for file processing to complete.
308
309
Args:
310
file_id: The ID of the file to wait for.
311
poll_interval: Seconds between status checks. Default 5.0.
312
max_wait_seconds: Maximum seconds to wait. Default 1800 (30 minutes).
313
314
Returns:
315
FileObject: File once it reaches a terminal status; check `status`, which may be "processed" or "error".
316
317
Raises:
318
RuntimeError: Processing not completed within max_wait_seconds
319
APIError: A status-polling request failed (a file whose processing fails is returned with status "error" rather than raised)
320
"""
321
```
322
323
Usage example:
324
325
```python
326
# Upload and wait
327
with open("large_file.pdf", "rb") as file:
328
uploaded = client.files.create(file=file, purpose="assistants")
329
330
# Wait for processing
331
ready_file = client.files.wait_for_processing(uploaded.id)
332
333
print(f"File ready: {ready_file.status}")
334
```
335
336
## Types
337
338
```python { .api }
339
from typing import Literal, TypedDict, Required, Union, Iterator
340
from pydantic import BaseModel
341
342
class FileObject(BaseModel):
343
"""File metadata."""
344
id: str
345
bytes: int
346
created_at: int
347
filename: str
348
object: Literal["file"]
349
purpose: FilePurpose
350
status: FileStatus
351
status_details: str | None
352
353
class FileDeleted(BaseModel):
354
"""File deletion confirmation."""
355
id: str
356
deleted: bool
357
object: Literal["file"]
358
359
FilePurpose = Literal[
360
"assistants",
361
"assistants_output",
362
"batch",
363
"batch_output",
364
"fine-tune",
365
"fine-tune-results",
366
"vision",
367
"user_data",
368
"evals"
369
]
370
371
FileStatus = Literal["uploaded", "processed", "error"]
372
373
class ExpiresAfter(TypedDict):
374
"""File expiration policy configuration."""
375
anchor: Required[Literal["created_at"]]
376
"""Anchor timestamp after which the expiration policy applies. Currently only 'created_at' is supported."""
377
378
seconds: Required[int]
379
"""Number of seconds after the anchor time that the file will expire. Must be between 3600 (1 hour) and 2592000 (30 days)."""
380
381
# File types
382
FileTypes = Union[
383
FileContent, # File-like object
384
tuple[str | None, FileContent], # (filename, content)
385
tuple[str | None, FileContent, str | None] # (filename, content, content_type)
386
]
387
388
# Pagination
389
class SyncCursorPage[T](BaseModel):
390
data: list[T]
391
object: str
392
has_more: bool
393
def __iter__(self) -> Iterator[T]: ...
394
```
395
396
## File Size Limits
397
398
| Purpose | Format | Max Size |
399
|---------|--------|----------|
400
| assistants | Various | 512 MB |
401
| batch | JSONL | 100 MB |
402
| fine-tune | JSONL | 1 GB |
403
| vision | Images | 20 MB |
404
405
## Best Practices
406
407
```python
408
from openai import OpenAI
409
from pathlib import Path
410
411
client = OpenAI()
412
413
# 1. Check file exists before upload
414
file_path = Path("data.txt")
415
if file_path.exists():
416
with open(file_path, "rb") as f:
417
file = client.files.create(file=f, purpose="assistants")
418
419
# 2. Clean up unused files
420
files = client.files.list(purpose="assistants")
421
for file in files:
422
if should_delete(file):
423
client.files.delete(file.id)
424
425
# 3. Handle upload errors
426
from openai import APIError
427
428
try:
429
with open("large_file.pdf", "rb") as f:
430
file = client.files.create(file=f, purpose="assistants")
431
except APIError as e:
432
print(f"Upload failed: {e}")
433
434
# 4. Track file IDs for later use
435
uploaded_files = []
436
437
for file_path in ["file1.txt", "file2.txt"]:
438
with open(file_path, "rb") as f:
439
file = client.files.create(file=f, purpose="assistants")
440
uploaded_files.append(file.id)
441
442
# Use files with assistant
443
assistant = client.beta.assistants.create(
444
model="gpt-4",
445
tools=[{"type": "file_search"}],
446
tool_resources={"file_search": {"vector_stores": [{"file_ids": uploaded_files}]}}
447
)
448
```
449
450
## Async Usage
451
452
```python
453
import asyncio
454
from openai import AsyncOpenAI
455
456
async def upload_file():
457
client = AsyncOpenAI()
458
459
with open("document.pdf", "rb") as file:
460
response = await client.files.create(
461
file=file,
462
purpose="assistants"
463
)
464
465
return response.id
466
467
file_id = asyncio.run(upload_file())
468
```
469