or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

assistants.md audio.md batches.md chat-completions.md chatkit.md client-initialization.md completions.md containers.md conversations.md embeddings.md evals.md files.md fine-tuning.md images.md index.md models.md moderations.md realtime.md responses.md runs.md threads-messages.md uploads.md vector-stores.md videos.md webhooks.md
KNOWN_ISSUES.md

docs/files.md

0

# Files

1

2

Upload and manage files for use with OpenAI features like Assistants, Fine-tuning, Batch processing, and Vision. Provides file storage with purpose-specific handling.

3

4

## Capabilities

5

6

### Upload File

7

8

Upload a file for use with OpenAI services.

9

10

```python { .api }

11

def create(

12

self,

13

*,

14

file: FileTypes,

15

purpose: FilePurpose,

16

expires_after: dict | Omit = omit,

17

extra_headers: dict[str, str] | None = None,

18

extra_query: dict[str, object] | None = None,

19

extra_body: dict[str, object] | None = None,

20

timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,

21

) -> FileObject:

22

"""

23

Upload a file for use with OpenAI services.

24

25

Args:

26

file: File to upload. Can be bytes, a file object, a PathLike (e.g. pathlib.Path), or a tuple.

27

Maximum file size varies by purpose.

28

29

purpose: Intended purpose of the file. Options:

30

- "assistants": For Assistants API and file_search tool

31

- "batch": For Batch API operations

32

- "fine-tune": For fine-tuning jobs

33

- "vision": For vision model inputs

34

- "user_data": Flexible file type for any purpose

35

- "evals": For evaluation data sets

36

37

expires_after: Expiration policy for the file (ExpiresAfter type). By default,

38

files with purpose="batch" expire after 30 days and all other files persist

39

until manually deleted. Structure:

40

- anchor: "created_at" (file creation time)

41

- seconds: int (3600-2592000, time in seconds until expiration)

42

43

extra_headers: Additional HTTP headers.

44

extra_query: Additional query parameters.

45

extra_body: Additional JSON fields.

46

timeout: Request timeout in seconds.

47

48

Returns:

49

FileObject: Uploaded file metadata including ID.

50

51

Raises:

52

BadRequestError: Invalid file format, size, or purpose

53

AuthenticationError: Invalid API key

54

"""

55

```

56

57

Usage examples:

58

59

```python

60

from openai import OpenAI

61

62

client = OpenAI()

63

64

# Upload file for assistants

65

with open("document.pdf", "rb") as file:

66

response = client.files.create(

67

file=file,

68

purpose="assistants"

69

)

70

71

file_id = response.id

72

print(f"File ID: {file_id}")

73

74

# Upload for fine-tuning

75

with open("training_data.jsonl", "rb") as file:

76

response = client.files.create(

77

file=file,

78

purpose="fine-tune"

79

)

80

81

# Upload for batch processing

82

with open("batch_requests.jsonl", "rb") as file:

83

response = client.files.create(

84

file=file,

85

purpose="batch"

86

)

87

88

# Using file_from_path helper

89

from openai import file_from_path

90

91

response = client.files.create(

92

file=file_from_path("data.csv"),

93

purpose="assistants"

94

)

95

96

# Check upload details

97

print(f"Filename: {response.filename}")

98

print(f"Size: {response.bytes} bytes")

99

print(f"Purpose: {response.purpose}")

100

print(f"Status: {response.status}")

101

```

102

103

### Retrieve File Metadata

104

105

Get information about a specific file.

106

107

```python { .api }

108

def retrieve(

109

self,

110

file_id: str,

111

*,

112

extra_headers: dict[str, str] | None = None,

113

extra_query: dict[str, object] | None = None,

114

extra_body: dict[str, object] | None = None,

115

timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,

116

) -> FileObject:

117

"""

118

Retrieve file metadata.

119

120

Args:

121

file_id: The ID of the file to retrieve.

122

extra_headers: Additional HTTP headers.

123

extra_query: Additional query parameters.

124

extra_body: Additional JSON fields.

125

timeout: Request timeout in seconds.

126

127

Returns:

128

FileObject: File metadata.

129

130

Raises:

131

NotFoundError: File not found

132

"""

133

```

134

135

Usage example:

136

137

```python

138

file = client.files.retrieve("file-abc123")

139

140

print(f"Filename: {file.filename}")

141

print(f"Purpose: {file.purpose}")

142

print(f"Size: {file.bytes} bytes")

143

print(f"Created: {file.created_at}")

144

```

145

146

### List Files

147

148

List all uploaded files with optional filtering.

149

150

```python { .api }

151

def list(

152

self,

153

*,

154

purpose: str | Omit = omit,

155

limit: int | Omit = omit,

156

order: Literal["asc", "desc"] | Omit = omit,

157

after: str | Omit = omit,

158

extra_headers: dict[str, str] | None = None,

159

extra_query: dict[str, object] | None = None,

160

extra_body: dict[str, object] | None = None,

161

timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,

162

) -> SyncCursorPage[FileObject]:

163

"""

164

List uploaded files with optional filtering and pagination.

165

166

Args:

167

purpose: Filter by file purpose (e.g., "assistants", "fine-tune").

168

limit: Number of files to retrieve (max 10000). Default 10000.

169

order: Sort order. "asc" for ascending, "desc" for descending. Default "desc".

170

after: Cursor for pagination. Return files after this file ID.

171

extra_headers: Additional HTTP headers.

172

extra_query: Additional query parameters.

173

extra_body: Additional JSON fields.

174

timeout: Request timeout in seconds.

175

176

Returns:

177

SyncCursorPage[FileObject]: Cursor-paginated list of files.

178

"""

179

```

180

181

Usage examples:

182

183

```python

184

# List all files

185

files = client.files.list()

186

187

for file in files.data:

188

print(f"{file.filename} ({file.id})")

189

190

# Filter by purpose

191

assistant_files = client.files.list(purpose="assistants")

192

193

# Pagination

194

page1 = client.files.list(limit=10)

195

page2 = client.files.list(limit=10, after=page1.data[-1].id)

196

197

# Iterate through all files

198

for file in client.files.list():

199

print(file.filename)

200

```

201

202

### Delete File

203

204

Delete a file from OpenAI storage.

205

206

```python { .api }

207

def delete(

208

self,

209

file_id: str,

210

*,

211

extra_headers: dict[str, str] | None = None,

212

extra_query: dict[str, object] | None = None,

213

extra_body: dict[str, object] | None = None,

214

timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,

215

) -> FileDeleted:

216

"""

217

Delete a file.

218

219

Args:

220

file_id: The ID of the file to delete.

221

extra_headers: Additional HTTP headers.

222

extra_query: Additional query parameters.

223

extra_body: Additional JSON fields.

224

timeout: Request timeout in seconds.

225

226

Returns:

227

FileDeleted: Deletion confirmation.

228

229

Raises:

230

NotFoundError: File not found

231

"""

232

```

233

234

Usage example:

235

236

```python

237

# Delete file

238

result = client.files.delete("file-abc123")

239

240

print(f"Deleted: {result.id}")

241

print(f"Success: {result.deleted}")

242

```

243

244

### Download File Content

245

246

Retrieve the binary content of a file.

247

248

```python { .api }

249

def content(

250

self,

251

file_id: str,

252

*,

253

extra_headers: dict[str, str] | None = None,

254

extra_query: dict[str, object] | None = None,

255

extra_body: dict[str, object] | None = None,

256

timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,

257

) -> HttpxBinaryResponseContent:

258

"""

259

Retrieve file content.

260

261

Args:

262

file_id: The ID of the file to download.

263

extra_headers: Additional HTTP headers.

264

extra_query: Additional query parameters.

265

extra_body: Additional JSON fields.

266

timeout: Request timeout in seconds.

267

268

Returns:

269

HttpxBinaryResponseContent: File content as binary data.

270

271

Raises:

272

NotFoundError: File not found

273

"""

274

```

275

276

Usage example:

277

278

```python

279

from pathlib import Path

280

281

# Download file content

282

content = client.files.content("file-abc123")

283

284

# Save to file

285

Path("downloaded_file.txt").write_bytes(content.content)

286

287

# Or use read()

288

file_bytes = content.read()

289

290

# Stream to file

291

content.stream_to_file("output.txt")

292

```

293

294

### Wait for Processing

295

296

Poll until file processing is complete (helper method).

297

298

```python { .api }

299

def wait_for_processing(

300

self,

301

file_id: str,

302

*,

303

poll_interval: float = 5.0,

304

max_wait_seconds: float = 1800,

305

) -> FileObject:

306

"""

307

Wait for file processing to complete.

308

309

Args:

310

file_id: The ID of the file to wait for.

311

poll_interval: Seconds between status checks. Default 5.0.

312

max_wait_seconds: Maximum seconds to wait. Default 1800 (30 minutes).

313

314

Returns:

315

FileObject: File with completed status.

316

317

Raises:

318

RuntimeError: Processing not completed within max_wait_seconds

319

APIError: Processing failed

320

"""

321

```

322

323

Usage example:

324

325

```python

326

# Upload and wait

327

with open("large_file.pdf", "rb") as file:

328

uploaded = client.files.create(file=file, purpose="assistants")

329

330

# Wait for processing

331

ready_file = client.files.wait_for_processing(uploaded.id)

332

333

print(f"File ready: {ready_file.status}")

334

```

335

336

## Types

337

338

```python { .api }

339

from typing import Literal, TypedDict, Required, Union, Iterator

340

from pydantic import BaseModel

341

342

class FileObject(BaseModel):

343

"""File metadata."""

344

id: str

345

bytes: int

346

created_at: int

347

filename: str

348

object: Literal["file"]

349

purpose: FilePurpose

350

status: FileStatus

351

status_details: str | None

352

353

class FileDeleted(BaseModel):

354

"""File deletion confirmation."""

355

id: str

356

deleted: bool

357

object: Literal["file"]

358

359

FilePurpose = Literal[

360

"assistants",

361

"assistants_output",

362

"batch",

363

"batch_output",

364

"fine-tune",

365

"fine-tune-results",

366

"vision",

367

"user_data",

368

"evals"

369

]

370

371

FileStatus = Literal["uploaded", "processed", "error"]

372

373

class ExpiresAfter(TypedDict):

374

"""File expiration policy configuration."""

375

anchor: Required[Literal["created_at"]]

376

"""Anchor timestamp after which the expiration policy applies. Currently only 'created_at' is supported."""

377

378

seconds: Required[int]

379

"""Number of seconds after the anchor time that the file will expire. Must be between 3600 (1 hour) and 2592000 (30 days)."""

380

381

# File types

382

FileTypes = Union[

383

FileContent, # bytes, file-like object, or PathLike

384

tuple[str | None, FileContent], # (filename, content)

385

tuple[str | None, FileContent, str | None] # (filename, content, content_type)

386

]

387

388

# Pagination

389

class SyncPage[T](BaseModel):

390

data: list[T]

391

object: str

392

has_more: bool

393

def __iter__(self) -> Iterator[T]: ...

394

```

395

396

## File Size Limits

397

398

| Purpose | Format | Max Size |

399

|---------|--------|----------|

400

| assistants | Various | 512 MB |

401

| batch | JSONL | 100 MB |

402

| fine-tune | JSONL | 1 GB |

403

| vision | Images | 20 MB |

404

405

## Best Practices

406

407

```python

408

from openai import OpenAI

409

from pathlib import Path

410

411

client = OpenAI()

412

413

# 1. Check file exists before upload

414

file_path = Path("data.txt")

415

if file_path.exists():

416

with open(file_path, "rb") as f:

417

file = client.files.create(file=f, purpose="assistants")

418

419

# 2. Clean up unused files

420

files = client.files.list(purpose="assistants")

421

for file in files:

422

if should_delete(file):

423

client.files.delete(file.id)

424

425

# 3. Handle upload errors

426

from openai import APIError

427

428

try:

429

with open("large_file.pdf", "rb") as f:

430

file = client.files.create(file=f, purpose="assistants")

431

except APIError as e:

432

print(f"Upload failed: {e}")

433

434

# 4. Track file IDs for later use

435

uploaded_files = []

436

437

for file_path in ["file1.txt", "file2.txt"]:

438

with open(file_path, "rb") as f:

439

file = client.files.create(file=f, purpose="assistants")

440

uploaded_files.append(file.id)

441

442

# Use files with assistant

443

assistant = client.beta.assistants.create(

444

model="gpt-4",

445

tools=[{"type": "file_search"}],

446

tool_resources={"file_search": {"file_ids": uploaded_files}}

447

)

448

```

449

450

## Async Usage

451

452

```python

453

import asyncio

454

from openai import AsyncOpenAI

455

456

async def upload_file():

457

client = AsyncOpenAI()

458

459

with open("document.pdf", "rb") as file:

460

response = await client.files.create(

461

file=file,

462

purpose="assistants"

463

)

464

465

return response.id

466

467

file_id = asyncio.run(upload_file())

468

```

469