or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

assistants.md audio.md batches.md chat-completions.md chatkit.md client-initialization.md completions.md containers.md conversations.md embeddings.md evals.md files.md fine-tuning.md images.md index.md models.md moderations.md realtime.md responses.md runs.md threads-messages.md uploads.md vector-stores.md videos.md webhooks.md
KNOWN_ISSUES.md

docs/vector-stores.md

0

# Vector Stores

1

2

Create and manage vector stores for semantic search and retrieval with the Assistants API. Vector stores enable file search capabilities by storing and indexing documents for efficient retrieval.

3

4

## Capabilities

5

6

### Create Vector Store

7

8

Create a new vector store for storing and searching documents.

9

10

```python { .api }

11

def create(

12

self,

13

*,

14

chunking_strategy: dict | Omit = omit,

15

description: str | Omit = omit,

16

expires_after: dict | Omit = omit,

17

file_ids: list[str] | Omit = omit,

18

metadata: dict[str, str] | Omit = omit,

19

name: str | Omit = omit,

20

extra_headers: dict[str, str] | None = None,

21

extra_query: dict[str, object] | None = None,

22

extra_body: dict[str, object] | None = None,

23

timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,

24

) -> VectorStore:

25

"""

26

Create a vector store for file search.

27

28

Args:

29

chunking_strategy: How to chunk files. Options:

30

- {"type": "auto"}: Automatic chunking (default)

31

- {"type": "static", "static": {"max_chunk_size_tokens": 800, "chunk_overlap_tokens": 400}}

32

33

description: Description of the vector store (optional).

34

35

expires_after: Expiration policy. Options:

36

- {"anchor": "last_active_at", "days": 7}: Expires 7 days after last use

37

- {"anchor": "last_active_at", "days": 1}: Expires 1 day after last use

38

39

file_ids: List of file IDs to add to the vector store (max 10000).

40

Files must have purpose="assistants".

41

42

metadata: Key-value pairs for storing additional info (max 16).

43

Keys max 64 chars, values max 512 chars.

44

45

name: Name of the vector store (optional).

46

47

extra_headers: Additional HTTP headers.

48

extra_query: Additional query parameters.

49

extra_body: Additional JSON fields.

50

timeout: Request timeout in seconds.

51

52

Returns:

53

VectorStore: Created vector store.

54

55

Raises:

56

BadRequestError: Invalid parameters or too many files

57

"""

58

```

59

60

Usage examples:

61

62

```python

63

from openai import OpenAI

64

65

client = OpenAI()

66

67

# Create empty vector store

68

vector_store = client.beta.vector_stores.create(

69

name="Product Documentation"

70

)

71

72

print(f"Vector Store ID: {vector_store.id}")

73

74

# Create with files

75

file_ids = ["file-abc123", "file-def456"]

76

77

vector_store = client.beta.vector_stores.create(

78

name="Knowledge Base",

79

file_ids=file_ids

80

)

81

82

# With expiration policy

83

vector_store = client.beta.vector_stores.create(

84

name="Temporary Store",

85

expires_after={"anchor": "last_active_at", "days": 7}

86

)

87

88

# With custom chunking

89

vector_store = client.beta.vector_stores.create(

90

name="Custom Chunking",

91

file_ids=file_ids,

92

chunking_strategy={

93

"type": "static",

94

"static": {

95

"max_chunk_size_tokens": 800,

96

"chunk_overlap_tokens": 400

97

}

98

}

99

)

100

101

# With metadata

102

vector_store = client.beta.vector_stores.create(

103

name="Project Docs",

104

metadata={

105

"project": "alpha",

106

"version": "1.0"

107

}

108

)

109

```

110

111

### Retrieve Vector Store

112

113

Get vector store details.

114

115

```python { .api }

116

def retrieve(

117

self,

118

vector_store_id: str,

119

*,

120

extra_headers: dict[str, str] | None = None,

121

extra_query: dict[str, object] | None = None,

122

extra_body: dict[str, object] | None = None,

123

timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,

124

) -> VectorStore:

125

"""

126

Retrieve vector store details.

127

128

Args:

129

vector_store_id: The ID of the vector store.

130

extra_headers: Additional HTTP headers.

131

extra_query: Additional query parameters.

132

extra_body: Additional JSON fields.

133

timeout: Request timeout in seconds.

134

135

Returns:

136

VectorStore: Vector store details.

137

138

Raises:

139

NotFoundError: Vector store not found

140

"""

141

```

142

143

Usage example:

144

145

```python

146

# Get vector store

147

store = client.beta.vector_stores.retrieve("vs_abc123")

148

149

print(f"Name: {store.name}")

150

print(f"File counts: {store.file_counts}")

151

print(f"Status: {store.status}")

152

```

153

154

### Update Vector Store

155

156

Modify vector store settings.

157

158

```python { .api }

159

def update(

160

self,

161

vector_store_id: str,

162

*,

163

name: str | Omit = omit,

164

expires_after: dict | Omit = omit,

165

metadata: dict[str, str] | Omit = omit,

166

extra_headers: dict[str, str] | None = None,

167

extra_query: dict[str, object] | None = None,

168

extra_body: dict[str, object] | None = None,

169

timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,

170

) -> VectorStore:

171

"""

172

Update vector store properties.

173

174

Args:

175

vector_store_id: The ID of the vector store.

176

name: New name for the vector store.

177

expires_after: New expiration policy.

178

metadata: New metadata (replaces existing).

179

extra_headers: Additional HTTP headers.

180

extra_query: Additional query parameters.

181

extra_body: Additional JSON fields.

182

timeout: Request timeout in seconds.

183

184

Returns:

185

VectorStore: Updated vector store.

186

"""

187

```

188

189

Usage example:

190

191

```python

192

# Update name

193

store = client.beta.vector_stores.update(

194

"vs_abc123",

195

name="Updated Documentation"

196

)

197

198

# Update metadata

199

store = client.beta.vector_stores.update(

200

"vs_abc123",

201

metadata={"version": "2.0"}

202

)

203

```

204

205

### List Vector Stores

206

207

List all vector stores with pagination.

208

209

```python { .api }

210

def list(

211

self,

212

*,

213

after: str | Omit = omit,

214

before: str | Omit = omit,

215

limit: int | Omit = omit,

216

order: Literal["asc", "desc"] | Omit = omit,

217

extra_headers: dict[str, str] | None = None,

218

extra_query: dict[str, object] | None = None,

219

extra_body: dict[str, object] | None = None,

220

timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,

221

) -> SyncCursorPage[VectorStore]:

222

"""

223

List vector stores with pagination.

224

225

Args:

226

after: Cursor for next page.

227

before: Cursor for previous page.

228

limit: Number to retrieve (max 100). Default 20.

229

order: Sort order. "asc" or "desc". Default "desc".

230

extra_headers: Additional HTTP headers.

231

extra_query: Additional query parameters.

232

extra_body: Additional JSON fields.

233

timeout: Request timeout in seconds.

234

235

Returns:

236

SyncCursorPage[VectorStore]: Paginated list of vector stores.

237

"""

238

```

239

240

Usage example:

241

242

```python

243

# List all stores

244

stores = client.beta.vector_stores.list()

245

246

for store in stores:

247

print(f"{store.name} ({store.id})")

248

249

# Pagination

250

page1 = client.beta.vector_stores.list(limit=10)

251

page2 = client.beta.vector_stores.list(limit=10, after=page1.data[-1].id)

252

```

253

254

### Delete Vector Store

255

256

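Delete a vector store. Files attached to the store are detached from it but are not deleted from the Files API; remove them separately with `client.files.delete()` if they are no longer needed.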

257

258

```python { .api }

259

def delete(

260

self,

261

vector_store_id: str,

262

*,

263

extra_headers: dict[str, str] | None = None,

264

extra_query: dict[str, object] | None = None,

265

extra_body: dict[str, object] | None = None,

266

timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,

267

) -> VectorStoreDeleted:

268

"""

269

Delete a vector store.

270

271

Args:

272

vector_store_id: The ID of the vector store to delete.

273

extra_headers: Additional HTTP headers.

274

extra_query: Additional query parameters.

275

extra_body: Additional JSON fields.

276

timeout: Request timeout in seconds.

277

278

Returns:

279

VectorStoreDeleted: Deletion confirmation.

280

281

Raises:

282

NotFoundError: Vector store not found

283

"""

284

```

285

286

Usage example:

287

288

```python

289

# Delete vector store

290

result = client.beta.vector_stores.delete("vs_abc123")

291

292

print(f"Deleted: {result.deleted}")

293

```

294

295

### Add Files to Vector Store

296

297

Add files to an existing vector store.

298

299

```python { .api }

300

def create(

301

self,

302

vector_store_id: str,

303

*,

304

file_id: str,

305

attributes: dict[str, str | float | bool] | None | Omit = omit,

306

chunking_strategy: dict | Omit = omit,

307

extra_headers: dict[str, str] | None = None,

308

extra_query: dict[str, object] | None = None,

309

extra_body: dict[str, object] | None = None,

310

timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,

311

) -> VectorStoreFile:

312

"""

313

Add a file to a vector store.

314

315

Args:

316

vector_store_id: The vector store ID.

317

file_id: The file ID to add.

318

attributes: Key-value pairs that can be attached to the file (max 16 pairs).

319

Keys: max 64 chars. Values: max 512 chars (strings) or numbers/booleans.

320

Useful for storing metadata like version numbers, categories, etc.

321

chunking_strategy: Chunking configuration (same as vector store create).

322

extra_headers: Additional HTTP headers.

323

extra_query: Additional query parameters.

324

extra_body: Additional JSON fields.

325

timeout: Request timeout in seconds.

326

327

Returns:

328

VectorStoreFile: Added file details.

329

"""

330

```

331

332

Usage example:

333

334

```python

335

# Add file to vector store

336

file = client.beta.vector_stores.files.create(

337

vector_store_id="vs_abc123",

338

file_id="file-xyz789"

339

)

340

341

print(f"File status: {file.status}")

342

```

343

344

### Retrieve Vector Store File

345

346

Get details about a file in a vector store.

347

348

```python { .api }

349

def retrieve(

350

self,

351

file_id: str,

352

*,

353

vector_store_id: str,

354

extra_headers: dict[str, str] | None = None,

355

extra_query: dict[str, object] | None = None,

356

extra_body: dict[str, object] | None = None,

357

timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,

358

) -> VectorStoreFile:

359

"""

360

Retrieve details about a vector store file.

361

362

Args:

363

file_id: The ID of the file.

364

vector_store_id: The vector store ID.

365

extra_headers: Additional HTTP headers.

366

extra_query: Additional query parameters.

367

extra_body: Additional JSON fields.

368

timeout: Request timeout in seconds.

369

370

Returns:

371

VectorStoreFile: File details including status and metadata.

372

373

Raises:

374

NotFoundError: File not found in vector store

375

"""

376

```

377

378

Usage example:

379

380

```python

381

# Get file details

382

file = client.beta.vector_stores.files.retrieve(

383

file_id="file-xyz789",

384

vector_store_id="vs_abc123"

385

)

386

387

print(f"Status: {file.status}")

388

print(f"Usage bytes: {file.usage_bytes}")

389

```

390

391

### Update Vector Store File

392

393

Update attributes on a vector store file.

394

395

```python { .api }

396

def update(

397

self,

398

file_id: str,

399

*,

400

vector_store_id: str,

401

attributes: dict[str, str | float | bool] | None,

402

extra_headers: dict[str, str] | None = None,

403

extra_query: dict[str, object] | None = None,

404

extra_body: dict[str, object] | None = None,

405

timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,

406

) -> VectorStoreFile:

407

"""

408

Update attributes on a vector store file.

409

410

Args:

411

file_id: The ID of the file.

412

vector_store_id: The vector store ID.

413

attributes: Key-value pairs to attach (max 16 pairs).

414

Keys: max 64 chars, Values: max 512 chars (or numbers/booleans).

415

extra_headers: Additional HTTP headers.

416

extra_query: Additional query parameters.

417

extra_body: Additional JSON fields.

418

timeout: Request timeout in seconds.

419

420

Returns:

421

VectorStoreFile: Updated file object.

422

"""

423

```

424

425

Usage example:

426

427

```python

428

# Update file attributes

429

file = client.beta.vector_stores.files.update(

430

file_id="file-xyz789",

431

vector_store_id="vs_abc123",

432

attributes={

433

"category": "documentation",

434

"version": "1.2.0",

435

"priority": 5

436

}

437

)

438

```

439

440

### List Vector Store Files

441

442

List all files in a vector store with pagination and filtering.

443

444

```python { .api }

445

def list(

446

self,

447

vector_store_id: str,

448

*,

449

after: str | Omit = omit,

450

before: str | Omit = omit,

451

filter: Literal["in_progress", "completed", "failed", "cancelled"] | Omit = omit,

452

limit: int | Omit = omit,

453

order: Literal["asc", "desc"] | Omit = omit,

454

extra_headers: dict[str, str] | None = None,

455

extra_query: dict[str, object] | None = None,

456

extra_body: dict[str, object] | None = None,

457

timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,

458

) -> SyncCursorPage[VectorStoreFile]:

459

"""

460

List files in a vector store with optional filtering.

461

462

Args:

463

vector_store_id: The vector store ID.

464

after: Cursor for pagination (object ID to start after).

465

before: Cursor for pagination (object ID to start before).

466

filter: Filter by file status: "in_progress", "completed", "failed", "cancelled".

467

limit: Number of files to return (1-100, default 20).

468

order: Sort order by created_at: "asc" or "desc".

469

extra_headers: Additional HTTP headers.

470

extra_query: Additional query parameters.

471

extra_body: Additional JSON fields.

472

timeout: Request timeout in seconds.

473

474

Returns:

475

SyncCursorPage[VectorStoreFile]: Paginated list of files.

476

"""

477

```

478

479

Usage examples:

480

481

```python

482

# List all files

483

files = client.beta.vector_stores.files.list(

484

vector_store_id="vs_abc123"

485

)

486

487

for file in files:

488

print(f"{file.id}: {file.status}")

489

490

# Filter by status

491

completed_files = client.beta.vector_stores.files.list(

492

vector_store_id="vs_abc123",

493

filter="completed"

494

)

495

496

# Pagination

497

page1 = client.beta.vector_stores.files.list(

498

vector_store_id="vs_abc123",

499

limit=10,

500

order="desc"

501

)

502

503

page2 = client.beta.vector_stores.files.list(

504

vector_store_id="vs_abc123",

505

limit=10,

506

after=page1.data[-1].id

507

)

508

```

509

510

### Delete Vector Store File

511

512

Remove a file from a vector store (does not delete the file itself).

513

514

```python { .api }

515

def delete(

516

self,

517

file_id: str,

518

*,

519

vector_store_id: str,

520

extra_headers: dict[str, str] | None = None,

521

extra_query: dict[str, object] | None = None,

522

extra_body: dict[str, object] | None = None,

523

timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,

524

) -> VectorStoreFileDeleted:

525

"""

526

Delete a vector store file.

527

528

This removes the file from the vector store but does not delete the file

529

itself. To delete the file, use client.files.delete().

530

531

Args:

532

file_id: The ID of the file.

533

vector_store_id: The vector store ID.

534

extra_headers: Additional HTTP headers.

535

extra_query: Additional query parameters.

536

extra_body: Additional JSON fields.

537

timeout: Request timeout in seconds.

538

539

Returns:

540

VectorStoreFileDeleted: Deletion confirmation.

541

"""

542

```

543

544

Usage example:

545

546

```python

547

# Remove file from vector store

548

deleted = client.beta.vector_stores.files.delete(

549

file_id="file-xyz789",

550

vector_store_id="vs_abc123"

551

)

552

553

print(f"Deleted: {deleted.id}")

554

555

# To also delete the file itself:

556

client.files.delete(file_id="file-xyz789")

557

```

558

559

### Get Vector Store File Content

560

561

Retrieve the parsed contents of a vector store file.

562

563

```python { .api }

564

def content(

565

self,

566

file_id: str,

567

*,

568

vector_store_id: str,

569

extra_headers: dict[str, str] | None = None,

570

extra_query: dict[str, object] | None = None,

571

extra_body: dict[str, object] | None = None,

572

timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,

573

) -> SyncPage[FileContentResponse]:

574

"""

575

Retrieve the parsed contents of a vector store file.

576

577

Args:

578

file_id: The ID of the file.

579

vector_store_id: The vector store ID.

580

extra_headers: Additional HTTP headers.

581

extra_query: Additional query parameters.

582

extra_body: Additional JSON fields.

583

timeout: Request timeout in seconds.

584

585

Returns:

586

SyncPage[FileContentResponse]: Parsed file content with chunks.

587

"""

588

```

589

590

Usage example:

591

592

```python

593

# Get parsed file content

594

content_pages = client.beta.vector_stores.files.content(

595

file_id="file-xyz789",

596

vector_store_id="vs_abc123"

597

)

598

599

for page in content_pages:

600

print(f"Content: {page.content}")

601

print(f"Metadata: {page.metadata}")

602

```

603

604

### Helper: Create and Poll

605

606

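Convenience method that combines `create()` and `poll()`: it attaches a file to the vector store and waits until processing finishes. The returned file may be in the `completed` or `failed` state, so check `status` before using it.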

607

608

```python { .api }

609

def create_and_poll(

610

self,

611

file_id: str,

612

*,

613

vector_store_id: str,

614

attributes: dict[str, str | float | bool] | None | Omit = omit,

615

poll_interval_ms: int | Omit = omit,

616

chunking_strategy: dict | Omit = omit,

617

) -> VectorStoreFile:

618

"""

619

Attach a file to the given vector store and wait for it to be processed.

620

621

Args:

622

file_id: The file ID to add.

623

vector_store_id: The vector store ID.

624

attributes: Key-value pairs to attach to the file.

625

poll_interval_ms: Polling interval in milliseconds. If not specified, uses server-suggested interval.

626

chunking_strategy: Chunking configuration.

627

628

Returns:

629

VectorStoreFile: Processed file details (may be completed or failed).

630

"""

631

```

632

633

Usage example:

634

635

```python

636

# Add file and wait for processing

637

file = client.beta.vector_stores.files.create_and_poll(

638

file_id="file-xyz789",

639

vector_store_id="vs_abc123",

640

poll_interval_ms=1000

641

)

642

643

print(f"Final status: {file.status}")

644

if file.status == "failed":

645

print(f"Error: {file.last_error}")

646

```

647

648

### Helper: Poll Processing

649

650

Wait for a vector store file to finish processing.

651

652

```python { .api }

653

def poll(

654

self,

655

file_id: str,

656

*,

657

vector_store_id: str,

658

poll_interval_ms: int | Omit = omit,

659

) -> VectorStoreFile:

660

"""

661

Wait for the vector store file to finish processing.

662

663

Note: this will return even if the file failed to process. Check

664

file.status and file.last_error to handle failures.

665

666

Args:

667

file_id: The file ID.

668

vector_store_id: The vector store ID.

669

poll_interval_ms: Polling interval in milliseconds. If not specified, uses server-suggested interval.

670

671

Returns:

672

VectorStoreFile: File details after processing completes (or fails).

673

"""

674

```

675

676

Usage example:

677

678

```python

679

# First create the file

680

file = client.beta.vector_stores.files.create(

681

file_id="file-xyz789",

682

vector_store_id="vs_abc123"

683

)

684

685

# Then poll until processing completes

686

processed_file = client.beta.vector_stores.files.poll(

687

file_id="file-xyz789",

688

vector_store_id="vs_abc123"

689

)

690

691

print(f"Status: {processed_file.status}")

692

```

693

694

### Helper: Upload and Attach

695

696

Upload a new file to the Files API and attach it to the vector store.

697

698

```python { .api }

699

def upload(

700

self,

701

*,

702

vector_store_id: str,

703

file: FileTypes,

704

chunking_strategy: dict | Omit = omit,

705

) -> VectorStoreFile:

706

"""

707

Upload a file to the Files API and attach it to the given vector store.

708

709

Note: The file will be asynchronously processed. Use upload_and_poll()

710

to wait for processing to complete.

711

712

Args:

713

vector_store_id: The vector store ID.

714

file: File to upload (path, file object, or bytes).

715

chunking_strategy: Chunking configuration.

716

717

Returns:

718

VectorStoreFile: File details (status will be "in_progress").

719

"""

720

```

721

722

Usage example:

723

724

```python

725

# Upload and attach file

726

with open("document.pdf", "rb") as f:

727

file = client.beta.vector_stores.files.upload(

728

vector_store_id="vs_abc123",

729

file=f

730

)

731

732

print(f"Uploaded file ID: {file.id}")

733

print(f"Status: {file.status}")

734

```

735

736

### Helper: Upload and Poll

737

738

Complete workflow in one call: upload a file, attach it to the vector store, and wait for processing to finish.

739

740

```python { .api }

741

def upload_and_poll(

742

self,

743

*,

744

vector_store_id: str,

745

file: FileTypes,

746

attributes: dict[str, str | float | bool] | None | Omit = omit,

747

poll_interval_ms: int | Omit = omit,

748

chunking_strategy: dict | Omit = omit,

749

) -> VectorStoreFile:

750

"""

751

Upload a file and poll until processing is complete.

752

753

This is the most convenient method for adding files - it handles

754

the upload, attachment, and waiting in one call.

755

756

Args:

757

vector_store_id: The vector store ID.

758

file: File to upload (path, file object, or bytes).

759

attributes: Key-value pairs to attach to the file.

760

poll_interval_ms: Polling interval in milliseconds.

761

chunking_strategy: Chunking configuration.

762

763

Returns:

764

VectorStoreFile: Processed file details (may be completed or failed).

765

"""

766

```

767

768

Usage example:

769

770

```python

771

# Complete workflow in one call

772

with open("document.pdf", "rb") as f:

773

file = client.beta.vector_stores.files.upload_and_poll(

774

vector_store_id="vs_abc123",

775

file=f,

776

attributes={"type": "documentation", "version": "2.0"},

777

poll_interval_ms=1000

778

)

779

780

print(f"File ID: {file.id}")

781

print(f"Status: {file.status}")

782

print(f"Usage bytes: {file.usage_bytes}")

783

784

if file.status == "failed":

785

print(f"Error: {file.last_error}")

786

```

787

788

### Search Vector Store

789

790

Search for relevant content in a vector store based on a query and optional file attributes filter.

791

792

```python { .api }

793

def search(

794

self,

795

vector_store_id: str,

796

*,

797

query: str | list[str],

798

filters: dict | Omit = omit,

799

max_num_results: int | Omit = omit,

800

ranking_options: dict | Omit = omit,

801

rewrite_query: bool | Omit = omit,

802

extra_headers: dict[str, str] | None = None,

803

extra_query: dict[str, object] | None = None,

804

extra_body: dict[str, object] | None = None,

805

timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,

806

) -> SyncPage[VectorStoreSearchResponse]:

807

"""

808

Search vector store for relevant content.

809

810

Args:

811

vector_store_id: The vector store ID.

812

query: Search query text (string or list of strings).

813

filters: A filter to apply based on file attributes.

814

max_num_results: Maximum number of results to return (1-50 inclusive).

815

ranking_options: Ranking options for search.

816

rewrite_query: Whether to rewrite the natural language query for vector search.

817

extra_headers: Additional HTTP headers.

818

extra_query: Additional query parameters.

819

extra_body: Additional JSON fields.

820

timeout: Request timeout in seconds.

821

822

Returns:

823

SyncPage[VectorStoreSearchResponse]: Paginated search results with relevant chunks.

824

"""

825

```

826

827

Usage example:

828

829

```python

830

# Search vector store

831

results = client.beta.vector_stores.search(

832

vector_store_id="vs_abc123",

833

query="How do I install the SDK?",

834

max_num_results=5

835

)

836

837

for result in results.data:

838

print(f"Score: {result.score}")

839

print(f"Content: {result.content}")

840

print(f"File: {result.file_id}")

841

```

842

843

### File Batches

844

845

Batch operations for adding multiple files to a vector store efficiently. Accessed via `client.beta.vector_stores.file_batches`.

846

847

```python { .api }

848

def create(

849

self,

850

vector_store_id: str,

851

*,

852

file_ids: list[str] | Omit = omit,

853

files: list[dict] | Omit = omit,

854

attributes: dict[str, str | float | bool] | None | Omit = omit,

855

chunking_strategy: dict | Omit = omit,

856

extra_headers: dict[str, str] | None = None,

857

extra_query: dict[str, object] | None = None,

858

extra_body: dict[str, object] | None = None,

859

timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,

860

) -> VectorStoreFileBatch:

861

"""

862

Create a batch of files to add to vector store.

863

864

Args:

865

vector_store_id: The vector store ID.

866

file_ids: List of file IDs to add (mutually exclusive with files).

867

files: List of file objects with per-file metadata (mutually exclusive with file_ids).

868

attributes: Metadata to apply to all files in batch.

869

chunking_strategy: Strategy for chunking files.

870

extra_headers: Additional HTTP headers.

871

extra_query: Additional query parameters.

872

extra_body: Additional JSON fields.

873

timeout: Request timeout in seconds.

874

875

Returns:

876

VectorStoreFileBatch: Created batch object.

877

"""

878

879

def retrieve(

880

self,

881

batch_id: str,

882

*,

883

vector_store_id: str,

884

extra_headers: dict[str, str] | None = None,

885

extra_query: dict[str, object] | None = None,

886

extra_body: dict[str, object] | None = None,

887

timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,

888

) -> VectorStoreFileBatch:

889

"""

890

Retrieve file batch status.

891

892

Args:

893

batch_id: The file batch ID.

894

vector_store_id: The vector store ID.

895

extra_headers: Additional HTTP headers.

896

extra_query: Additional query parameters.

897

extra_body: Additional JSON fields.

898

timeout: Request timeout in seconds.

899

900

Returns:

901

VectorStoreFileBatch: Batch details.

902

"""

903

904

def cancel(

905

self,

906

batch_id: str,

907

*,

908

vector_store_id: str,

909

extra_headers: dict[str, str] | None = None,

910

extra_query: dict[str, object] | None = None,

911

extra_body: dict[str, object] | None = None,

912

timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,

913

) -> VectorStoreFileBatch:

914

"""

915

Cancel an in-progress file batch.

916

917

Args:

918

batch_id: The file batch ID.

919

vector_store_id: The vector store ID.

920

extra_headers: Additional HTTP headers.

921

extra_query: Additional query parameters.

922

extra_body: Additional JSON fields.

923

timeout: Request timeout in seconds.

924

925

Returns:

926

VectorStoreFileBatch: Updated batch with cancelled status.

927

"""

928

```

929

930

Usage examples:

931

932

```python

933

# Create file batch

934

batch = client.beta.vector_stores.file_batches.create(

935

vector_store_id="vs_abc123",

936

file_ids=["file-1", "file-2", "file-3"]

937

)

938

939

print(f"Batch ID: {batch.id}")

940

print(f"Status: {batch.status}")

941

942

# Check batch status

943

batch = client.beta.vector_stores.file_batches.retrieve(

944

batch_id=batch.id,

945

vector_store_id="vs_abc123"

946

)

947

948

# Cancel batch if needed

949

batch = client.beta.vector_stores.file_batches.cancel(

950

batch_id=batch.id,

951

vector_store_id="vs_abc123"

952

)

953

```

954

955

## Types

956

957

```python { .api }

958

from typing import Literal

959

from pydantic import BaseModel

960

961

class VectorStore(BaseModel):

962

"""Vector store for file search."""

963

id: str

964

created_at: int

965

name: str

966

usage_bytes: int

967

file_counts: FileCounts

968

status: Literal["expired", "in_progress", "completed"]

969

expires_after: dict | None

970

expires_at: int | None

971

last_active_at: int | None

972

metadata: dict[str, str] | None

973

object: Literal["vector_store"]

974

975

class FileCounts(BaseModel):

976

"""File count statistics."""

977

in_progress: int

978

completed: int

979

failed: int

980

cancelled: int

981

total: int

982

983

class VectorStoreDeleted(BaseModel):

984

"""Deletion confirmation."""

985

id: str

986

deleted: bool

987

object: Literal["vector_store.deleted"]

988

989

class VectorStoreFile(BaseModel):

990

"""File in vector store."""

991

id: str

992

created_at: int

993

vector_store_id: str

994

usage_bytes: int

995

status: Literal["in_progress", "completed", "cancelled", "failed"]

996

last_error: dict | None

997

chunking_strategy: dict | None

998

object: Literal["vector_store.file"]

999

1000

class VectorStoreFileBatch(BaseModel):

1001

"""Batch of files being added to vector store."""

1002

id: str

1003

created_at: int

1004

vector_store_id: str

1005

status: Literal["in_progress", "completed", "cancelled", "failed"]

1006

file_counts: FileCounts

1007

object: Literal["vector_store.files_batch"]

1008

1009

class VectorStoreSearchResponse(BaseModel):

1010

"""Search results."""

1011

data: list[SearchResult]

1012

object: str

1013

1014

class SearchResult(BaseModel):

1015

"""Single search result."""

1016

content: str

1017

file_id: str

1018

score: float

1019

metadata: dict | None

1020

```

1021

1022

## Best Practices

1023

1024

```python

1025

from openai import OpenAI

1026

1027

client = OpenAI()

1028

1029

# 1. Create vector store with appropriate files

1030

# Upload files first

1031

file_ids = []

1032

for doc_path in ["doc1.pdf", "doc2.txt", "doc3.md"]:

1033

with open(doc_path, "rb") as f:

1034

file = client.files.create(file=f, purpose="assistants")

1035

file_ids.append(file.id)

1036

1037

# Create vector store

1038

store = client.beta.vector_stores.create(

1039

name="Product Documentation",

1040

file_ids=file_ids

1041

)

1042

1043

# 2. Wait for processing

1044

import time

1045

1046

while store.status == "in_progress":

1047

time.sleep(2)

1048

store = client.beta.vector_stores.retrieve(store.id)

1049

1050

print(f"Status: {store.status}")

1051

print(f"Completed files: {store.file_counts.completed}")

1052

1053

# 3. Use with Assistant

1054

assistant = client.beta.assistants.create(

1055

name="Documentation Assistant",

1056

instructions="Help users find information in documentation.",

1057

model="gpt-4",

1058

tools=[{"type": "file_search"}],

1059

tool_resources={

1060

"file_search": {

1061

"vector_store_ids": [store.id]

1062

}

1063

}

1064

)

1065

1066

# 4. Clean up expired stores

1067

stores = client.beta.vector_stores.list()

1068

for store in stores:

1069

if store.status == "expired":

1070

client.beta.vector_stores.delete(store.id)

1071

```

1072

1073

## Async Usage

1074

1075

```python

1076

import asyncio

1077

from openai import AsyncOpenAI

1078

1079

async def create_store():

1080

client = AsyncOpenAI()

1081

1082

store = await client.beta.vector_stores.create(

1083

name="Async Store",

1084

file_ids=["file-abc123"]

1085

)

1086

1087

return store.id

1088

1089

store_id = asyncio.run(create_store())

1090

```

1091