docs
0
# Vector Stores
1
2
Create and manage vector stores for semantic search and retrieval with the Assistants API. Vector stores enable file search capabilities by storing and indexing documents for efficient retrieval.
3
4
## Capabilities
5
6
### Create Vector Store
7
8
Create a new vector store for storing and searching documents.
9
10
```python { .api }
11
def create(
12
self,
13
*,
14
chunking_strategy: dict | Omit = omit,
15
description: str | Omit = omit,
16
expires_after: dict | Omit = omit,
17
file_ids: list[str] | Omit = omit,
18
metadata: dict[str, str] | Omit = omit,
19
name: str | Omit = omit,
20
extra_headers: dict[str, str] | None = None,
21
extra_query: dict[str, object] | None = None,
22
extra_body: dict[str, object] | None = None,
23
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
24
) -> VectorStore:
25
"""
26
Create a vector store for file search.
27
28
Args:
29
chunking_strategy: How to chunk files. Options:
30
- {"type": "auto"}: Automatic chunking (default)
31
- {"type": "static", "static": {"max_chunk_size_tokens": 800, "chunk_overlap_tokens": 400}}
32
33
description: Description of the vector store (optional).
34
35
expires_after: Expiration policy, given as an anchor timestamp plus a number of days. Examples:
36
- {"anchor": "last_active_at", "days": 7}: Expires 7 days after last use
37
- {"anchor": "last_active_at", "days": 1}: Expires 1 day after last use
38
39
file_ids: List of file IDs to add to the vector store (max 10000).
40
Files must have purpose="assistants".
41
42
metadata: Key-value pairs for storing additional info (max 16 pairs).
43
Keys max 64 chars, values max 512 chars.
44
45
name: Name of the vector store (optional).
46
47
extra_headers: Additional HTTP headers.
48
extra_query: Additional query parameters.
49
extra_body: Additional JSON fields.
50
timeout: Request timeout in seconds.
51
52
Returns:
53
VectorStore: Created vector store.
54
55
Raises:
56
BadRequestError: Invalid parameters or too many files
57
"""
58
```
59
60
Usage examples:
61
62
```python
63
from openai import OpenAI
64
65
client = OpenAI()
66
67
# Create empty vector store
68
vector_store = client.beta.vector_stores.create(
69
name="Product Documentation"
70
)
71
72
print(f"Vector Store ID: {vector_store.id}")
73
74
# Create with files
75
file_ids = ["file-abc123", "file-def456"]
76
77
vector_store = client.beta.vector_stores.create(
78
name="Knowledge Base",
79
file_ids=file_ids
80
)
81
82
# With expiration policy
83
vector_store = client.beta.vector_stores.create(
84
name="Temporary Store",
85
expires_after={"anchor": "last_active_at", "days": 7}
86
)
87
88
# With custom chunking
89
vector_store = client.beta.vector_stores.create(
90
name="Custom Chunking",
91
file_ids=file_ids,
92
chunking_strategy={
93
"type": "static",
94
"static": {
95
"max_chunk_size_tokens": 800,
96
"chunk_overlap_tokens": 400
97
}
98
}
99
)
100
101
# With metadata
102
vector_store = client.beta.vector_stores.create(
103
name="Project Docs",
104
metadata={
105
"project": "alpha",
106
"version": "1.0"
107
}
108
)
109
```
110
111
### Retrieve Vector Store
112
113
Get vector store details.
114
115
```python { .api }
116
def retrieve(
117
self,
118
vector_store_id: str,
119
*,
120
extra_headers: dict[str, str] | None = None,
121
extra_query: dict[str, object] | None = None,
122
extra_body: dict[str, object] | None = None,
123
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
124
) -> VectorStore:
125
"""
126
Retrieve vector store details.
127
128
Args:
129
vector_store_id: The ID of the vector store.
130
extra_headers: Additional HTTP headers.
131
extra_query: Additional query parameters.
132
extra_body: Additional JSON fields.
133
timeout: Request timeout in seconds.
134
135
Returns:
136
VectorStore: Vector store details.
137
138
Raises:
139
NotFoundError: Vector store not found
140
"""
141
```
142
143
Usage example:
144
145
```python
146
# Get vector store
147
store = client.beta.vector_stores.retrieve("vs_abc123")
148
149
print(f"Name: {store.name}")
150
print(f"File counts: {store.file_counts}")
151
print(f"Status: {store.status}")
152
```
153
154
### Update Vector Store
155
156
Modify vector store settings.
157
158
```python { .api }
159
def update(
160
self,
161
vector_store_id: str,
162
*,
163
name: str | Omit = omit,
164
expires_after: dict | Omit = omit,
165
metadata: dict[str, str] | Omit = omit,
166
extra_headers: dict[str, str] | None = None,
167
extra_query: dict[str, object] | None = None,
168
extra_body: dict[str, object] | None = None,
169
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
170
) -> VectorStore:
171
"""
172
Update vector store properties.
173
174
Args:
175
vector_store_id: The ID of the vector store.
176
name: New name for the vector store.
177
expires_after: New expiration policy.
178
metadata: New metadata (replaces existing).
179
extra_headers: Additional HTTP headers.
180
extra_query: Additional query parameters.
181
extra_body: Additional JSON fields.
182
timeout: Request timeout in seconds.
183
184
Returns:
185
VectorStore: Updated vector store.
186
"""
187
```
188
189
Usage example:
190
191
```python
192
# Update name
193
store = client.beta.vector_stores.update(
194
"vs_abc123",
195
name="Updated Documentation"
196
)
197
198
# Update metadata
199
store = client.beta.vector_stores.update(
200
"vs_abc123",
201
metadata={"version": "2.0"}
202
)
203
```
204
205
### List Vector Stores
206
207
List all vector stores with pagination.
208
209
```python { .api }
210
def list(
211
self,
212
*,
213
after: str | Omit = omit,
214
before: str | Omit = omit,
215
limit: int | Omit = omit,
216
order: Literal["asc", "desc"] | Omit = omit,
217
extra_headers: dict[str, str] | None = None,
218
extra_query: dict[str, object] | None = None,
219
extra_body: dict[str, object] | None = None,
220
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
221
) -> SyncCursorPage[VectorStore]:
222
"""
223
List vector stores with pagination.
224
225
Args:
226
after: Cursor for pagination (object ID to start after, e.g. the ID of the last item on the current page).
227
before: Cursor for pagination (object ID to start before, e.g. the ID of the first item on the current page).
228
limit: Number of vector stores to return (max 100, default 20).
229
order: Sort order. "asc" or "desc". Default "desc".
230
extra_headers: Additional HTTP headers.
231
extra_query: Additional query parameters.
232
extra_body: Additional JSON fields.
233
timeout: Request timeout in seconds.
234
235
Returns:
236
SyncCursorPage[VectorStore]: Paginated list of vector stores.
237
"""
238
```
239
240
Usage example:
241
242
```python
243
# List all stores
244
stores = client.beta.vector_stores.list()
245
246
for store in stores:
247
print(f"{store.name} ({store.id})")
248
249
# Pagination
250
page1 = client.beta.vector_stores.list(limit=10)
251
page2 = client.beta.vector_stores.list(limit=10, after=page1.data[-1].id)
252
```
253
254
### Delete Vector Store
255
256
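Delete a vector store. The files attached to it are detached from the store but are not deleted from the Files API; use client.files.delete() to remove the files themselves.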
257
258
```python { .api }
259
def delete(
260
self,
261
vector_store_id: str,
262
*,
263
extra_headers: dict[str, str] | None = None,
264
extra_query: dict[str, object] | None = None,
265
extra_body: dict[str, object] | None = None,
266
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
267
) -> VectorStoreDeleted:
268
"""
269
Delete a vector store.
270
271
Args:
272
vector_store_id: The ID of the vector store to delete.
273
extra_headers: Additional HTTP headers.
274
extra_query: Additional query parameters.
275
extra_body: Additional JSON fields.
276
timeout: Request timeout in seconds.
277
278
Returns:
279
VectorStoreDeleted: Deletion confirmation.
280
281
Raises:
282
NotFoundError: Vector store not found
283
"""
284
```
285
286
Usage example:
287
288
```python
289
# Delete vector store
290
result = client.beta.vector_stores.delete("vs_abc123")
291
292
print(f"Deleted: {result.deleted}")
293
```
294
295
### Add Files to Vector Store
296
297
Add files to an existing vector store.
298
299
```python { .api }
300
def create(
301
self,
302
vector_store_id: str,
303
*,
304
file_id: str,
305
attributes: dict[str, str | float | bool] | None | Omit = omit,
306
chunking_strategy: dict | Omit = omit,
307
extra_headers: dict[str, str] | None = None,
308
extra_query: dict[str, object] | None = None,
309
extra_body: dict[str, object] | None = None,
310
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
311
) -> VectorStoreFile:
312
"""
313
Add a file to a vector store.
314
315
Args:
316
vector_store_id: The vector store ID.
317
file_id: The file ID to add.
318
attributes: Key-value pairs that can be attached to the file (max 16 pairs).
319
Keys: max 64 chars. Values: max 512 chars (strings) or numbers/booleans.
320
Useful for storing metadata like version numbers, categories, etc.
321
chunking_strategy: Chunking configuration (same as vector store create).
322
extra_headers: Additional HTTP headers.
323
extra_query: Additional query parameters.
324
extra_body: Additional JSON fields.
325
timeout: Request timeout in seconds.
326
327
Returns:
328
VectorStoreFile: Added file details.
329
"""
330
```
331
332
Usage example:
333
334
```python
335
# Add file to vector store
336
file = client.beta.vector_stores.files.create(
337
vector_store_id="vs_abc123",
338
file_id="file-xyz789"
339
)
340
341
print(f"File status: {file.status}")
342
```
343
344
### Retrieve Vector Store File
345
346
Get details about a file in a vector store.
347
348
```python { .api }
349
def retrieve(
350
self,
351
file_id: str,
352
*,
353
vector_store_id: str,
354
extra_headers: dict[str, str] | None = None,
355
extra_query: dict[str, object] | None = None,
356
extra_body: dict[str, object] | None = None,
357
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
358
) -> VectorStoreFile:
359
"""
360
Retrieve details about a vector store file.
361
362
Args:
363
file_id: The ID of the file.
364
vector_store_id: The vector store ID.
365
extra_headers: Additional HTTP headers.
366
extra_query: Additional query parameters.
367
extra_body: Additional JSON fields.
368
timeout: Request timeout in seconds.
369
370
Returns:
371
VectorStoreFile: File details including status and metadata.
372
373
Raises:
374
NotFoundError: File not found in vector store
375
"""
376
```
377
378
Usage example:
379
380
```python
381
# Get file details
382
file = client.beta.vector_stores.files.retrieve(
383
file_id="file-xyz789",
384
vector_store_id="vs_abc123"
385
)
386
387
print(f"Status: {file.status}")
388
print(f"Usage bytes: {file.usage_bytes}")
389
```
390
391
### Update Vector Store File
392
393
Update attributes on a vector store file.
394
395
```python { .api }
396
def update(
397
self,
398
file_id: str,
399
*,
400
vector_store_id: str,
401
attributes: dict[str, str | float | bool] | None,
402
extra_headers: dict[str, str] | None = None,
403
extra_query: dict[str, object] | None = None,
404
extra_body: dict[str, object] | None = None,
405
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
406
) -> VectorStoreFile:
407
"""
408
Update attributes on a vector store file.
409
410
Args:
411
file_id: The ID of the file.
412
vector_store_id: The vector store ID.
413
attributes: Key-value pairs to attach (max 16 pairs).
414
Keys: max 64 chars, Values: max 512 chars (or numbers/booleans).
415
extra_headers: Additional HTTP headers.
416
extra_query: Additional query parameters.
417
extra_body: Additional JSON fields.
418
timeout: Request timeout in seconds.
419
420
Returns:
421
VectorStoreFile: Updated file object.
422
"""
423
```
424
425
Usage example:
426
427
```python
428
# Update file attributes
429
file = client.beta.vector_stores.files.update(
430
file_id="file-xyz789",
431
vector_store_id="vs_abc123",
432
attributes={
433
"category": "documentation",
434
"version": "1.2.0",
435
"priority": 5
436
}
437
)
438
```
439
440
### List Vector Store Files
441
442
List all files in a vector store with pagination and filtering.
443
444
```python { .api }
445
def list(
446
self,
447
vector_store_id: str,
448
*,
449
after: str | Omit = omit,
450
before: str | Omit = omit,
451
filter: Literal["in_progress", "completed", "failed", "cancelled"] | Omit = omit,
452
limit: int | Omit = omit,
453
order: Literal["asc", "desc"] | Omit = omit,
454
extra_headers: dict[str, str] | None = None,
455
extra_query: dict[str, object] | None = None,
456
extra_body: dict[str, object] | None = None,
457
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
458
) -> SyncCursorPage[VectorStoreFile]:
459
"""
460
List files in a vector store with optional filtering.
461
462
Args:
463
vector_store_id: The vector store ID.
464
after: Cursor for pagination (object ID to start after).
465
before: Cursor for pagination (object ID to start before).
466
filter: Filter by file status: "in_progress", "completed", "failed", "cancelled".
467
limit: Number of files to return (1-100, default 20).
468
order: Sort order by created_at: "asc" or "desc".
469
extra_headers: Additional HTTP headers.
470
extra_query: Additional query parameters.
471
extra_body: Additional JSON fields.
472
timeout: Request timeout in seconds.
473
474
Returns:
475
SyncCursorPage[VectorStoreFile]: Paginated list of files.
476
"""
477
```
478
479
Usage examples:
480
481
```python
482
# List all files
483
files = client.beta.vector_stores.files.list(
484
vector_store_id="vs_abc123"
485
)
486
487
for file in files:
488
print(f"{file.id}: {file.status}")
489
490
# Filter by status
491
completed_files = client.beta.vector_stores.files.list(
492
vector_store_id="vs_abc123",
493
filter="completed"
494
)
495
496
# Pagination
497
page1 = client.beta.vector_stores.files.list(
498
vector_store_id="vs_abc123",
499
limit=10,
500
order="desc"
501
)
502
503
page2 = client.beta.vector_stores.files.list(
504
vector_store_id="vs_abc123",
505
limit=10,
506
after=page1.data[-1].id
507
)
508
```
509
510
### Delete Vector Store File
511
512
Remove a file from a vector store (does not delete the file itself).
513
514
```python { .api }
515
def delete(
516
self,
517
file_id: str,
518
*,
519
vector_store_id: str,
520
extra_headers: dict[str, str] | None = None,
521
extra_query: dict[str, object] | None = None,
522
extra_body: dict[str, object] | None = None,
523
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
524
) -> VectorStoreFileDeleted:
525
"""
526
Delete a vector store file.
527
528
This removes the file from the vector store but does not delete the file
529
itself. To delete the file, use client.files.delete().
530
531
Args:
532
file_id: The ID of the file.
533
vector_store_id: The vector store ID.
534
extra_headers: Additional HTTP headers.
535
extra_query: Additional query parameters.
536
extra_body: Additional JSON fields.
537
timeout: Request timeout in seconds.
538
539
Returns:
540
VectorStoreFileDeleted: Deletion confirmation.
541
"""
542
```
543
544
Usage example:
545
546
```python
547
# Remove file from vector store
548
deleted = client.beta.vector_stores.files.delete(
549
file_id="file-xyz789",
550
vector_store_id="vs_abc123"
551
)
552
553
print(f"Deleted: {deleted.id}")
554
555
# To also delete the file itself:
556
client.files.delete(file_id="file-xyz789")
557
```
558
559
### Get Vector Store File Content
560
561
Retrieve the parsed contents of a vector store file.
562
563
```python { .api }
564
def content(
565
self,
566
file_id: str,
567
*,
568
vector_store_id: str,
569
extra_headers: dict[str, str] | None = None,
570
extra_query: dict[str, object] | None = None,
571
extra_body: dict[str, object] | None = None,
572
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
573
) -> SyncPage[FileContentResponse]:
574
"""
575
Retrieve the parsed contents of a vector store file.
576
577
Args:
578
file_id: The ID of the file.
579
vector_store_id: The vector store ID.
580
extra_headers: Additional HTTP headers.
581
extra_query: Additional query parameters.
582
extra_body: Additional JSON fields.
583
timeout: Request timeout in seconds.
584
585
Returns:
586
SyncPage[FileContentResponse]: Parsed file content with chunks.
587
"""
588
```
589
590
Usage example:
591
592
```python
593
# Get parsed file content
594
content_pages = client.beta.vector_stores.files.content(
595
file_id="file-xyz789",
596
vector_store_id="vs_abc123"
597
)
598
599
for page in content_pages:
600
print(f"Content: {page.content}")
601
print(f"Metadata: {page.metadata}")
602
```
603
604
### Helper: Create and Poll
605
606
Convenience method that combines create() and poll() - adds a file and waits for processing.
607
608
```python { .api }
609
def create_and_poll(
610
self,
611
file_id: str,
612
*,
613
vector_store_id: str,
614
attributes: dict[str, str | float | bool] | None | Omit = omit,
615
poll_interval_ms: int | Omit = omit,
616
chunking_strategy: dict | Omit = omit,
617
) -> VectorStoreFile:
618
"""
619
Attach a file to the given vector store and wait for it to be processed.
620
621
Args:
622
file_id: The file ID to add.
623
vector_store_id: The vector store ID.
624
attributes: Key-value pairs to attach to the file.
625
poll_interval_ms: Polling interval in milliseconds. If not specified, uses server-suggested interval.
626
chunking_strategy: Chunking configuration.
627
628
Returns:
629
VectorStoreFile: Processed file details (may be completed or failed).
630
"""
631
```
632
633
Usage example:
634
635
```python
636
# Add file and wait for processing
637
file = client.beta.vector_stores.files.create_and_poll(
638
file_id="file-xyz789",
639
vector_store_id="vs_abc123",
640
poll_interval_ms=1000
641
)
642
643
print(f"Final status: {file.status}")
644
if file.status == "failed":
645
print(f"Error: {file.last_error}")
646
```
647
648
### Helper: Poll Processing
649
650
Wait for a vector store file to finish processing.
651
652
```python { .api }
653
def poll(
654
self,
655
file_id: str,
656
*,
657
vector_store_id: str,
658
poll_interval_ms: int | Omit = omit,
659
) -> VectorStoreFile:
660
"""
661
Wait for the vector store file to finish processing.
662
663
Note: this will return even if the file failed to process. Check
664
file.status and file.last_error to handle failures.
665
666
Args:
667
file_id: The file ID.
668
vector_store_id: The vector store ID.
669
poll_interval_ms: Polling interval in milliseconds. If not specified, uses server-suggested interval.
670
671
Returns:
672
VectorStoreFile: File details after processing completes (or fails).
673
"""
674
```
675
676
Usage example:
677
678
```python
679
# First create the file
680
file = client.beta.vector_stores.files.create(
681
file_id="file-xyz789",
682
vector_store_id="vs_abc123"
683
)
684
685
# Then poll until processing completes
686
processed_file = client.beta.vector_stores.files.poll(
687
file_id="file-xyz789",
688
vector_store_id="vs_abc123"
689
)
690
691
print(f"Status: {processed_file.status}")
692
```
693
694
### Helper: Upload and Attach
695
696
Upload a new file to the Files API and attach it to the vector store.
697
698
```python { .api }
699
def upload(
700
self,
701
*,
702
vector_store_id: str,
703
file: FileTypes,
704
chunking_strategy: dict | Omit = omit,
705
) -> VectorStoreFile:
706
"""
707
Upload a file to the Files API and attach it to the given vector store.
708
709
Note: The file will be asynchronously processed. Use upload_and_poll()
710
to wait for processing to complete.
711
712
Args:
713
vector_store_id: The vector store ID.
714
file: File to upload (path, file object, or bytes).
715
chunking_strategy: Chunking configuration.
716
717
Returns:
718
VectorStoreFile: File details (status will be "in_progress").
719
"""
720
```
721
722
Usage example:
723
724
```python
725
# Upload and attach file
726
with open("document.pdf", "rb") as f:
727
file = client.beta.vector_stores.files.upload(
728
vector_store_id="vs_abc123",
729
file=f
730
)
731
732
print(f"Uploaded file ID: {file.id}")
733
print(f"Status: {file.status}")
734
```
735
736
### Helper: Upload and Poll
737
738
Complete workflow - upload a file, attach to vector store, and wait for processing.
739
740
```python { .api }
741
def upload_and_poll(
742
self,
743
*,
744
vector_store_id: str,
745
file: FileTypes,
746
attributes: dict[str, str | float | bool] | None | Omit = omit,
747
poll_interval_ms: int | Omit = omit,
748
chunking_strategy: dict | Omit = omit,
749
) -> VectorStoreFile:
750
"""
751
Upload a file and poll until processing is complete.
752
753
This is the most convenient method for adding files - it handles
754
the upload, attachment, and waiting in one call.
755
756
Args:
757
vector_store_id: The vector store ID.
758
file: File to upload (path, file object, or bytes).
759
attributes: Key-value pairs to attach to the file.
760
poll_interval_ms: Polling interval in milliseconds. If not specified, uses server-suggested interval.
761
chunking_strategy: Chunking configuration.
762
763
Returns:
764
VectorStoreFile: Processed file details (may be completed or failed).
765
"""
766
```
767
768
Usage example:
769
770
```python
771
# Complete workflow in one call
772
with open("document.pdf", "rb") as f:
773
file = client.beta.vector_stores.files.upload_and_poll(
774
vector_store_id="vs_abc123",
775
file=f,
776
attributes={"type": "documentation", "version": "2.0"},
777
poll_interval_ms=1000
778
)
779
780
print(f"File ID: {file.id}")
781
print(f"Status: {file.status}")
782
print(f"Usage bytes: {file.usage_bytes}")
783
784
if file.status == "failed":
785
print(f"Error: {file.last_error}")
786
```
787
788
### Search Vector Store
789
790
Search for relevant content in a vector store based on a query and optional file attributes filter.
791
792
```python { .api }
793
def search(
794
self,
795
vector_store_id: str,
796
*,
797
query: str | list[str],
798
filters: dict | Omit = omit,
799
max_num_results: int | Omit = omit,
800
ranking_options: dict | Omit = omit,
801
rewrite_query: bool | Omit = omit,
802
extra_headers: dict[str, str] | None = None,
803
extra_query: dict[str, object] | None = None,
804
extra_body: dict[str, object] | None = None,
805
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
806
) -> SyncPage[VectorStoreSearchResponse]:
807
"""
808
Search vector store for relevant content.
809
810
Args:
811
vector_store_id: The vector store ID.
812
query: Search query text (string or list of strings).
813
filters: A filter to apply based on file attributes.
814
max_num_results: Maximum number of results to return (1-50 inclusive).
815
ranking_options: Ranking options for search.
816
rewrite_query: Whether to rewrite the natural language query for vector search.
817
extra_headers: Additional HTTP headers.
818
extra_query: Additional query parameters.
819
extra_body: Additional JSON fields.
820
timeout: Request timeout in seconds.
821
822
Returns:
823
SyncPage[VectorStoreSearchResponse]: Paginated search results with relevant chunks.
824
"""
825
```
826
827
Usage example:
828
829
```python
830
# Search vector store
831
results = client.beta.vector_stores.search(
832
vector_store_id="vs_abc123",
833
query="How do I install the SDK?",
834
max_num_results=5
835
)
836
837
for result in results.data:
838
print(f"Score: {result.score}")
839
print(f"Content: {result.content}")
840
print(f"File: {result.file_id}")
841
```
842
843
### File Batches
844
845
Batch operations for adding multiple files to a vector store efficiently. Accessed via `client.beta.vector_stores.file_batches`.
846
847
```python { .api }
848
def create(
849
self,
850
vector_store_id: str,
851
*,
852
file_ids: list[str] | Omit = omit,
853
files: list[dict] | Omit = omit,
854
attributes: dict[str, str | float | bool] | None | Omit = omit,
855
chunking_strategy: dict | Omit = omit,
856
extra_headers: dict[str, str] | None = None,
857
extra_query: dict[str, object] | None = None,
858
extra_body: dict[str, object] | None = None,
859
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
860
) -> VectorStoreFileBatch:
861
"""
862
Create a batch of files to add to vector store.
863
864
Args:
865
vector_store_id: The vector store ID.
866
file_ids: List of file IDs to add (mutually exclusive with files).
867
files: List of file objects with per-file metadata (mutually exclusive with file_ids).
868
attributes: Metadata to apply to all files in batch.
869
chunking_strategy: Strategy for chunking files.
870
extra_headers: Additional HTTP headers.
871
extra_query: Additional query parameters.
872
extra_body: Additional JSON fields.
873
timeout: Request timeout in seconds.
874
875
Returns:
876
VectorStoreFileBatch: Created batch object.
877
"""
878
879
def retrieve(
880
self,
881
batch_id: str,
882
*,
883
vector_store_id: str,
884
extra_headers: dict[str, str] | None = None,
885
extra_query: dict[str, object] | None = None,
886
extra_body: dict[str, object] | None = None,
887
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
888
) -> VectorStoreFileBatch:
889
"""
890
Retrieve file batch status.
891
892
Args:
893
batch_id: The file batch ID.
894
vector_store_id: The vector store ID.
895
extra_headers: Additional HTTP headers.
896
extra_query: Additional query parameters.
897
extra_body: Additional JSON fields.
898
timeout: Request timeout in seconds.
899
900
Returns:
901
VectorStoreFileBatch: Batch details.
902
"""
903
904
def cancel(
905
self,
906
batch_id: str,
907
*,
908
vector_store_id: str,
909
extra_headers: dict[str, str] | None = None,
910
extra_query: dict[str, object] | None = None,
911
extra_body: dict[str, object] | None = None,
912
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
913
) -> VectorStoreFileBatch:
914
"""
915
Cancel an in-progress file batch.
916
917
Args:
918
batch_id: The file batch ID.
919
vector_store_id: The vector store ID.
920
extra_headers: Additional HTTP headers.
921
extra_query: Additional query parameters.
922
extra_body: Additional JSON fields.
923
timeout: Request timeout in seconds.
924
925
Returns:
926
VectorStoreFileBatch: Updated batch with cancelled status.
927
"""
928
```
929
930
Usage examples:
931
932
```python
933
# Create file batch
934
batch = client.beta.vector_stores.file_batches.create(
935
vector_store_id="vs_abc123",
936
file_ids=["file-1", "file-2", "file-3"]
937
)
938
939
print(f"Batch ID: {batch.id}")
940
print(f"Status: {batch.status}")
941
942
# Check batch status
943
batch = client.beta.vector_stores.file_batches.retrieve(
944
batch_id=batch.id,
945
vector_store_id="vs_abc123"
946
)
947
948
# Cancel batch if needed
949
batch = client.beta.vector_stores.file_batches.cancel(
950
batch_id=batch.id,
951
vector_store_id="vs_abc123"
952
)
953
```
954
955
## Types
956
957
```python { .api }
958
from typing import Literal
959
from pydantic import BaseModel
960
961
class VectorStore(BaseModel):
962
"""Vector store for file search."""
963
id: str
964
created_at: int
965
name: str
966
usage_bytes: int
967
file_counts: FileCounts
968
status: Literal["expired", "in_progress", "completed"]
969
expires_after: dict | None
970
expires_at: int | None
971
last_active_at: int | None
972
metadata: dict[str, str] | None
973
object: Literal["vector_store"]
974
975
class FileCounts(BaseModel):
976
"""File count statistics."""
977
in_progress: int
978
completed: int
979
failed: int
980
cancelled: int
981
total: int
982
983
class VectorStoreDeleted(BaseModel):
984
"""Deletion confirmation."""
985
id: str
986
deleted: bool
987
object: Literal["vector_store.deleted"]
988
989
class VectorStoreFile(BaseModel):
990
"""File in vector store."""
991
id: str
992
created_at: int
993
vector_store_id: str
994
usage_bytes: int
995
status: Literal["in_progress", "completed", "cancelled", "failed"]
996
last_error: dict | None
997
chunking_strategy: dict | None
998
object: Literal["vector_store.file"]
999
1000
class VectorStoreFileBatch(BaseModel):
1001
"""Batch of files being added to vector store."""
1002
id: str
1003
created_at: int
1004
vector_store_id: str
1005
status: Literal["in_progress", "completed", "cancelled", "failed"]
1006
file_counts: FileCounts
1007
object: Literal["vector_store.files_batch"]
1008
1009
class VectorStoreSearchResponse(BaseModel):
1010
"""Search results."""
1011
data: list[SearchResult]
1012
object: str
1013
1014
class SearchResult(BaseModel):
1015
"""Single search result."""
1016
content: str
1017
file_id: str
1018
score: float
1019
metadata: dict | None
1020
```
1021
1022
## Best Practices
1023
1024
```python
1025
from openai import OpenAI
1026
1027
client = OpenAI()
1028
1029
# 1. Create vector store with appropriate files
1030
# Upload files first
1031
file_ids = []
1032
for doc_path in ["doc1.pdf", "doc2.txt", "doc3.md"]:
1033
with open(doc_path, "rb") as f:
1034
file = client.files.create(file=f, purpose="assistants")
1035
file_ids.append(file.id)
1036
1037
# Create vector store
1038
store = client.beta.vector_stores.create(
1039
name="Product Documentation",
1040
file_ids=file_ids
1041
)
1042
1043
# 2. Wait for processing
1044
import time
1045
1046
while store.status == "in_progress":
1047
time.sleep(2)
1048
store = client.beta.vector_stores.retrieve(store.id)
1049
1050
print(f"Status: {store.status}")
1051
print(f"Completed files: {store.file_counts.completed}")
1052
1053
# 3. Use with Assistant
1054
assistant = client.beta.assistants.create(
1055
name="Documentation Assistant",
1056
instructions="Help users find information in documentation.",
1057
model="gpt-4",
1058
tools=[{"type": "file_search"}],
1059
tool_resources={
1060
"file_search": {
1061
"vector_store_ids": [store.id]
1062
}
1063
}
1064
)
1065
1066
# 4. Clean up expired stores
1067
stores = client.beta.vector_stores.list()
1068
for store in stores:
1069
if store.status == "expired":
1070
client.beta.vector_stores.delete(store.id)
1071
```
1072
1073
## Async Usage
1074
1075
```python
1076
import asyncio
1077
from openai import AsyncOpenAI
1078
1079
async def create_store():
1080
client = AsyncOpenAI()
1081
1082
store = await client.beta.vector_stores.create(
1083
name="Async Store",
1084
file_ids=["file-abc123"]
1085
)
1086
1087
return store.id
1088
1089
store_id = asyncio.run(create_store())
1090
```
1091