# Embeddings

Create vector embeddings for text inputs to use in semantic search, clustering, recommendations, and other machine learning applications. Embeddings are numerical representations of text that capture semantic meaning.

## Capabilities

### Create Embeddings

Generate vector embeddings for one or more text inputs.

```python { .api }
def create(
    self,
    *,
    input: str | list[str] | list[int] | list[list[int]],
    model: str | EmbeddingModel,
    dimensions: int | Omit = omit,
    encoding_format: Literal["float", "base64"] | Omit = omit,
    user: str | Omit = omit,
    extra_headers: dict[str, str] | None = None,
    extra_query: dict[str, object] | None = None,
    extra_body: dict[str, object] | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> CreateEmbeddingResponse:
    """
    Create embedding vectors representing the input text.

    Args:
        input: Text to embed. Can be:
            - Single string: "Hello world"
            - List of strings: ["Hello", "world"]
            - Token array: [123, 456, 789]
            - List of token arrays: [[123, 456], [789, 12]]
            Max 8192 tokens per input, 2048 dimensions max for arrays.
            Total limit: 300,000 tokens across all inputs per request.

        model: Embedding model ID. Options:
            - "text-embedding-3-large": Most capable, 3072 dimensions
            - "text-embedding-3-small": Fast and efficient, 1536 dimensions
            - "text-embedding-ada-002": Legacy model, 1536 dimensions

        dimensions: Number of dimensions for output embeddings.
            Only supported in text-embedding-3 models. Allows reducing
            embedding size for storage/performance. Must be ≤ model's max.

        encoding_format: Output format for embeddings.
            - "float": List of floats (default)
            - "base64": Base64-encoded bytes for space efficiency

        user: Unique end-user identifier for abuse monitoring.

        extra_headers: Additional HTTP headers.
        extra_query: Additional query parameters.
        extra_body: Additional JSON fields.
        timeout: Request timeout in seconds.

    Returns:
        CreateEmbeddingResponse: Contains embedding vectors and usage info.

    Raises:
        BadRequestError: Invalid input or exceeds token limits
        AuthenticationError: Invalid API key
        RateLimitError: Rate limit exceeded
    """
```

Usage examples:

```python
from openai import OpenAI

client = OpenAI()

# Single text embedding
response = client.embeddings.create(
    model="text-embedding-3-small",
    input="The quick brown fox jumps over the lazy dog"
)

embedding = response.data[0].embedding
print(f"Embedding dimension: {len(embedding)}")
print(f"First 5 values: {embedding[:5]}")

# Multiple texts at once
response = client.embeddings.create(
    model="text-embedding-3-small",
    input=[
        "Machine learning is fascinating",
        "I love natural language processing",
        "The weather is nice today"
    ]
)

for i, item in enumerate(response.data):
    print(f"Embedding {i}: {len(item.embedding)} dimensions")

# Using larger model with custom dimensions
response = client.embeddings.create(
    model="text-embedding-3-large",
    input="Semantic search with embeddings",
    dimensions=1024  # Reduce from default 3072
)

# Base64 encoding for space efficiency
response = client.embeddings.create(
    model="text-embedding-3-small",
    input="Compressed embedding format",
    encoding_format="base64"
)

# Decode base64 embedding
import base64
import array

encoded = response.data[0].embedding
decoded_bytes = base64.b64decode(encoded)
floats = array.array('f', decoded_bytes)
print(f"Decoded embedding: {list(floats)[:5]}")

# Token-based input (pre-tokenized)
import tiktoken

enc = tiktoken.encoding_for_model("text-embedding-3-small")
tokens = enc.encode("Hello world")

response = client.embeddings.create(
    model="text-embedding-3-small",
    input=tokens
)

# Semantic search example
def cosine_similarity(a, b):
    import math
    dot = sum(x * y for x, y in zip(a, b))
    mag_a = math.sqrt(sum(x * x for x in a))
    mag_b = math.sqrt(sum(y * y for y in b))
    return dot / (mag_a * mag_b)

# Embed documents
documents = [
    "Python is a programming language",
    "Machine learning uses algorithms",
    "The cat sat on the mat"
]

response = client.embeddings.create(
    model="text-embedding-3-small",
    input=documents
)

doc_embeddings = [item.embedding for item in response.data]

# Embed query
query = "Tell me about programming"
query_response = client.embeddings.create(
    model="text-embedding-3-small",
    input=query
)
query_embedding = query_response.data[0].embedding

# Find most similar document
similarities = [
    cosine_similarity(query_embedding, doc_emb)
    for doc_emb in doc_embeddings
]

best_match_idx = similarities.index(max(similarities))
print(f"Most similar document: {documents[best_match_idx]}")
print(f"Similarity score: {similarities[best_match_idx]:.4f}")
```

## Types

```python { .api }
from typing import Literal
from pydantic import BaseModel

class CreateEmbeddingResponse(BaseModel):
    """Response from embeddings endpoint."""
    data: list[Embedding]
    model: str
    object: Literal["list"]
    usage: Usage

class Embedding(BaseModel):
    """Single embedding vector."""
    embedding: list[float] | str  # list[float] for "float", str for "base64"
    index: int
    object: Literal["embedding"]

class Usage(BaseModel):
    """Token usage information."""
    prompt_tokens: int
    total_tokens: int

# Model type
EmbeddingModel = Literal[
    "text-embedding-3-large",
    "text-embedding-3-small",
    "text-embedding-ada-002"
]
```

## Model Comparison

| Model | Dimensions | Performance | Use Case |
|-------|-----------|-------------|----------|
| text-embedding-3-large | 3072 (default) | Highest quality | Production semantic search, highest accuracy needed |
| text-embedding-3-small | 1536 (default) | Good quality, faster | General purpose, cost-sensitive applications |
| text-embedding-ada-002 | 1536 (fixed) | Legacy performance | Backwards compatibility |

## Best Practices

```python
from openai import OpenAI

client = OpenAI()

# 1. Batch similar requests for efficiency
texts = ["text1", "text2", "text3"]  # Up to 2048 inputs
response = client.embeddings.create(
    model="text-embedding-3-small",
    input=texts
)

# 2. Use dimensions parameter to reduce storage
response = client.embeddings.create(
    model="text-embedding-3-large",
    input="Sample text",
    dimensions=256  # Much smaller than default 3072
)

# 3. Handle errors gracefully
try:
    response = client.embeddings.create(
        model="text-embedding-3-small",
        input="x" * 10000  # May exceed the model's token limit
    )
except Exception as e:
    print(f"Error: {e}")

# 4. Use base64 for space efficiency in storage
response = client.embeddings.create(
    model="text-embedding-3-small",
    input="Efficient storage",
    encoding_format="base64"
)
# Store base64 string directly, decode when needed
```

## Async Usage

```python
import asyncio
from openai import AsyncOpenAI

async def get_embeddings():
    client = AsyncOpenAI()

    response = await client.embeddings.create(
        model="text-embedding-3-small",
        input="Async embedding creation"
    )

    return response.data[0].embedding

# Run async
embeddings = asyncio.run(get_embeddings())
```