or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

assistants.mdaudio.mdbatches.mdchat-completions.mdchatkit.mdclient-initialization.mdcompletions.mdcontainers.mdconversations.mdembeddings.mdevals.mdfiles.mdfine-tuning.mdimages.mdindex.mdmodels.mdmoderations.mdrealtime.mdresponses.mdruns.mdthreads-messages.mduploads.mdvector-stores.mdvideos.mdwebhooks.md
KNOWN_ISSUES.md

docs/embeddings.md

0

# Embeddings

1

2

Create vector embeddings for text inputs to use in semantic search, clustering, recommendations, and other machine learning applications. Embeddings are numerical representations of text that capture semantic meaning.

3

4

## Capabilities

5

6

### Create Embeddings

7

8

Generate vector embeddings for one or more text inputs.

9

10

```python { .api }

11

def create(

12

self,

13

*,

14

input: str | list[str] | list[int] | list[list[int]],

15

model: str | EmbeddingModel,

16

dimensions: int | Omit = omit,

17

encoding_format: Literal["float", "base64"] | Omit = omit,

18

user: str | Omit = omit,

19

extra_headers: dict[str, str] | None = None,

20

extra_query: dict[str, object] | None = None,

21

extra_body: dict[str, object] | None = None,

22

timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,

23

) -> CreateEmbeddingResponse:

24

"""

25

Create embedding vectors representing the input text.

26

27

Args:

28

input: Text to embed. Can be:

29

- Single string: "Hello world"

30

- List of strings: ["Hello", "world"]

31

- Token array: [123, 456, 789]

32

- List of token arrays: [[123, 456], [789, 12]]

33

Max 8192 tokens per input; an input array may contain at most 2048 entries.

34

Total limit: 300,000 tokens across all inputs per request.

35

36

model: Embedding model ID. Options:

37

- "text-embedding-3-large": Most capable, 3072 dimensions

38

- "text-embedding-3-small": Fast and efficient, 1536 dimensions

39

- "text-embedding-ada-002": Legacy model, 1536 dimensions

40

41

dimensions: Number of dimensions for output embeddings.

42

Only supported in text-embedding-3 models. Allows reducing

43

embedding size for storage/performance. Must be ≤ model's max.

44

45

encoding_format: Output format for embeddings.

46

- "float": List of floats (default)

47

- "base64": Base64-encoded bytes for space efficiency

48

49

user: Unique end-user identifier for abuse monitoring.

50

51

extra_headers: Additional HTTP headers.

52

extra_query: Additional query parameters.

53

extra_body: Additional JSON fields.

54

timeout: Request timeout in seconds.

55

56

Returns:

57

CreateEmbeddingResponse: Contains embedding vectors and usage info.

58

59

Raises:

60

BadRequestError: Invalid input or exceeds token limits

61

AuthenticationError: Invalid API key

62

RateLimitError: Rate limit exceeded

63

"""

64

```

65

66

Usage examples:

67

68

```python

69

from openai import OpenAI

70

71

client = OpenAI()

72

73

# Single text embedding

74

response = client.embeddings.create(

75

model="text-embedding-3-small",

76

input="The quick brown fox jumps over the lazy dog"

77

)

78

79

embedding = response.data[0].embedding

80

print(f"Embedding dimension: {len(embedding)}")

81

print(f"First 5 values: {embedding[:5]}")

82

83

# Multiple texts at once

84

response = client.embeddings.create(

85

model="text-embedding-3-small",

86

input=[

87

"Machine learning is fascinating",

88

"I love natural language processing",

89

"The weather is nice today"

90

]

91

)

92

93

for i, item in enumerate(response.data):

94

print(f"Embedding {i}: {len(item.embedding)} dimensions")

95

96

# Using larger model with custom dimensions

97

response = client.embeddings.create(

98

model="text-embedding-3-large",

99

input="Semantic search with embeddings",

100

dimensions=1024 # Reduce from default 3072

101

)

102

103

# Base64 encoding for space efficiency

104

response = client.embeddings.create(

105

model="text-embedding-3-small",

106

input="Compressed embedding format",

107

encoding_format="base64"

108

)

109

110

# Decode base64 embedding

111

import base64

112

import array

113

114

encoded = response.data[0].embedding

115

decoded_bytes = base64.b64decode(encoded)

116

floats = array.array('f', decoded_bytes)

117

print(f"Decoded embedding: {list(floats)[:5]}")

118

119

# Token-based input (pre-tokenized)

120

import tiktoken

121

122

enc = tiktoken.encoding_for_model("text-embedding-3-small")

123

tokens = enc.encode("Hello world")

124

125

response = client.embeddings.create(

126

model="text-embedding-3-small",

127

input=tokens

128

)

129

130

# Semantic search example

131

def cosine_similarity(a, b):

132

import math

133

dot = sum(x * y for x, y in zip(a, b))

134

mag_a = math.sqrt(sum(x * x for x in a))

135

mag_b = math.sqrt(sum(y * y for y in b))

136

return dot / (mag_a * mag_b)

137

138

# Embed documents

139

documents = [

140

"Python is a programming language",

141

"Machine learning uses algorithms",

142

"The cat sat on the mat"

143

]

144

145

response = client.embeddings.create(

146

model="text-embedding-3-small",

147

input=documents

148

)

149

150

doc_embeddings = [item.embedding for item in response.data]

151

152

# Embed query

153

query = "Tell me about programming"

154

query_response = client.embeddings.create(

155

model="text-embedding-3-small",

156

input=query

157

)

158

query_embedding = query_response.data[0].embedding

159

160

# Find most similar document

161

similarities = [

162

cosine_similarity(query_embedding, doc_emb)

163

for doc_emb in doc_embeddings

164

]

165

166

best_match_idx = similarities.index(max(similarities))

167

print(f"Most similar document: {documents[best_match_idx]}")

168

print(f"Similarity score: {similarities[best_match_idx]:.4f}")

169

```

170

171

## Types

172

173

```python { .api }

174

from typing import Literal

175

from pydantic import BaseModel

176

177

class CreateEmbeddingResponse(BaseModel):

178

"""Response from embeddings endpoint."""

179

data: list[Embedding]

180

model: str

181

object: Literal["list"]

182

usage: Usage

183

184

class Embedding(BaseModel):

185

"""Single embedding vector."""

186

embedding: list[float] | str # list[float] for "float", str for "base64"

187

index: int

188

object: Literal["embedding"]

189

190

class Usage(BaseModel):

191

"""Token usage information."""

192

prompt_tokens: int

193

total_tokens: int

194

195

# Model type

196

EmbeddingModel = Literal[

197

"text-embedding-3-large",

198

"text-embedding-3-small",

199

"text-embedding-ada-002"

200

]

201

```

202

203

## Model Comparison

204

205

| Model | Dimensions | Performance | Use Case |

206

|-------|-----------|-------------|----------|

207

| text-embedding-3-large | 3072 (default) | Highest quality | Production semantic search, highest accuracy needed |

208

| text-embedding-3-small | 1536 (default) | Good quality, faster | General purpose, cost-sensitive applications |

209

| text-embedding-ada-002 | 1536 (fixed) | Legacy performance | Backwards compatibility |

210

211

## Best Practices

212

213

```python

214

from openai import OpenAI

215

216

client = OpenAI()

217

218

# 1. Batch similar requests for efficiency

219

texts = ["text1", "text2", "text3"] # Up to 2048 inputs

220

response = client.embeddings.create(

221

model="text-embedding-3-small",

222

input=texts

223

)

224

225

# 2. Use dimensions parameter to reduce storage

226

response = client.embeddings.create(

227

model="text-embedding-3-large",

228

input="Sample text",

229

dimensions=256 # Much smaller than default 3072

230

)

231

232

# 3. Handle errors gracefully

233

try:

234

response = client.embeddings.create(

235

model="text-embedding-3-small",

236

input="x" * 10000 # Too long

237

)

238

except Exception as e:

239

print(f"Error: {e}")

240

241

# 4. Use base64 for space efficiency in storage

242

response = client.embeddings.create(

243

model="text-embedding-3-small",

244

input="Efficient storage",

245

encoding_format="base64"

246

)

247

# Store base64 string directly, decode when needed

248

```

249

250

## Async Usage

251

252

```python

253

import asyncio

254

from openai import AsyncOpenAI

255

256

async def get_embeddings():

257

client = AsyncOpenAI()

258

259

response = await client.embeddings.create(

260

model="text-embedding-3-small",

261

input="Async embedding creation"

262

)

263

264

return response.data[0].embedding

265

266

# Run async

267

embeddings = asyncio.run(get_embeddings())

268

```

269