Python Client SDK for the Mistral AI API with chat completions, embeddings, fine-tuning, and agent capabilities.
—
Generate vector embeddings for text input with support for different models and output formats. Embeddings are dense vector representations of text that capture semantic meaning for use in search, clustering, and similarity tasks.
Generate vector embeddings for single or multiple text inputs with customizable output dimensions and formats.
def create(
    model: str,
    inputs: Union[str, List[str]],
    output_dimension: Optional[int] = None,
    output_dtype: Optional[EmbeddingDtype] = None,
    encoding_format: Optional[EncodingFormat] = None,
    **kwargs
) -> EmbeddingResponse:
    """
    Create embeddings for text inputs.

    Parameters:
    - model: ID of the model to use
    - inputs: Text to embed (a string or a list of strings)
    - output_dimension: The dimension of the output embeddings
    - output_dtype: Output data type ("float", "int8", "uint8", "binary", "ubinary")
    - encoding_format: Encoding format ("float", "base64")

    Returns:
    EmbeddingResponse with vector embeddings
    """

from mistralai import Mistral
client = Mistral(api_key="your-api-key")
# Generate embedding for a single text
response = client.embeddings.create(
    model="mistral-embed",
    inputs="The quick brown fox jumps over the lazy dog."
)
# Access the embedding vector
embedding = response.data[0].embedding
print(f"Embedding dimension: {len(embedding)}")
print(f"First 5 values: {embedding[:5]}")# Generate embeddings for multiple texts
texts = [
    "Machine learning is a subset of artificial intelligence.",
    "Deep learning uses neural networks with multiple layers.",
    "Natural language processing helps computers understand text.",
    "Computer vision enables machines to interpret visual information."
]
response = client.embeddings.create(
    model="mistral-embed",
    inputs=texts
)
# Process embeddings
for i, embedding_data in enumerate(response.data):
    print(f"Text {i + 1}: {len(embedding_data.embedding)} dimensions")
    print(f"Text: {texts[i][:50]}...")

# Generate embeddings with specific output format
response = client.embeddings.create(
    model="mistral-embed",
    inputs="Semantic search with embeddings",
    encoding_format="base64",
    output_dtype="float"
)
embedding_data = response.data[0]
# The response exposes id, object, data, model, and usage (see the model classes below);
# the requested encoding_format and output_dtype determine how the embedding payload is returned.
print(f"Model: {response.model}")
print(f"Payload type: {type(embedding_data.embedding).__name__}")

import numpy as np
from scipy.spatial.distance import cosine
# Embed a query and documents
query = "What is machine learning?"
documents = [
    "Machine learning is a method of data analysis that automates analytical model building.",
    "Artificial intelligence is the simulation of human intelligence in machines.",
    "Data science combines domain expertise, programming skills, and statistical knowledge.",
    "Natural language processing is a branch of AI focused on language understanding."
]
# Get embeddings
query_response = client.embeddings.create(
    model="mistral-embed",
    inputs=query
)
doc_response = client.embeddings.create(
    model="mistral-embed",
    inputs=documents
)
query_embedding = np.array(query_response.data[0].embedding)
doc_embeddings = [np.array(data.embedding) for data in doc_response.data]
# Calculate similarities
similarities = []
for doc_embedding in doc_embeddings:
    similarity = 1 - cosine(query_embedding, doc_embedding)
    similarities.append(similarity)
# Find most similar document
best_match_idx = np.argmax(similarities)
print(f"Query: {query}")
print(f"Most similar document: {documents[best_match_idx]}")
print(f"Similarity score: {similarities[best_match_idx]:.4f}")class EmbeddingRequest:

class EmbeddingRequest:
    model: str
    inputs: Union[str, List[str]]
    output_dimension: Optional[int]
    output_dtype: Optional[str]  # "float", "ubinary"
    encoding_format: Optional[str]  # "float", "base64"

class EmbeddingResponse:
    id: str
    object: str
    data: List[EmbeddingResponseData]
    model: str
    usage: Optional[UsageInfo]

class EmbeddingResponseData:
    object: str
    embedding: List[float]
    index: int

class EmbeddingDtype:
FLOAT = "float"
UBINARY = "ubinary"
class EncodingFormat:
FLOAT = "float"
BASE64 = "base64"Install with Tessl CLI

Install with Tessl CLI

npx tessl i tessl/pypi-mistralai