Common AI framework utilities for the Embabel Agent system including LLM configuration, output converters, prompt contributors, and embedding service abstractions.
Abstractions for text embedding services with support for both single and batch embeddings.
Text embedding services that convert text into dense vector representations.
/**
 * Contract for a service that converts text into embedding vectors.
 *
 * Combines the generic [AiModel] abstraction with [EmbeddingServiceMetadata]
 * and adds a vector size plus single-text and batch `embed` overloads, both of
 * which return [FloatArray] values.
 */
interface EmbeddingService : AiModel<Any>, EmbeddingServiceMetadata {
// Vector size reported by the service; presumably the length of every FloatArray
// returned by embed — TODO confirm against an implementation.
val dimensions: Int
// Embeds a single text and returns its vector.
fun embed(text: String): FloatArray
// Embeds several texts in one call; presumably one vector per input, in input
// order — TODO confirm against an implementation.
fun embed(texts: List<String>): List<FloatArray>
}Basic Usage:
// Illustrative walkthrough: obtain an EmbeddingService, embed one text, embed a
// batch, then read the service metadata. The initializer on the next line is a
// placeholder for dependency injection, not compilable code.
val embeddingService: EmbeddingService = // ... from Spring context
// Single text embedding
val text = "Machine learning is a subset of artificial intelligence"
val embedding: FloatArray = embeddingService.embed(text)
// The vector length is model-specific; 1536 is only an example value.
println("Embedding dimensions: ${embedding.size}") // e.g., 1536
// Batch embedding (more efficient)
val texts = listOf(
"Machine learning is fascinating",
"Deep learning uses neural networks",
"Natural language processing analyzes text"
)
// One call for the whole list instead of one call per text.
val embeddings: List<FloatArray> = embeddingService.embed(texts)
println("Embedded ${embeddings.size} texts")
// Metadata access
println("Service: ${embeddingService.name}")
println("Provider: ${embeddingService.provider}")
println("Dimensions: ${embeddingService.dimensions}")Implementation of EmbeddingServiceMetadata.
/**
 * Plain value holder implementing [EmbeddingServiceMetadata].
 *
 * Carries the two metadata fields as constructor properties and adds no
 * behavior of its own.
 */
data class EmbeddingServiceMetadataImpl(
// Name of the embedding service or model.
override val name: String,
// Provider identifier; the usage examples in this document pass "openai".
override val provider: String
) : EmbeddingServiceMetadataConcrete implementation wrapping Spring AI's EmbeddingModel. This class acts as an adapter that wraps Spring AI's EmbeddingModel interface and converts its responses to the simpler FloatArray format.
/**
 * [EmbeddingService] backed by a Spring AI [EmbeddingModel].
 *
 * This is a signature sketch: only the constructor properties and the fixed
 * model type are shown; the embed implementations and the computation of
 * `dimensions` are not visible here.
 */
data class SpringAiEmbeddingService(
// Name of the service or model, e.g. "text-embedding-ada-002" in the usage example.
override val name: String,
// Provider identifier, e.g. "openai" in the usage example.
override val provider: String,
// Wrapped Spring AI model that this service adapts.
override val model: EmbeddingModel
) : EmbeddingService {
// Always reports the embedding model type.
override val type: ModelType = ModelType.EMBEDDING
// NOTE(review): shown without an initializer; per the inline note the value is
// derived from the wrapped model — confirm how against the real implementation.
override val dimensions: Int // Computed from model
}Usage:
import org.springframework.ai.embedding.EmbeddingModel
// Create from Spring AI model
// The initializer below is a placeholder for a model supplied by Spring AI
// configuration, not compilable code.
val springAiModel: EmbeddingModel = // ... from Spring AI configuration
// Wrap the Spring AI model so it can be used through the EmbeddingService interface.
val embeddingService = SpringAiEmbeddingService(
name = "text-embedding-ada-002",
provider = "openai",
model = springAiModel
)
// Single-text embedding through the wrapper.
val embedding = embeddingService.embed("Hello, world!")import org.springframework.context.annotation.Bean
import org.springframework.context.annotation.Configuration
/**
 * Spring configuration that exposes an [EmbeddingService] bean.
 */
@Configuration
class EmbeddingConfiguration {

    /**
     * Wraps the injected Spring AI [embeddingModel] in a [SpringAiEmbeddingService]
     * registered under the OpenAI ada-002 name and provider.
     *
     * @param embeddingModel Spring AI model to adapt
     * @return the adapter, typed as [EmbeddingService]
     */
    @Bean
    fun openAiEmbeddingService(embeddingModel: EmbeddingModel): EmbeddingService =
        SpringAiEmbeddingService(
            name = "text-embedding-ada-002",
            provider = "openai",
            model = embeddingModel,
        )
}
/**
 * Thin service facade that forwards embedding requests to the injected
 * [EmbeddingService].
 */
@Service
class DocumentService(
    private val embeddingService: EmbeddingService,
) {
    /** Embeds a single document text. */
    fun embedDocument(text: String): FloatArray = embeddingService.embed(text)

    /** Embeds all [texts] with one batch call. */
    fun embedDocuments(texts: List<String>): List<FloatArray> = embeddingService.embed(texts)
}

/**
 * In-memory semantic search over documents indexed through an [EmbeddingService].
 *
 * Every indexed text is stored next to its embedding; [search] scores each
 * stored entry against the query with `cosineSimilarity`.
 */
class SemanticSearchEngine(
    private val embeddingService: EmbeddingService,
) {
    // Indexed texts paired with their embeddings, in insertion order.
    private val documents = mutableListOf<Pair<String, FloatArray>>()

    /** Embeds [text] and appends it to the index. */
    fun indexDocument(text: String) {
        documents += text to embeddingService.embed(text)
    }

    /** Embeds all [texts] with one batch call and appends each text with its vector. */
    fun indexDocuments(texts: List<String>) {
        val vectors = embeddingService.embed(texts)
        documents.addAll(texts.zip(vectors))
    }

    /**
     * Returns up to [topK] indexed texts, ordered from most to least similar to [query].
     *
     * @param query text to search for
     * @param topK maximum number of results
     */
    fun search(query: String, topK: Int = 5): List<String> {
        val queryVector = embeddingService.embed(query)
        val scored = documents.map { (text, vector) ->
            text to cosineSimilarity(queryVector, vector)
        }
        val ranked = scored.sortedByDescending { (_, score) -> score }
        return ranked.take(topK).map { (text, _) -> text }
    }
}
// Usage
val searchEngine = SemanticSearchEngine(embeddingService)
searchEngine.indexDocuments(
    listOf(
        "Python is a programming language",
        "Machine learning uses algorithms",
        "Dogs are loyal pets",
    )
)
val results = searchEngine.search("coding languages")

/**
 * Returns the entries of [documents] whose cosine similarity to [query] is at
 * least [threshold], keeping their original order.
 *
 * The query is embedded on its own; the documents are embedded with a single
 * batch call.
 *
 * @param query text to compare against
 * @param documents candidate texts
 * @param embeddingService service used to embed the query and the documents
 * @param threshold minimum similarity (inclusive) for a document to be kept
 */
fun findSimilarDocuments(
    query: String,
    documents: List<String>,
    embeddingService: EmbeddingService,
    threshold: Double = 0.7
): List<String> {
    val queryVector = embeddingService.embed(query)
    val documentVectors = embeddingService.embed(documents)
    return documents.zip(documentVectors).mapNotNull { (document, vector) ->
        document.takeIf { cosineSimilarity(queryVector, vector) >= threshold }
    }
}

/**
 * Minimal retrieval-augmented generation loop: stores embedded knowledge
 * snippets, retrieves the ones closest to a question, and asks the LLM to
 * answer using them as context.
 */
class RAGSystem(
    private val embeddingService: EmbeddingService,
    private val llmClient: LLMClient
) {
    // Knowledge snippets paired with their embeddings, in insertion order.
    private val knowledgeBase = mutableListOf<Pair<String, FloatArray>>()

    /** Embeds [text] and stores it in the knowledge base. */
    fun addKnowledge(text: String) {
        knowledgeBase += text to embeddingService.embed(text)
    }

    /**
     * Answers [question] using the [topK] most similar knowledge snippets as context.
     *
     * @param question user question
     * @param topK maximum number of snippets placed in the prompt
     * @return whatever the LLM client generates for the assembled prompt
     */
    fun query(question: String, topK: Int = 3): String {
        val relevantDocs = retrieve(question, topK)
        val context = relevantDocs.joinToString("\n\n")
        return llmClient.generate(buildPrompt(context, question))
    }

    // Embeds the question and ranks stored snippets by cosine similarity, best first.
    private fun retrieve(question: String, topK: Int): List<String> {
        val questionVector = embeddingService.embed(question)
        val scored = knowledgeBase.map { (text, vector) ->
            text to cosineSimilarity(questionVector, vector)
        }
        return scored
            .sortedByDescending { (_, score) -> score }
            .take(topK)
            .map { (text, _) -> text }
    }

    // Template lines sit at column 0 on purpose: trimIndent() runs after
    // interpolation, so indenting them would change the emitted prompt text.
    private fun buildPrompt(context: String, question: String): String = """
Use the following context to answer the question.
Context:
$context
Question: $question
Answer:
""".trimIndent()
}
// Always prefer batch embedding for multiple texts.
// INEFFICIENT: Multiple separate calls
val embeddings = texts.map { text ->
    embeddingService.embed(text) // Network call per text
}
// EFFICIENT: Single batch call
val embeddings = embeddingService.embed(texts) // One network call

/**
 * Caching decorator for an [EmbeddingService].
 *
 * Embeddings are memoized by exact input text in [cache]; every member that is
 * not overridden here is forwarded to [delegate] through interface delegation.
 * This class adds no synchronization and no eviction of its own — pass a map
 * that provides them if they are needed.
 *
 * @param delegate service that computes embeddings on a cache miss
 * @param cache backing store keyed by input text; defaults to a fresh mutable map
 */
class CachedEmbeddingService(
    private val delegate: EmbeddingService,
    private val cache: MutableMap<String, FloatArray> = mutableMapOf()
) : EmbeddingService by delegate {

    /** Returns the cached embedding for [text], computing and storing it on first use. */
    override fun embed(text: String): FloatArray =
        cache.getOrPut(text) { delegate.embed(text) }

    /**
     * Embeds [texts] and returns exactly one vector per input, in input order.
     *
     * Cache hits are served locally; all misses are sent to [delegate] in a
     * single batch call, with duplicate texts requested only once.
     *
     * @throws IllegalStateException if the delegate returns a different number
     *   of vectors than texts requested, since results could not be matched
     *   back to their inputs
     */
    override fun embed(texts: List<String>): List<FloatArray> {
        // Resolve hits up front so the result does not depend on the cache
        // still holding them after the delegate call.
        val resolved = mutableMapOf<String, FloatArray>()
        for (text in texts) {
            cache[text]?.let { resolved[text] = it }
        }

        val misses = texts.filterNot { it in resolved }.distinct()
        if (misses.isNotEmpty()) {
            val fresh = delegate.embed(misses)
            check(fresh.size == misses.size) {
                "Delegate returned ${fresh.size} embeddings for ${misses.size} texts"
            }
            misses.zip(fresh).forEach { (text, embedding) ->
                cache[text] = embedding
                resolved[text] = embedding
            }
        }

        // Map back over the original list so position i of the result belongs to texts[i].
        return texts.map { resolved.getValue(it) }
    }
}
// Common operations on embedding vectors.
/**
 * Cosine similarity (most common): the dot product of [a] and [b] divided by
 * the product of their magnitudes.
 *
 * Returns 0.0 when either vector has zero magnitude (including empty vectors)
 * instead of NaN, so a degenerate embedding cannot poison comparisons or sort
 * ahead of real matches.
 *
 * @param a first vector
 * @param b second vector, same length as [a]
 * @return similarity in the range -1.0..1.0, or 0.0 for zero-magnitude input
 * @throws IllegalArgumentException if the vectors differ in length
 */
fun cosineSimilarity(a: FloatArray, b: FloatArray): Double {
    require(a.size == b.size) {
        "Vectors must have the same dimensions: ${a.size} != ${b.size}"
    }
    var dot = 0.0
    var squaredNormA = 0.0
    var squaredNormB = 0.0
    // Single pass; widen to Double before multiplying to avoid Float rounding.
    for (i in a.indices) {
        val x = a[i].toDouble()
        val y = b[i].toDouble()
        dot += x * y
        squaredNormA += x * x
        squaredNormB += y * y
    }
    val denominator = kotlin.math.sqrt(squaredNormA) * kotlin.math.sqrt(squaredNormB)
    return if (denominator == 0.0) 0.0 else dot / denominator
}
/**
 * Euclidean distance: the square root of the summed squared component
 * differences between [a] and [b].
 *
 * @param a first vector
 * @param b second vector, same length as [a]
 * @return non-negative distance; 0.0 for two empty vectors
 * @throws IllegalArgumentException if the vectors differ in length
 */
fun euclideanDistance(a: FloatArray, b: FloatArray): Double {
    require(a.size == b.size) {
        "Vectors must have the same dimensions: ${a.size} != ${b.size}"
    }
    var sumOfSquares = 0.0
    for (i in a.indices) {
        // Widen before subtracting so the difference is not rounded to Float.
        val delta = a[i].toDouble() - b[i].toDouble()
        sumOfSquares += delta * delta
    }
    return kotlin.math.sqrt(sumOfSquares)
}
/**
 * Dot product: the sum of the component-wise products of [a] and [b].
 *
 * @param a first vector
 * @param b second vector, same length as [a]
 * @return the dot product; 0.0 for two empty vectors
 * @throws IllegalArgumentException if the vectors differ in length
 */
fun dotProduct(a: FloatArray, b: FloatArray): Double {
    require(a.size == b.size) {
        "Vectors must have the same dimensions: ${a.size} != ${b.size}"
    }
    var total = 0.0
    for (i in a.indices) {
        // Widen before multiplying so the product is not rounded to Float.
        total += a[i].toDouble() * b[i].toDouble()
    }
    return total
}
/**
 * Normalize vector: scales [vector] to unit length.
 *
 * A vector with zero magnitude (including an empty one) has no direction, so
 * it is returned as an unchanged copy rather than as an array of NaN values.
 * The input array is never modified.
 *
 * @param vector vector to normalize
 * @return a new array holding the unit-length vector, or a copy of the input
 *   when its magnitude is zero
 */
fun normalize(vector: FloatArray): FloatArray {
    val magnitude = kotlin.math.sqrt(vector.sumOf { it.toDouble() * it.toDouble() })
    if (magnitude == 0.0) return vector.copyOf()
    return FloatArray(vector.size) { i -> (vector[i] / magnitude).toFloat() }
}
// tessl i tessl/maven-com-embabel-agent--embabel-agent-common@0.3.1