CtrlK
BlogDocsLog inGet started
Tessl Logo

tessl/maven-com-embabel-agent--embabel-agent-common

Common AI framework utilities for the Embabel Agent system including LLM configuration, output converters, prompt contributors, and embedding service abstractions.

Overview
Eval results
Files

docs/advanced/integration-patterns.md

Integration Patterns

Common patterns for integrating Embabel Agent Common with Spring, reactive streams, and LLM clients.

Spring Integration

Configuration Beans

import org.springframework.context.annotation.Bean
import org.springframework.context.annotation.Configuration
import com.fasterxml.jackson.databind.DeserializationFeature
import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.module.kotlin.registerKotlinModule

/**
 * Central Spring configuration for AI-related beans: JSON mapping,
 * default LLM options, and the embedding service.
 */
@Configuration
class AIConfiguration {

    /**
     * Shared ObjectMapper with Kotlin module registered.
     * FAIL_ON_UNKNOWN_PROPERTIES is disabled so LLM responses containing
     * extra fields still deserialize. ObjectMapper is thread-safe once
     * configured, so a single bean is reused everywhere.
     */
    @Bean
    fun objectMapper(): ObjectMapper {
        return ObjectMapper()
            .registerKotlinModule()
            .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false)
    }

    /** Default model settings used when a caller does not supply its own options. */
    @Bean
    fun defaultLlmOptions(): LlmOptions {
        return LlmOptions.withModel("gpt-4")
            .withTemperature(0.7)
            .withMaxTokens(1000)
    }

    /** Wraps the Spring AI EmbeddingModel in the Embabel EmbeddingService abstraction. */
    @Bean
    fun embeddingService(embeddingModel: EmbeddingModel): EmbeddingService {
        return SpringAiEmbeddingService(
            name = "text-embedding-ada-002",
            provider = "openai",
            model = embeddingModel
        )
    }
}

Service Layer

import org.springframework.stereotype.Service

/**
 * Thin service wrapper that pairs a ChatClient call with a Jackson
 * structured-output converter.
 */
@Service
class LLMService(
    private val chatClient: ChatClient,
    private val objectMapper: ObjectMapper
) {
    /**
     * Sends [prompt] plus the converter's format instructions to the LLM
     * and deserializes the reply into [responseType].
     * Returns null when the reply cannot be converted.
     */
    fun <T> callWithStructuredOutput(
        prompt: String,
        responseType: Class<T>,
        options: LlmOptions = LlmOptions.withDefaultLlm()
    ): T? {
        val converter = JacksonOutputConverter(responseType, objectMapper)

        // Append the expected-output schema beneath the caller's prompt.
        val fullPrompt = """
            $prompt

            ${converter.getFormat()}
        """.trimIndent()

        val rawReply = chatClient
            .prompt()
            .user(fullPrompt)
            .call()
            .content()

        return converter.convert(rawReply)
    }
}

Prompt Contributors as Beans

/** Contributes a fixed system instruction at the beginning of every prompt. */
@Component
class SystemPromptContributor : PromptContributor {
    override val role = "system"
    override val promptContributionLocation = PromptContributionLocation.BEGINNING

    // Static contribution; no per-request state involved.
    override fun contribution(): String = "You are a helpful AI assistant."
}

/**
 * Assembles prompts from all registered [PromptContributor] beans,
 * honoring each contributor's requested location.
 */
@Service
class PromptService(
    private val contributors: List<PromptContributor>
) {
    /**
     * Builds the final prompt: BEGINNING contributions, then [userInput],
     * then END contributions, each section separated by a blank line.
     */
    fun buildPrompt(userInput: String): String {
        val header = joinedAt(PromptContributionLocation.BEGINNING)
        val footer = joinedAt(PromptContributionLocation.END)

        val sections = mutableListOf<String>()
        if (header.isNotEmpty()) sections += header
        sections += userInput
        if (footer.isNotEmpty()) sections += footer

        // Trailing trim mirrors the original buildString { ... }.trim().
        return sections.joinToString("\n\n").trim()
    }

    // Joins every contribution registered for the given location.
    private fun joinedAt(location: PromptContributionLocation): String =
        contributors
            .filter { it.promptContributionLocation == location }
            .joinToString("\n\n") { it.contribution() }
}

Structured Output Pattern

Complete pattern for LLM calls with typed responses.

/**
 * Structured result of an extraction call.
 *
 * @property entities entity names found in the text
 * @property sentiment overall sentiment label returned by the model
 * @property summary short summary of the analyzed text
 */
data class ExtractedData(
    val entities: List<String>,
    val sentiment: String,
    val summary: String
)

/**
 * Extracts structured [ExtractedData] from free text via the LLM,
 * recording token usage with the cost tracker.
 */
@Service
class ExtractionService(
    private val chatClient: ChatClient,
    private val objectMapper: ObjectMapper,
    private val costTracker: CostTracker
) {
    /**
     * Extracts entities, sentiment, and a summary from [text].
     * Returns null when the model's reply cannot be parsed.
     */
    fun extractData(text: String): ExtractedData? {
        val converter = JacksonOutputConverter(ExtractedData::class.java, objectMapper)

        val prompt = """
            Extract information from the following text.

            ${converter.getFormat()}

            Text: $text
        """.trimIndent()

        // NOTE(review): these options are built but never applied to the
        // call below — confirm how LlmOptions binds to ChatClient.
        val options = LlmOptions.withModel("gpt-4")
            .withTemperature(0.3)
            .withMaxTokens(500)

        // Keep the call response (not just its content string) so usage
        // metadata is still reachable; the original read `metadata` off the
        // String returned by content(), which cannot work.
        val response = chatClient
            .prompt()
            .user(prompt)
            .call()

        // Track costs
        val usage = response.metadata?.usage
        if (usage != null) {
            costTracker.recordUsage(usage)
        }

        return converter.convert(response.content())
    }
}

Streaming Pattern

Process streaming LLM responses reactively.

/**
 * Streams LLM output and converts the accumulated JSONL into typed events.
 */
@Service
class StreamingService(
    private val chatClient: ChatClient,
    private val objectMapper: ObjectMapper
) {
    /**
     * Streams the response for [prompt] (with format instructions appended)
     * and converts the accumulated output into a stream of [Event]s.
     */
    fun streamEvents(prompt: String): Flux<Event> {
        val converter = StreamingJacksonOutputConverter(
            Event::class.java,
            objectMapper
        )

        val fullPrompt = """
            $prompt

            ${converter.getFormat()}
        """.trimIndent()

        return accumulateContent(fullPrompt)
            .flatMapMany { jsonl -> converter.convertStream(jsonl) }
    }

    /**
     * Like [streamEvents] but preserves "thinking" output alongside events.
     * Note: the original deliberately sends [prompt] without appending the
     * converter format here; that behavior is kept.
     */
    fun streamWithThinking(prompt: String): Flux<StreamingEvent<Event>> {
        val converter = StreamingJacksonOutputConverter(
            Event::class.java,
            objectMapper
        )

        return accumulateContent(prompt)
            .flatMapMany { converter.convertStreamWithThinking(it) }
    }

    // Collects all streamed chunks into a single string. The original chained
    // Flux.buffer().map{...}.flatMapMany{...}, but flatMapMany is a Mono
    // operator and does not exist on Flux; collectList() produces the Mono
    // this chain needs.
    private fun accumulateContent(prompt: String): Mono<String> =
        chatClient
            .prompt()
            .user(prompt)
            .stream()
            .content()
            .collectList()
            .map { chunks -> chunks.joinToString("") }
}

Cost Tracking Pattern

Track costs across requests and sessions.

/**
 * ChatClient wrapper that records per-request and session-wide costs.
 */
@Service
class ManagedLLMService(
    private val chatClient: ChatClient,
    private val pricing: PricingModel
) {
    // Fully qualified so no import is needed; the original referenced an
    // undeclared `logger`.
    private val logger = org.slf4j.LoggerFactory.getLogger(ManagedLLMService::class.java)

    // Accumulates usage across every call made through this service instance.
    private val sessionTracker = CostTracker(pricing)

    /**
     * Sends [prompt] to the LLM, records token usage, and returns the text.
     *
     * NOTE(review): [options] is accepted but never applied to the call —
     * confirm how LlmOptions should bind to ChatClient before relying on it.
     */
    fun callWithTracking(prompt: String, options: LlmOptions): String {
        val response = chatClient
            .prompt()
            .user(prompt)
            .call()

        val usage = response.metadata?.usage
        if (usage != null) {
            sessionTracker.recordUsage(usage)
            logger.info("Request cost: $${pricing.costOf(usage)}")
            logger.info("Session total: $${sessionTracker.totalCost()}")
        }

        return response.content()
    }

    /** Total cost accumulated since construction or the last [resetSession]. */
    fun getSessionCost(): Double = sessionTracker.totalCost()

    /** Clears the session cost accumulator. */
    fun resetSession() {
        sessionTracker.reset()
    }
}

Multi-Model Pattern

Route requests to different models based on criteria.

/**
 * Routes requests to model presets selected by a named strategy.
 */
@Service
class MultiModelService(
    private val chatClient: ChatClient,
    private val objectMapper: ObjectMapper
) {
    // Named strategies mapping to model/parameter presets.
    private val modelStrategy = mapOf(
        "quick" to LlmOptions.withModel("gpt-3.5-turbo").withMaxTokens(500),
        "balanced" to LlmOptions.withModel("gpt-4").withMaxTokens(1000),
        "creative" to LlmOptions.withModel("claude-3-opus").withTemperature(0.9),
        "analytical" to LlmOptions.withModel("gpt-4").withTemperature(0.2)
    )

    /**
     * Sends [prompt] under the named [strategy].
     *
     * @throws IllegalArgumentException for an unknown strategy
     */
    fun call(prompt: String, strategy: String = "balanced"): String {
        // NOTE(review): the resolved options are validated but never passed
        // to the ChatClient, so every strategy currently hits the default
        // model — confirm how LlmOptions should be applied to the call.
        val options = modelStrategy[strategy]
            ?: throw IllegalArgumentException("Unknown strategy: $strategy")

        return chatClient
            .prompt()
            .user(prompt)
            .call()
            .content()
    }

    /**
     * Structured-output variant of [call]: appends the converter's format
     * instructions and parses the reply into [responseType].
     * Returns null when the reply cannot be parsed.
     */
    fun <T> callStructured(
        prompt: String,
        responseType: Class<T>,
        strategy: String = "balanced"
    ): T? {
        val converter = JacksonOutputConverter(responseType, objectMapper)
        // Strategy validation happens inside call(); the original also looked
        // the options up here and then discarded them.
        val fullPrompt = "$prompt\n\n${converter.getFormat()}"
        val response = call(fullPrompt, strategy)

        return converter.convert(response)
    }
}

Embedding Pattern

Semantic search and similarity detection.

/**
 * In-memory semantic search over embedded documents.
 */
@Service
class SemanticSearchService(
    private val embeddingService: EmbeddingService
) {
    // Index of (original text, embedding) pairs.
    // NOTE(review): plain mutable list — not thread-safe; confirm usage.
    private val documents = mutableListOf<Pair<String, FloatArray>>()

    /** Embeds [text] and adds it to the index. */
    fun indexDocument(text: String) {
        val embedding = embeddingService.embed(text)
        documents.add(text to embedding)
    }

    /** Batch variant of [indexDocument] using one embedding call for all texts. */
    fun indexDocuments(texts: List<String>) {
        val embeddings = embeddingService.embed(texts)
        texts.zip(embeddings).forEach { (text, embedding) ->
            documents.add(text to embedding)
        }
    }

    /** Returns the [topK] indexed documents most similar to [query]. */
    fun search(query: String, topK: Int = 5): List<String> {
        val queryEmbedding = embeddingService.embed(query)

        return documents
            .map { (text, embedding) ->
                text to cosineSimilarity(queryEmbedding, embedding)
            }
            .sortedByDescending { it.second }
            .take(topK)
            .map { it.first }
    }

    /** Returns every indexed document whose similarity to [text] meets [threshold]. */
    fun findSimilar(text: String, threshold: Double = 0.7): List<String> {
        val embedding = embeddingService.embed(text)

        return documents
            .filter { (_, docEmbedding) ->
                cosineSimilarity(embedding, docEmbedding) >= threshold
            }
            .map { it.first }
    }

    // Cosine similarity in [-1, 1]. Returns 0.0 for a zero-magnitude vector
    // instead of NaN — the original divided by zero in that case, which made
    // sort order and threshold comparisons undefined.
    private fun cosineSimilarity(a: FloatArray, b: FloatArray): Double {
        val dotProduct = a.zip(b).sumOf { (x, y) -> (x * y).toDouble() }
        val magA = kotlin.math.sqrt(a.sumOf { (it * it).toDouble() })
        val magB = kotlin.math.sqrt(b.sumOf { (it * it).toDouble() })
        if (magA == 0.0 || magB == 0.0) return 0.0
        return dotProduct / (magA * magB)
    }
}

RAG Pattern

Retrieval-Augmented Generation with embeddings.

/**
 * Retrieval-Augmented Generation: retrieves the most similar knowledge-base
 * entries and feeds them to the LLM as context.
 */
@Service
class RAGService(
    private val embeddingService: EmbeddingService,
    private val chatClient: ChatClient
) {
    // Knowledge base of (text, embedding) pairs.
    // NOTE(review): plain mutable list — not thread-safe; confirm usage.
    private val knowledgeBase = mutableListOf<Pair<String, FloatArray>>()

    /** Embeds [text] and adds it to the knowledge base. */
    fun addKnowledge(text: String) {
        val embedding = embeddingService.embed(text)
        knowledgeBase.add(text to embedding)
    }

    /** Batch variant using one embedding call for all texts. */
    fun addKnowledge(texts: List<String>) {
        val embeddings = embeddingService.embed(texts)
        texts.zip(embeddings).forEach { (text, embedding) ->
            knowledgeBase.add(text to embedding)
        }
    }

    /**
     * Answers [question] using the [topK] most similar knowledge-base
     * entries as context.
     */
    fun query(question: String, topK: Int = 3): String {
        // 1. Embed question
        val questionEmbedding = embeddingService.embed(question)

        // 2. Find relevant context
        val relevantDocs = knowledgeBase
            .map { (text, embedding) ->
                text to cosineSimilarity(questionEmbedding, embedding)
            }
            .sortedByDescending { it.second }
            .take(topK)
            .map { it.first }

        // 3. Build prompt with context
        val context = relevantDocs.joinToString("\n\n")
        val prompt = """
            Use the following context to answer the question.

            Context:
            $context

            Question: $question

            Answer:
        """.trimIndent()

        // 4. Generate answer
        return chatClient
            .prompt()
            .user(prompt)
            .call()
            .content()
    }

    // Cosine similarity in [-1, 1]. Returns 0.0 for a zero-magnitude vector
    // instead of NaN — the original divided by zero in that case.
    private fun cosineSimilarity(a: FloatArray, b: FloatArray): Double {
        val dotProduct = a.zip(b).sumOf { (x, y) -> (x * y).toDouble() }
        val magA = kotlin.math.sqrt(a.sumOf { (it * it).toDouble() })
        val magB = kotlin.math.sqrt(b.sumOf { (it * it).toDouble() })
        if (magA == 0.0 || magB == 0.0) return 0.0
        return dotProduct / (magA * magB)
    }
}

Batch Processing Pattern

Process multiple requests efficiently.

/**
 * Processes many items through the LLM, sequentially in chunks or in
 * parallel on the Reactor parallel scheduler.
 */
@Service
class BatchProcessingService(
    private val chatClient: ChatClient,
    private val objectMapper: ObjectMapper
) {
    /**
     * Processes [items] sequentially in chunks of [batchSize]. Replies that
     * fail to parse are omitted, so the result contains no nulls in practice.
     */
    fun <T> processBatch(
        items: List<String>,
        responseType: Class<T>,
        batchSize: Int = 10
    ): List<T?> {
        val converter = JacksonOutputConverter(responseType, objectMapper)
        val parsed = mutableListOf<T>()

        for (chunk in items.chunked(batchSize)) {
            for (entry in chunk) {
                val reply = chatClient
                    .prompt()
                    .user("Process: $entry\n\n${converter.getFormat()}")
                    .call()
                    .content()

                converter.convert(reply)?.let(parsed::add)
            }
        }
        return parsed
    }

    /**
     * Processes [items] concurrently; unparseable replies are dropped from
     * the resulting stream.
     */
    fun <T> processBatchParallel(
        items: List<String>,
        responseType: Class<T>
    ): Flux<T> {
        val converter = JacksonOutputConverter(responseType, objectMapper)

        return Flux.fromIterable(items)
            .parallel()
            .runOn(Schedulers.parallel())
            .flatMap { entry ->
                val request = "Process: $entry\n\n${converter.getFormat()}"
                Mono.fromCallable {
                    val reply = chatClient
                        .prompt()
                        .user(request)
                        .call()
                        .content()

                    converter.convert(reply)
                }
            }
            .sequential()
            .filter { it != null }
            .map { it!! }
    }
}

Retry Pattern with Backoff

/**
 * ChatClient wrapper with retry-and-backoff, in both blocking and
 * reactive flavors.
 */
@Service
class ResilientLLMService(
    private val chatClient: ChatClient
) {
    // Fully qualified so no import is needed; the original referenced an
    // undeclared `logger`.
    private val logger = org.slf4j.LoggerFactory.getLogger(ResilientLLMService::class.java)

    /**
     * Calls the LLM, retrying up to [maxRetries] times with exponential
     * backoff (1s, 2s, 4s, ...). Returns null when every attempt fails.
     */
    fun callWithRetry(
        prompt: String,
        maxRetries: Int = 3
    ): String? {
        var lastError: Exception? = null

        repeat(maxRetries) { attempt ->
            try {
                return chatClient
                    .prompt()
                    .user(prompt)
                    .call()
                    .content()
            } catch (e: Exception) {
                lastError = e
                logger.warn("Attempt ${attempt + 1} failed: ${e.message}")

                if (attempt < maxRetries - 1) {
                    // True exponential backoff: 1s, 2s, 4s, ... The original
                    // grew linearly (1s, 2s, 3s) despite its comment, and
                    // the reactive variant below is exponential.
                    val delay = 1000L * (1L shl attempt)
                    Thread.sleep(delay)
                }
            }
        }

        logger.error("All retries failed", lastError)
        return null
    }

    /**
     * Reactive variant: retries 3 times with exponential backoff starting
     * at 1s, completing empty when every attempt fails.
     */
    fun callWithReactiveRetry(prompt: String): Mono<String> {
        return Mono.fromCallable {
            chatClient
                .prompt()
                .user(prompt)
                .call()
                .content()
        }
        .retryWhen(Retry.backoff(3, Duration.ofSeconds(1)))
        .onErrorResume { error ->
            logger.error("All retries failed", error)
            Mono.empty()
        }
    }
}

Caching Pattern

/**
 * ChatClient wrapper with a permanent cache and a TTL-bucketed cache.
 */
@Service
class CachedLLMService(
    private val chatClient: ChatClient
) {
    // Unbounded permanent cache keyed by the exact prompt text.
    private val cache = ConcurrentHashMap<String, String>()

    // Timed cache: prompt -> (time bucket, response). Keeping the bucket in
    // the value lets stale entries be replaced in place; the original baked
    // the bucket into the key, leaking one entry per TTL window per prompt.
    private val timedCache = ConcurrentHashMap<String, Pair<Long, String>>()

    /** Returns the cached response for [prompt], calling the LLM on a miss. */
    fun callWithCache(prompt: String): String {
        // Note: getOrPut on a ConcurrentHashMap may invoke the supplier more
        // than once under contention; only one result is retained.
        return cache.getOrPut(prompt) {
            chatClient
                .prompt()
                .user(prompt)
                .call()
                .content()
        }
    }

    /**
     * Like [callWithCache] but entries expire after [ttl]. Expiry is
     * bucketed: an entry created late in a bucket may expire almost
     * immediately when the bucket rolls over.
     */
    fun callWithTimedCache(
        prompt: String,
        ttl: Duration = Duration.ofMinutes(10)
    ): String {
        val bucket = System.currentTimeMillis() / ttl.toMillis()

        val cached = timedCache[prompt]
        if (cached != null && cached.first == bucket) {
            return cached.second
        }

        val fresh = chatClient
            .prompt()
            .user(prompt)
            .call()
            .content()
        // Overwrites any stale entry for this prompt instead of leaking it.
        timedCache[prompt] = bucket to fresh
        return fresh
    }

    /** Empties both caches. */
    fun clearCache() {
        cache.clear()
        timedCache.clear()
    }
}

Testing Patterns

Mock Converters

// Test double: ignores the input text and always returns the canned result.
class MockConverter<T>(private val mockResult: T) : StructuredOutputConverter<T> {
    override fun convert(text: String): T = mockResult
    override fun getFormat(): String = "Mock format"
}

// NOTE(review): `service` and `Person` are assumed to be declared in the
// surrounding test class — they are not shown in this snippet.
@Test
fun `test service with mock converter`() {
    // Canned result the mock converter will return regardless of input.
    val mockPerson = Person("Test", 30, "test@example.com")
    val mockConverter = MockConverter(mockPerson)

    // Use in tests
    val result = service.process(mockConverter)
    assertEquals(mockPerson, result)
}

Integration Tests

/**
 * End-to-end test of [LLMService] against a live Spring context.
 * Requires a configured LLM backend to pass.
 */
@SpringBootTest
class LLMServiceIntegrationTest {
    @Autowired
    private lateinit var llmService: LLMService

    @Test
    fun `test structured output extraction`() {
        val extracted = llmService.callWithStructuredOutput(
            "Extract: John Doe, 30, john@example.com",
            Person::class.java,
        )

        assertNotNull(extracted)
        assertEquals("John Doe", extracted?.name)
        assertEquals(30, extracted?.age)
    }
}

Best Practices

  1. Use dependency injection for all services
  2. Reuse ObjectMapper instances - they're thread-safe
  3. Track costs in production environments
  4. Implement retries with exponential backoff
  5. Cache responses when appropriate
  6. Use streaming for long-running tasks
  7. Batch requests when possible
  8. Handle errors gracefully with fallbacks
  9. Test with mocks for unit tests
  10. Monitor performance and costs in production

Install with Tessl CLI

npx tessl i tessl/maven-com-embabel-agent--embabel-agent-common

docs

advanced

error-handling.md

integration-patterns.md

performance.md

index.md

quick-reference.md

README.md

tile.json