OpenAI compatible model factory for the Embabel Agent Framework
Comprehensive guide to configuring the OpenAI-compatible model factory.
For OpenAI and most cloud providers:
val factory = OpenAiCompatibleModelFactory(
baseUrl = null,
apiKey = "sk-...", // Your API key
completionsPath = null,
embeddingsPath = null,
observationRegistry = observationRegistry
)

For local servers that don't require authentication:
val factory = OpenAiCompatibleModelFactory(
baseUrl = "http://localhost:8000",
apiKey = null, // No authentication
completionsPath = null,
embeddingsPath = null,
observationRegistry = observationRegistry
)

Reading configuration from environment variables:
val factory = OpenAiCompatibleModelFactory(
baseUrl = System.getenv("OPENAI_BASE_URL"), // Can be null
apiKey = System.getenv("OPENAI_API_KEY"), // Read from environment
completionsPath = null,
embeddingsPath = null,
observationRegistry = observationRegistry
)

Spring configuration example:
@Configuration
class LlmConfiguration {
@Bean
fun openAiModelFactory(
@Value("\${openai.api.key}") apiKey: String,
@Value("\${openai.base.url:#{null}}") baseUrl: String?,
observationRegistry: ObservationRegistry
): OpenAiCompatibleModelFactory {
return OpenAiCompatibleModelFactory(
baseUrl = baseUrl,
apiKey = apiKey,
completionsPath = null,
embeddingsPath = null,
observationRegistry = observationRegistry
)
}
}

The baseUrl parameter sets the API base URL:
// OpenAI (default)
val openAi = OpenAiCompatibleModelFactory(
baseUrl = null, // Uses https://api.openai.com
...
)
// Azure OpenAI
val azure = OpenAiCompatibleModelFactory(
baseUrl = "https://your-resource.openai.azure.com",
...
)
// Local LLM server
val local = OpenAiCompatibleModelFactory(
baseUrl = "http://localhost:11434",
...
)
// Custom cloud provider
val custom = OpenAiCompatibleModelFactory(
baseUrl = "https://api.custom-provider.com",
...
)

Override the default chat completions endpoint path:
val factory = OpenAiCompatibleModelFactory(
baseUrl = "https://custom-llm-api.com",
apiKey = "your-api-key",
completionsPath = "/api/v1/chat/completions", // Custom path
embeddingsPath = null,
observationRegistry = observationRegistry
)

The default chat completions path is /v1/chat/completions.

Override the default embeddings endpoint path:
val factory = OpenAiCompatibleModelFactory(
baseUrl = "https://custom-llm-api.com",
apiKey = "your-api-key",
completionsPath = null,
embeddingsPath = "/api/v1/embeddings", // Custom path
observationRegistry = observationRegistry
)

The default embeddings path is /v1/embeddings.

Using custom paths for both endpoints:
val factory = OpenAiCompatibleModelFactory(
baseUrl = "https://custom-provider.com",
apiKey = "custom-api-key",
completionsPath = "/custom/chat/endpoint",
embeddingsPath = "/custom/embeddings/endpoint",
observationRegistry = observationRegistry
)

The factory uses these default timeouts:
Provide a custom ClientHttpRequestFactory to override timeouts:
import org.springframework.http.client.SimpleClientHttpRequestFactory
import org.springframework.beans.factory.ObjectProvider
val customRequestFactory = SimpleClientHttpRequestFactory().apply {
setConnectTimeout(10000) // 10 seconds connect timeout
setReadTimeout(120000) // 2 minutes read timeout
}
val factory = OpenAiCompatibleModelFactory(
baseUrl = null,
apiKey = "your-api-key",
completionsPath = null,
embeddingsPath = null,
observationRegistry = observationRegistry,
requestFactory = ObjectProvider.of(customRequestFactory)
)

Connect timeout:
Read timeout:
Example - Short timeout for fast responses:
val fastFactory = SimpleClientHttpRequestFactory().apply {
setConnectTimeout(5000) // 5 seconds
setReadTimeout(60000) // 1 minute - expect fast responses
}
val factory = OpenAiCompatibleModelFactory(
baseUrl = null,
apiKey = "your-api-key",
completionsPath = null,
embeddingsPath = null,
observationRegistry = observationRegistry,
requestFactory = ObjectProvider.of(fastFactory)
)

The factory integrates with Micrometer for observability.
import io.micrometer.observation.ObservationRegistry
// Create observation registry
val observationRegistry = ObservationRegistry.create()
// Pass to factory
val factory = OpenAiCompatibleModelFactory(
baseUrl = null,
apiKey = "your-api-key",
completionsPath = null,
embeddingsPath = null,
observationRegistry = observationRegistry // Enables observability
)

The factory automatically instruments:
import io.micrometer.core.instrument.MeterRegistry
import io.micrometer.observation.ObservationRegistry
val observationRegistry = ObservationRegistry.create().apply {
observationConfig()
.observationHandler(DefaultMeterObservationHandler(meterRegistry))
}
val factory = OpenAiCompatibleModelFactory(
baseUrl = null,
apiKey = "your-api-key",
completionsPath = null,
embeddingsPath = null,
observationRegistry = observationRegistry
)

Adding distributed tracing support:
import io.micrometer.observation.ObservationRegistry
import io.micrometer.tracing.Tracer
val observationRegistry = ObservationRegistry.create().apply {
observationConfig()
.observationHandler(DefaultTracingObservationHandler(tracer))
}
val factory = OpenAiCompatibleModelFactory(
baseUrl = null,
apiKey = "your-api-key",
completionsPath = null,
embeddingsPath = null,
observationRegistry = observationRegistry
)

In Spring Boot, the ObservationRegistry is auto-configured:
@Configuration
class LlmConfiguration(
private val observationRegistry: ObservationRegistry // Auto-injected
) {
@Bean
fun openAiModelFactory(
@Value("\${openai.api.key}") apiKey: String
): OpenAiCompatibleModelFactory {
return OpenAiCompatibleModelFactory(
baseUrl = null,
apiKey = apiKey,
completionsPath = null,
embeddingsPath = null,
observationRegistry = observationRegistry // Uses Spring Boot's registry
)
}
}

Configure retry behavior for handling transient failures.
By default, the factory uses Spring AI's default retry template with reasonable retry policies.
import org.springframework.retry.support.RetryTemplate
import org.springframework.retry.backoff.ExponentialBackOffPolicy
import org.springframework.retry.policy.SimpleRetryPolicy
val retryTemplate = RetryTemplate().apply {
setBackOffPolicy(ExponentialBackOffPolicy().apply {
initialInterval = 1000 // Start with 1 second delay
multiplier = 2.0 // Double the delay each retry
maxInterval = 10000 // Cap at 10 seconds
})
setRetryPolicy(SimpleRetryPolicy(3)) // Retry up to 3 times
}
val llmService = factory.openAiCompatibleLlm(
model = "gpt-4",
pricingModel = PricingModel.usdPer1MTokens(30.0, 60.0),
provider = "OpenAI",
knowledgeCutoffDate = LocalDate.of(2023, 4, 1),
retryTemplate = retryTemplate // Custom retry configuration
)

Retrying only selected exception types:
import org.springframework.retry.policy.ExceptionClassifierRetryPolicy
import org.springframework.retry.policy.NeverRetryPolicy
import org.springframework.retry.policy.SimpleRetryPolicy
import org.springframework.web.client.HttpServerErrorException
import org.springframework.web.client.ResourceAccessException
val retryPolicy = ExceptionClassifierRetryPolicy().apply {
setExceptionClassifier { throwable ->
when (throwable) {
is HttpServerErrorException -> SimpleRetryPolicy(3) // Retry 5xx errors
is ResourceAccessException -> SimpleRetryPolicy(3) // Retry connection errors
else -> NeverRetryPolicy() // Don't retry others
}
}
}
val retryTemplate = RetryTemplate().apply {
setRetryPolicy(retryPolicy)
setBackOffPolicy(ExponentialBackOffPolicy().apply {
initialInterval = 1000
multiplier = 2.0
maxInterval = 10000
})
}
val llmService = factory.openAiCompatibleLlm(
model = "gpt-4",
pricingModel = PricingModel.usdPer1MTokens(30.0, 60.0),
provider = "OpenAI",
knowledgeCutoffDate = LocalDate.of(2023, 4, 1),
retryTemplate = retryTemplate
)

Disabling retries entirely:
import org.springframework.retry.support.RetryTemplate
import org.springframework.retry.policy.NeverRetryPolicy
val noRetryTemplate = RetryTemplate().apply {
setRetryPolicy(NeverRetryPolicy())
}
val llmService = factory.openAiCompatibleLlm(
model = "gpt-4",
pricingModel = PricingModel.usdPer1MTokens(30.0, 60.0),
provider = "OpenAI",
knowledgeCutoffDate = LocalDate.of(2023, 4, 1),
retryTemplate = noRetryTemplate // Disable retries
)

A more aggressive retry configuration:
val aggressiveRetryTemplate = RetryTemplate().apply {
setBackOffPolicy(ExponentialBackOffPolicy().apply {
initialInterval = 500 // Start with 500ms
multiplier = 1.5 // Slower exponential growth
maxInterval = 30000 // Cap at 30 seconds
})
setRetryPolicy(SimpleRetryPolicy(5)) // Retry up to 5 times
}
val llmService = factory.openAiCompatibleLlm(
model = "gpt-4",
pricingModel = PricingModel.usdPer1MTokens(30.0, 60.0),
provider = "OpenAI",
knowledgeCutoffDate = LocalDate.of(2023, 4, 1),
retryTemplate = aggressiveRetryTemplate
)

Configure pricing for cost tracking.
For cloud providers that charge per token:
val pricingModel = PricingModel.usdPer1MTokens(
usdPer1mInputTokens = 30.0, // $30 per 1 million input tokens
usdPer1mOutputTokens = 60.0 // $60 per 1 million output tokens
)
val service = factory.openAiCompatibleLlm(
model = "gpt-4",
pricingModel = pricingModel,
provider = "OpenAI",
knowledgeCutoffDate = LocalDate.of(2023, 4, 1)
)

Common OpenAI prices (as of 2024):
PricingModel.usdPer1MTokens(0.5, 1.5)
PricingModel.usdPer1MTokens(30.0, 60.0)
PricingModel.usdPer1MTokens(10.0, 30.0)
PricingModel.usdPer1MTokens(10.0, 30.0)

For free models or fixed-cost scenarios:
val service = factory.openAiCompatibleLlm(
model = "llama-3-70b",
pricingModel = PricingModel.ALL_YOU_CAN_EAT, // No per-token tracking
provider = "Ollama",
knowledgeCutoffDate = null
)

Use ALL_YOU_CAN_EAT for:
Putting it all together:
import com.embabel.agent.openai.OpenAiCompatibleModelFactory
import com.embabel.agent.openai.StandardOpenAiOptionsConverter
import com.embabel.common.ai.model.PricingModel
import io.micrometer.observation.ObservationRegistry
import org.springframework.http.client.SimpleClientHttpRequestFactory
import org.springframework.beans.factory.ObjectProvider
import org.springframework.retry.support.RetryTemplate
import org.springframework.retry.backoff.ExponentialBackOffPolicy
import org.springframework.retry.policy.SimpleRetryPolicy
import java.time.LocalDate
// Custom HTTP client
val requestFactory = SimpleClientHttpRequestFactory().apply {
setConnectTimeout(10000) // 10 seconds
setReadTimeout(300000) // 5 minutes
}
// Custom retry policy
val retryTemplate = RetryTemplate().apply {
setBackOffPolicy(ExponentialBackOffPolicy().apply {
initialInterval = 1000
multiplier = 2.0
maxInterval = 10000
})
setRetryPolicy(SimpleRetryPolicy(3))
}
// Create factory with custom configuration
val factory = OpenAiCompatibleModelFactory(
baseUrl = "https://api.openai.com",
apiKey = System.getenv("OPENAI_API_KEY"),
completionsPath = "/v1/chat/completions",
embeddingsPath = "/v1/embeddings",
observationRegistry = ObservationRegistry.create(),
requestFactory = ObjectProvider.of(requestFactory)
)
// Create service with custom options
val service = factory.openAiCompatibleLlm(
model = "gpt-4-turbo",
pricingModel = PricingModel.usdPer1MTokens(10.0, 30.0),
provider = "OpenAI",
knowledgeCutoffDate = LocalDate.of(2023, 12, 1),
optionsConverter = StandardOpenAiOptionsConverter,
retryTemplate = retryTemplate
)

Install with Tessl CLI
npx tessl i tessl/maven-com-embabel-agent--embabel-agent-openai@0.3.0