Core runtime module for Quarkus LangChain4j integration with declarative AI services, guardrails, and observability
The cost estimation framework provides custom strategies for estimating AI model API call costs. This experimental feature enables tracking, budgeting, and billing for AI service usage.
Status: Experimental - API subject to change
import io.quarkiverse.langchain4j.cost.Cost;
import io.quarkiverse.langchain4j.cost.CostEstimator;
import io.quarkiverse.langchain4j.cost.CostEstimatorService;
import jakarta.enterprise.context.ApplicationScoped;
import jakarta.inject.Inject;
import java.math.BigDecimal;package io.quarkiverse.langchain4j.cost;
import java.math.BigDecimal;
public record Cost(
BigDecimal number,
String currencyCode
) {
public static Cost of(BigDecimal number, String currencyCode);
public String toString();
}Represents a monetary cost with currency.
public static Cost of(BigDecimal number, String currencyCode)Creates a Cost instance with defaults:
number: Defaults to 0 if nullcurrencyCode: Defaults to "UNKNOWN" if nullpublic String toString()Returns formatted string: "{number}{currencyCode}" (e.g., "0.05USD")
package io.quarkiverse.langchain4j.cost;
import java.math.BigDecimal;
public interface CostEstimator {
/**
 * Checks if this estimator applies to the given model.
 *
 * @param context context containing the model name
 * @return {@code true} if this estimator can estimate costs for the model,
 *         {@code false} otherwise
 */
boolean supports(SupportsContext context);
/**
 * Estimates the cost of an API call.
 *
 * @param context context containing model name, input tokens, and output tokens
 * @return the estimation result as a {@link CostResult}
 */
CostResult estimate(CostContext context);
/** Context for checking estimator support. */
interface SupportsContext {
/** Name of the model the support check is made for. */
String model();
}
/** Complete context for cost estimation: the model name plus token counts. */
interface CostContext extends SupportsContext {
/** Number of input tokens; boxed, so implementations should expect it may be {@code null}. */
Integer inputTokens();
/** Number of output tokens; boxed, so implementations should expect it may be {@code null}. */
Integer outputTokens();
}
/**
 * Result of cost estimation containing separate costs for input and output tokens.
 *
 * @param inputTokensCost cost attributed to the input tokens
 * @param outputTokensCost cost attributed to the output tokens
 * @param currency currency the two costs are expressed in
 */
record CostResult(
BigDecimal inputTokensCost,
BigDecimal outputTokensCost,
String currency
) {}
}boolean supports(SupportsContext context);Checks if this estimator applies to the given model.
Parameters:
context: Context containing model nameReturns:
true if this estimator can estimate costs for the modelfalse otherwiseCostResult estimate(CostContext context);Estimates the cost of an API call.
Parameters:
context: Context containing model name, input tokens, and output tokensReturns:
public interface SupportsContext {
String model();
}Context for checking estimator support.
public interface CostContext extends SupportsContext {
Integer inputTokens();
Integer outputTokens();
}Complete context for cost estimation including token counts.
public record CostResult(
BigDecimal inputTokensCost,
BigDecimal outputTokensCost,
String currency
) {}Result of cost estimation containing separate costs for input and output tokens.
package io.quarkiverse.langchain4j.cost;
import jakarta.inject.Singleton;
import java.util.List;
@Singleton
public class CostEstimatorService {
public CostEstimatorService(List<CostEstimator> costEstimators);
public Cost estimate(ChatModelResponseContext response);
}Service for determining API request costs using registered CostEstimators.
public CostEstimatorService(List<CostEstimator> costEstimators)Automatically injects all CDI CostEstimator beans.
public Cost estimate(ChatModelResponseContext response)Estimates cost from a chat model response.
Parameters:
response: Response context from ChatModelListenerReturns:
import io.quarkiverse.langchain4j.cost.CostEstimator;
import jakarta.enterprise.context.ApplicationScoped;
import java.math.BigDecimal;
@ApplicationScoped
public class OpenAICostEstimator implements CostEstimator {
// Pricing as of example date (check current pricing)
private static final BigDecimal GPT_4_INPUT_COST = new BigDecimal("0.03"); // per 1K tokens
private static final BigDecimal GPT_4_OUTPUT_COST = new BigDecimal("0.06"); // per 1K tokens
/**
 * Reports whether this estimator handles the given model.
 *
 * <p>Any model whose name starts with {@code "gpt-4"} is supported. A context
 * without a model name is reported as unsupported rather than failing with a
 * {@link NullPointerException}.
 *
 * @param context context carrying the model name
 * @return {@code true} when the model name is present and starts with {@code "gpt-4"}
 */
@Override
public boolean supports(SupportsContext context) {
    String model = context.model();
    return model != null && model.startsWith("gpt-4");
}
/**
 * Prices the input and output tokens of a call separately, using the GPT-4
 * per-1K-token rates declared on this class, and reports both in USD.
 *
 * @param context context carrying the input and output token counts
 * @return the input cost, output cost, and the currency code {@code "USD"}
 */
@Override
public CostResult estimate(CostContext context) {
    return new CostResult(
            calculateCost(context.inputTokens(), GPT_4_INPUT_COST),
            calculateCost(context.outputTokens(), GPT_4_OUTPUT_COST),
            "USD");
}
/**
 * Converts a token count into a cost given a price per 1,000 tokens.
 *
 * @param tokens number of tokens; {@code null} or zero yields {@link BigDecimal#ZERO}
 * @param costPer1K price charged per 1,000 tokens
 * @return {@code tokens * costPer1K / 1000}, rounded half-up to 6 decimal places
 */
private BigDecimal calculateCost(Integer tokens, BigDecimal costPer1K) {
    if (tokens == null || tokens == 0) {
        return BigDecimal.ZERO;
    }
    // RoundingMode replaces the int constant BigDecimal.ROUND_HALF_UP, which has been
    // deprecated since Java 9. It is fully qualified so no extra import is required.
    return costPer1K.multiply(new BigDecimal(tokens))
            .divide(new BigDecimal(1000), 6, java.math.RoundingMode.HALF_UP);
}
}@ApplicationScoped
public class MultiModelCostEstimator implements CostEstimator {
private static final Map<String, ModelPricing> PRICING = Map.of(
"gpt-4-turbo", new ModelPricing("0.01", "0.03", "USD"),
"gpt-4", new ModelPricing("0.03", "0.06", "USD"),
"gpt-3.5-turbo", new ModelPricing("0.0005", "0.0015", "USD"),
"claude-3-opus", new ModelPricing("0.015", "0.075", "USD"),
"claude-3-sonnet", new ModelPricing("0.003", "0.015", "USD")
);
/**
 * Reports whether the pricing table has an entry for the given model.
 *
 * <p>The lookup is an exact match on the model name. A missing model name is
 * checked first because the immutable map created by {@code Map.of(...)}
 * throws {@link NullPointerException} when queried with a {@code null} key.
 *
 * @param context context carrying the model name
 * @return {@code true} when the model name is present and has a pricing entry
 */
@Override
public boolean supports(SupportsContext context) {
    String model = context.model();
    return model != null && PRICING.containsKey(model);
}
/**
 * Estimates input and output token costs from the static pricing table.
 *
 * <p>The model name is looked up by exact match. A missing model name is
 * handled before the lookup because the immutable map created by
 * {@code Map.of(...)} throws {@link NullPointerException} on a {@code null} key.
 *
 * @param context context carrying the model name and token counts
 * @return the input cost, output cost, and pricing currency, or {@code null}
 *         when the model name is absent or has no pricing entry
 */
@Override
public CostResult estimate(CostContext context) {
    String model = context.model();
    ModelPricing pricing = (model == null) ? null : PRICING.get(model);
    if (pricing == null) {
        return null;
    }
    BigDecimal inputCost = calculateCost(context.inputTokens(), pricing.inputCostPer1K);
    BigDecimal outputCost = calculateCost(context.outputTokens(), pricing.outputCostPer1K);
    return new CostResult(inputCost, outputCost, pricing.currency);
}
/**
 * Converts a token count into a cost given a price per 1,000 tokens.
 *
 * @param tokens number of tokens; {@code null} or zero yields {@link BigDecimal#ZERO}
 * @param costPer1K price charged per 1,000 tokens
 * @return {@code tokens * costPer1K / 1000}, rounded half-up to 6 decimal places
 */
private BigDecimal calculateCost(Integer tokens, BigDecimal costPer1K) {
    if (tokens == null || tokens == 0) {
        return BigDecimal.ZERO;
    }
    // RoundingMode replaces the int constant BigDecimal.ROUND_HALF_UP, which has been
    // deprecated since Java 9. It is fully qualified so no extra import is required.
    return new BigDecimal(tokens)
            .multiply(costPer1K)
            .divide(new BigDecimal(1000), 6, java.math.RoundingMode.HALF_UP);
}
/**
 * Per-model price entry: input and output price per 1,000 tokens plus the
 * currency those prices are expressed in.
 */
private record ModelPricing(
        BigDecimal inputCostPer1K,
        BigDecimal outputCostPer1K,
        String currency) {

    /** Convenience constructor that parses both prices from their decimal string form. */
    ModelPricing(String inputPrice, String outputPrice, String currencyCode) {
        this(new BigDecimal(inputPrice), new BigDecimal(outputPrice), currencyCode);
    }
}
}import io.quarkiverse.langchain4j.cost.Cost;
import io.quarkiverse.langchain4j.cost.CostEstimatorService;
import jakarta.enterprise.context.ApplicationScoped;
import jakarta.inject.Inject;
@ApplicationScoped
public class UsageTracker {
@Inject
CostEstimatorService costEstimator;
/**
 * Estimates the cost of a chat model response and, when an estimate is
 * available, records it for the user and logs it.
 *
 * @param response chat model response context handed to the cost estimator
 * @param userId user the usage is attributed to
 */
public void trackUsage(ChatModelResponseContext response, String userId) {
    Cost estimated = costEstimator.estimate(response);
    if (estimated == null) {
        // Nothing to record when the estimator returns no estimate.
        return;
    }
    recordUsage(userId, estimated);
    log.infof("Cost for user %s: %s", userId, estimated);
}
/**
 * Extension point for persisting a user's usage; intentionally empty in this example.
 *
 * @param userId user the cost is attributed to
 * @param cost estimated cost to record
 */
private void recordUsage(String userId, Cost cost) {
// Store in database, update metrics, etc.
}
}@ApplicationScoped
public class BudgetManager {
@Inject
CostEstimatorService costEstimator;
private final Map<String, BigDecimal> userBudgets = new ConcurrentHashMap<>();
private final Map<String, BigDecimal> userSpending = new ConcurrentHashMap<>();
/**
 * Sets (or replaces) the budget stored for a user.
 *
 * @param userId user whose budget is set
 * @param budget budget amount; stored as-is, with no currency attached
 */
public void setUserBudget(String userId, BigDecimal budget) {
userBudgets.put(userId, budget);
}
/**
 * Checks whether the user's remaining budget covers the estimated cost of a response.
 *
 * <p>Users with no stored budget or spending are treated as having zero for
 * that value. Only the numeric amount of the estimate is compared; its
 * currency code is not consulted.
 *
 * @param userId user whose budget is checked
 * @param response chat model response context handed to the cost estimator
 * @return {@code true} when no estimate is available, or when budget minus
 *         spending is at least the estimated amount
 */
public boolean canAfford(String userId, ChatModelResponseContext response) {
    Cost estimate = costEstimator.estimate(response);
    if (estimate != null) {
        BigDecimal remaining = userBudgets.getOrDefault(userId, BigDecimal.ZERO)
                .subtract(userSpending.getOrDefault(userId, BigDecimal.ZERO));
        return remaining.compareTo(estimate.number()) >= 0;
    }
    return true; // No cost estimate available
}
/**
 * Adds the estimated cost of a response to the user's accumulated spending.
 * Does nothing when the estimator returns no estimate.
 *
 * @param userId user the spending is attributed to
 * @param response chat model response context handed to the cost estimator
 */
public void recordSpending(String userId, ChatModelResponseContext response) {
    Cost estimate = costEstimator.estimate(response);
    if (estimate == null) {
        return;
    }
    // merge() starts the total at this amount or adds it to the existing one.
    userSpending.merge(userId, estimate.number(), BigDecimal::add);
}
/**
 * Returns the user's budget minus their accumulated spending.
 *
 * @param userId user to look up
 * @return budget minus spending, each defaulting to zero when nothing is stored;
 *         negative when spending exceeds the budget
 */
public BigDecimal getRemainingBudget(String userId) {
    return userBudgets.getOrDefault(userId, BigDecimal.ZERO)
            .subtract(userSpending.getOrDefault(userId, BigDecimal.ZERO));
}
}import dev.langchain4j.observability.api.event.AiServiceCompletedEvent;
import jakarta.enterprise.event.Observes;
import jakarta.enterprise.context.ApplicationScoped;
@ApplicationScoped
public class CostTracker {
@Inject
CostEstimatorService costEstimator;
@Inject
BillingService billing;
/**
 * CDI observer that charges the estimated cost of a completed AI service call
 * to the user resolved from the event, then logs it. Events for which the
 * estimator returns no estimate are ignored.
 *
 * @param event completion event supplying the response context
 */
void trackCost(@Observes AiServiceCompletedEvent event) {
    Cost cost = costEstimator.estimate(event.getResponseContext());
    if (cost == null) {
        return;
    }
    String userId = getUserId(event);
    billing.recordCharge(userId, cost);
    log.infof("Tracked cost for user %s: %s", userId, cost);
}
}@ApplicationScoped
public class DynamicPricingEstimator implements CostEstimator {
@Inject
PricingService pricingService;
/**
 * Reports whether this estimator handles the given model.
 *
 * <p>Any model whose name starts with {@code "gpt-"} is supported. A context
 * without a model name is reported as unsupported rather than failing with a
 * {@link NullPointerException}.
 *
 * @param context context carrying the model name
 * @return {@code true} when the model name is present and starts with {@code "gpt-"}
 */
@Override
public boolean supports(SupportsContext context) {
    String model = context.model();
    return model != null && model.startsWith("gpt-");
}
/**
 * Estimates input and output token costs using pricing fetched from the
 * injected pricing service at call time.
 *
 * <p>When the pricing service returns no pricing for the model, this method
 * returns {@code null} instead of failing with a {@link NullPointerException}.
 * NOTE(review): assumes callers tolerate a {@code null} result — confirm
 * against the consuming service.
 *
 * @param context context carrying the model name and token counts
 * @return the input cost, output cost, and pricing currency, or {@code null}
 *         when no pricing is available
 */
@Override
public CostResult estimate(CostContext context) {
    // Fetch current pricing from external service
    ModelPricing pricing = pricingService.getCurrentPricing(context.model());
    if (pricing == null) {
        return null;
    }
    BigDecimal inputCost = calculateCost(context.inputTokens(), pricing.getInputPrice());
    BigDecimal outputCost = calculateCost(context.outputTokens(), pricing.getOutputPrice());
    return new CostResult(inputCost, outputCost, pricing.getCurrency());
}
/**
 * Multiplies a token count by a per-token price.
 *
 * @param tokens number of tokens; {@code null} or zero yields {@link BigDecimal#ZERO}
 * @param pricePerToken price charged for a single token
 * @return {@code pricePerToken * tokens}, unrounded
 */
private BigDecimal calculateCost(Integer tokens, BigDecimal pricePerToken) {
    boolean nothingToCharge = tokens == null || tokens == 0;
    return nothingToCharge
            ? BigDecimal.ZERO
            : pricePerToken.multiply(new BigDecimal(tokens));
}
}@ApplicationScoped
public class CostReporter {
@Inject
CostEstimatorService costEstimator;
private final Map<String, List<Cost>> userCosts = new ConcurrentHashMap<>();
/**
 * Estimates the cost of a response and appends it to the user's cost history.
 * Does nothing when the estimator returns no estimate.
 *
 * @param userId user the cost is attributed to
 * @param response chat model response context handed to the cost estimator
 */
public void recordCost(String userId, ChatModelResponseContext response) {
    Cost estimate = costEstimator.estimate(response);
    if (estimate == null) {
        return;
    }
    // The per-user list is created on first use.
    List<Cost> history = userCosts.computeIfAbsent(userId, k -> new CopyOnWriteArrayList<>());
    history.add(estimate);
}
/**
 * Builds a summary of everything recorded for a user: the number of recorded
 * costs and the sum of their amounts.
 *
 * <p>NOTE(review): amounts are summed without looking at each cost's currency
 * code, and the report is always labelled {@code "USD"} — confirm that only
 * USD costs are ever recorded.
 *
 * @param userId user to report on
 * @return report with request count and total; count 0 and total zero when
 *         nothing has been recorded for the user
 */
public CostReport generateReport(String userId) {
    List<Cost> recorded = userCosts.getOrDefault(userId, List.of());
    BigDecimal sum = BigDecimal.ZERO;
    for (Cost entry : recorded) {
        sum = sum.add(entry.number());
    }
    return new CostReport(userId, recorded.size(), sum, "USD");
}
/**
 * Per-user cost summary.
 *
 * @param userId user the report describes
 * @param requestCount number of recorded costs
 * @param totalCost sum of the recorded cost amounts
 * @param currency currency label attached to {@code totalCost}
 */
public record CostReport(
String userId,
int requestCount,
BigDecimal totalCost,
String currency
) {}
}@ApplicationScoped
public class CostManagementSystem {
@Inject
CostEstimatorService costEstimator;
@Inject
MeterRegistry metrics;
@Inject
BillingDatabase database;
private final Map<String, UserCostContext> userContexts = new ConcurrentHashMap<>();
/**
 * Estimates the cost of a response, adds it to the user's running total,
 * publishes it as a metric, persists it, and warns plus alerts once the running
 * total exceeds the user's budget.
 *
 * <p>Returns without side effects when the estimator yields no estimate.
 *
 * @param userId user the response is attributed to; also used as the "user" metric tag
 * @param response chat model response context handed to the cost estimator
 */
public void trackAndEnforce(String userId, ChatModelResponseContext response) {
Cost cost = costEstimator.estimate(response);
if (cost == null) {
return;
}
// Per-user state is created on first use.
UserCostContext context = userContexts.computeIfAbsent(
userId,
k -> new UserCostContext()
);
// Everything below runs while holding this user's context monitor.
// NOTE(review): the database write and the alert are performed under the lock — confirm this is intended.
synchronized (context) {
// Record cost
context.totalCost = context.totalCost.add(cost.number());
context.requestCount++;
// Update metrics
// The counter is incremented by the cost amount converted to double.
Counter.builder("ai.service.cost")
.tag("user", userId)
.tag("currency", cost.currencyCode())
.register(metrics)
.increment(cost.number().doubleValue());
// Persist to database
database.recordUsage(userId, cost, Instant.now());
// Check budget
// Strictly greater than: spending exactly equal to the budget does not alert.
if (context.totalCost.compareTo(context.budget) > 0) {
// NOTE(review): `log` and `alertService` are not declared in this class as shown — confirm where they come from.
log.warnf("User %s exceeded budget: spent=%s, budget=%s",
userId, context.totalCost, context.budget);
// Trigger alert
alertService.sendBudgetExceededAlert(userId, context.totalCost);
}
}
}
/**
 * Sets the budget for a user, creating the user's cost context on first use.
 *
 * <p>The write happens while holding the context's monitor, the same lock
 * {@code trackAndEnforce} holds when it compares spending against the budget,
 * so a new budget is reliably visible to the next budget check.
 *
 * @param userId user whose budget is set
 * @param budget new budget amount
 */
public void setUserBudget(String userId, BigDecimal budget) {
    UserCostContext context = userContexts.computeIfAbsent(
            userId,
            k -> new UserCostContext()
    );
    synchronized (context) {
        context.budget = budget;
    }
}
/**
 * Returns the user's budget minus their accumulated cost.
 *
 * <p>Both fields are read under the context's monitor so the result is a
 * consistent snapshot with respect to concurrent {@code trackAndEnforce} calls,
 * which update the total under the same lock.
 *
 * @param userId user to look up
 * @return budget minus total cost (negative once the budget is exceeded), or
 *         {@link BigDecimal#ZERO} when no context exists yet for the user
 */
public BigDecimal getRemainingBudget(String userId) {
    UserCostContext context = userContexts.get(userId);
    if (context == null) {
        return BigDecimal.ZERO;
    }
    synchronized (context) {
        return context.budget.subtract(context.totalCost);
    }
}
/** Mutable per-user accounting state held in {@code userContexts}. */
private static class UserCostContext {
// Running sum of recorded cost amounts.
BigDecimal totalCost = BigDecimal.ZERO;
BigDecimal budget = new BigDecimal("100.00"); // Default budget
// Number of responses recorded for the user.
int requestCount = 0;
}
}Install with Tessl CLI
npx tessl i tessl/maven-io-quarkiverse-langchain4j--quarkus-langchain4j-core