Core model interfaces and abstractions for the Spring AI framework, providing a portable API for chat, embeddings, images, audio, and tool calling across multiple AI providers.
Comprehensive metadata structures for tracking token usage, rate limits, model information, finish reasons, and provider-specific details across all model types.
All chat metadata classes are located in the org.springframework.ai.chat.metadata package.
Required imports:
import org.springframework.ai.chat.metadata.ChatGenerationMetadata;
import org.springframework.ai.chat.metadata.ChatResponseMetadata;
import org.springframework.ai.chat.metadata.Usage;
import org.springframework.ai.chat.metadata.RateLimit;
Metadata for chat responses.
public class ChatResponseMetadata extends AbstractResponseMetadata implements ResponseMetadata {
/**
* Get the unique ID for this response.
*
* @return the response ID
*/
String getId();
/**
* Get the model name used for this response.
*
* @return the model name
*/
String getModel();
/**
* Get rate limit information.
*
* @return the rate limit details
*/
RateLimit getRateLimit();
/**
* Get token usage information.
*
* @return the usage details
*/
Usage getUsage();
/**
* Get prompt metadata.
*
* @return the prompt metadata
*/
PromptMetadata getPromptMetadata();
/**
* Create a new metadata builder.
*
* @return a new builder
*/
static Builder builder();
/**
* Builder for constructing ChatResponseMetadata instances.
*/
public static final class Builder {
/**
* Supply a map of metadata entries.
* NOTE(review): the parameter name suggests the entries are copied rather than the map being retained - confirm.
*
* @param mapToCopy the metadata map
* @return the builder
*/
Builder metadata(Map<String, Object> mapToCopy);
/**
* Supply a single metadata entry.
*
* @param key the metadata key
* @param value the metadata value
* @return the builder
*/
Builder keyValue(String key, Object value);
/**
* Set the response ID.
*
* @param id the response ID
* @return the builder
*/
Builder id(String id);
/**
* Set the model name.
*
* @param model the model name
* @return the builder
*/
Builder model(String model);
/**
* Set the rate limit information.
*
* @param rateLimit the rate limit details
* @return the builder
*/
Builder rateLimit(RateLimit rateLimit);
/**
* Set the token usage information.
*
* @param usage the usage details
* @return the builder
*/
Builder usage(Usage usage);
/**
* Set the prompt metadata.
*
* @param promptMetadata the prompt metadata
* @return the builder
*/
Builder promptMetadata(PromptMetadata promptMetadata);
/**
* Build the ChatResponseMetadata instance.
*
* @return the metadata
*/
ChatResponseMetadata build();
}
}
Metadata for individual chat generations.
public interface ChatGenerationMetadata extends ResultMetadata {
/**
* Empty metadata constant.
*/
ChatGenerationMetadata NULL = builder().build();
/**
* Get the finish reason for this generation.
* Common values: "stop", "length", "tool_calls", "content_filter"
*
* @return the finish reason
*/
String getFinishReason();
/**
* Get content filters applied to this generation.
*
* @return set of content filter names
*/
Set<String> getContentFilters();
/**
* Get metadata value by key.
*
* @param key the metadata key
* @param <T> the value type
* @return the metadata value
*/
<T> T get(String key);
/**
* Check if metadata contains a key.
*
* @param key the metadata key
* @return true if key exists
*/
boolean containsKey(String key);
/**
* Get metadata value with default.
*
* @param key the metadata key
* @param defaultObject the default value
* @param <T> the value type
* @return the metadata value or default
*/
<T> T getOrDefault(String key, T defaultObject);
/**
* Get all metadata entries.
*
* @return set of metadata entries
*/
Set<Entry<String, Object>> entrySet();
/**
* Get all metadata keys.
*
* @return set of metadata keys
*/
Set<String> keySet();
/**
* Check if metadata is empty.
*
* @return true if empty
*/
boolean isEmpty();
/**
* Create a new metadata builder.
*
* @return a new builder
*/
static Builder builder();
/**
* Builder for constructing ChatGenerationMetadata instances.
*/
public interface Builder {
/**
* Set the finish reason.
*
* @param finishReason the finish reason
* @return this builder
*/
Builder finishReason(String finishReason);
/**
* Add metadata entry.
*
* @param key the metadata key
* @param value the metadata value
* @param <T> the value type
* @return this builder
*/
<T> Builder metadata(String key, T value);
/**
* Add metadata entries.
*
* @param metadata the metadata map
* @return this builder
*/
Builder metadata(Map<String, Object> metadata);
/**
* Add content filter.
*
* @param contentFilter the content filter name
* @return this builder
*/
Builder contentFilter(String contentFilter);
/**
* Add content filters.
*
* @param contentFilters the content filter names
* @return this builder
*/
Builder contentFilters(Set<String> contentFilters);
/**
* Build the ChatGenerationMetadata instance.
*
* @return the metadata
*/
ChatGenerationMetadata build();
}
}
Default implementation of ChatGenerationMetadata.
public class DefaultChatGenerationMetadata implements ChatGenerationMetadata {
// Default implementation
}
Token usage information.
public interface Usage {
/**
* Get the number of tokens used in the prompt.
*
* @return prompt token count
*/
Integer getPromptTokens();
/**
* Get the number of tokens generated in the completion.
*
* @return completion token count
*/
Integer getCompletionTokens();
/**
* Get the total number of tokens used (prompt + completion).
*
* @return total token count
*/
Integer getTotalTokens();
/**
* Get provider-specific native usage information.
*
* @return the native usage object
*/
Object getNativeUsage();
}
Default implementation of Usage.
public class DefaultUsage implements Usage {
/**
* Construct a DefaultUsage.
*
* @param promptTokens the prompt token count
* @param completionTokens the completion token count
*/
public DefaultUsage(Integer promptTokens, Integer completionTokens);
/**
* Construct a DefaultUsage with native usage.
*
* @param promptTokens the prompt token count
* @param completionTokens the completion token count
* @param nativeUsage the provider-specific usage
*/
public DefaultUsage(Integer promptTokens, Integer completionTokens, Object nativeUsage);
@Override
public Integer getPromptTokens();
@Override
public Integer getCompletionTokens();
@Override
public Integer getTotalTokens();
@Override
public Object getNativeUsage();
}
Empty usage implementation with all nulls.
/**
* Usage implementation that carries no token information: every getter returns null.
* The counts are boxed Integers, so callers must null-check them before doing
* arithmetic; auto-unboxing a null result throws NullPointerException.
*/
public class EmptyUsage implements Usage {
/**
* @return always null
*/
@Override
public Integer getPromptTokens() {
return null;
}
/**
* @return always null
*/
@Override
public Integer getCompletionTokens() {
return null;
}
/**
* @return always null
*/
@Override
public Integer getTotalTokens() {
return null;
}
/**
* @return always null
*/
@Override
public Object getNativeUsage() {
return null;
}
}API rate limit information.
public interface RateLimit {
/**
* Get the maximum number of requests allowed in the time window.
*
* @return the requests limit
*/
Long getRequestsLimit();
/**
* Get the number of requests remaining in the current window.
*
* @return the remaining requests
*/
Long getRequestsRemaining();
/**
* Get the time until the request limit resets.
*
* @return the reset duration
*/
Duration getRequestsReset();
/**
* Get the maximum number of tokens allowed in the time window.
*
* @return the tokens limit
*/
Long getTokensLimit();
/**
* Get the number of tokens remaining in the current window.
*
* @return the remaining tokens
*/
Long getTokensRemaining();
/**
* Get the time until the token limit resets.
*
* @return the reset duration
*/
Duration getTokensReset();
}
Empty rate limit implementation with all nulls.
/**
* RateLimit implementation that carries no rate limit information: every getter
* returns null. Callers must null-check each value (including the Duration
* results) before dereferencing or unboxing it.
*/
public class EmptyRateLimit implements RateLimit {
/**
* @return always null
*/
@Override
public Long getRequestsLimit() {
return null;
}
/**
* @return always null
*/
@Override
public Long getRequestsRemaining() {
return null;
}
/**
* @return always null
*/
@Override
public Duration getRequestsReset() {
return null;
}
/**
* @return always null
*/
@Override
public Long getTokensLimit() {
return null;
}
/**
* @return always null
*/
@Override
public Long getTokensRemaining() {
return null;
}
/**
* @return always null
*/
@Override
public Duration getTokensReset() {
return null;
}
}Metadata for prompts with filter information.
public interface PromptMetadata extends Iterable<PromptFilterMetadata> {
/**
* Metadata for prompt filters.
*/
interface PromptFilterMetadata {
// Filter-specific metadata
}
}ChatResponse response = chatModel.call(prompt);
// Pull the response-level metadata once, then read the ID and model name from it
ChatResponseMetadata responseMetadata = response.getMetadata();
String responseId = responseMetadata.getId();
String model = responseMetadata.getModel();
System.out.println("Model: " + model);
System.out.println("Response ID: " + responseId);ChatResponse response = chatModel.call(prompt);
// Get usage information; guard against a missing usage object before dereferencing it
Usage usage = response.getMetadata().getUsage();
// Token counts are boxed Integers and may be null (EmptyUsage returns null for all of them)
Integer promptTokens = (usage != null) ? usage.getPromptTokens() : null;
Integer completionTokens = (usage != null) ? usage.getCompletionTokens() : null;
Integer totalTokens = (usage != null) ? usage.getTotalTokens() : null;
System.out.println("Prompt tokens: " + promptTokens);
System.out.println("Completion tokens: " + completionTokens);
System.out.println("Total tokens: " + totalTokens);
// Calculate cost (example rates)
double promptCostPer1k = 0.03; // $0.03 per 1k tokens
double completionCostPer1k = 0.06; // $0.06 per 1k tokens
// Treat a missing count as zero so that auto-unboxing cannot throw NullPointerException
int billablePromptTokens = (promptTokens != null) ? promptTokens : 0;
int billableCompletionTokens = (completionTokens != null) ? completionTokens : 0;
double cost = (billablePromptTokens / 1000.0) * promptCostPer1k +
    (billableCompletionTokens / 1000.0) * completionCostPer1k;
System.out.println("Estimated cost: $" + String.format("%.4f", cost));ChatResponse response = chatModel.call(prompt);
// Get rate limit info
RateLimit rateLimit = response.getMetadata().getRateLimit();
if (rateLimit != null) {
Long requestsRemaining = rateLimit.getRequestsRemaining();
Long tokensRemaining = rateLimit.getTokensRemaining();
Duration requestsReset = rateLimit.getRequestsReset();
System.out.println("Requests remaining: " + requestsRemaining);
System.out.println("Tokens remaining: " + tokensRemaining);
// The reset duration may be null (EmptyRateLimit returns null), so check it before
// calling toMinutes(), just as requestsRemaining is checked below
if (requestsReset != null) {
    System.out.println("Reset in: " + requestsReset.toMinutes() + " minutes");
}
// Check if approaching limits
if (requestsRemaining != null && requestsRemaining < 10) {
    System.out.println("WARNING: Approaching rate limit!");
}
}ChatResponse response = chatModel.call(prompt);
// Get finish reason from generation metadata
ChatGenerationMetadata genMetadata = response.getResult().getMetadata();
String finishReason = genMetadata.getFinishReason();
// A switch on a null String throws NullPointerException, and metadata built without a
// finish reason may not report one. Switch on a non-null stand-in so that a missing
// reason lands in the default branch instead of crashing.
switch (finishReason != null ? finishReason : "") {
    case "stop" -> System.out.println("Completed normally");
    case "length" -> System.out.println("Stopped due to max token limit");
    case "tool_calls" -> System.out.println("Stopped to make tool calls");
    case "content_filter" -> System.out.println("Stopped by content filter");
    default -> System.out.println("Unknown finish reason: " + finishReason);
}
@Service
public class RateLimitAwareChatService {
private final ChatModel chatModel;
/**
 * Send a message to the chat model, retrying failed attempts with exponential backoff.
 *
 * <p>An attempt that fails with a runtime exception is retried after 2s, then 4s; the
 * third failure is rethrown. After a successful call, if the reported number of
 * remaining requests is below 5, the method pauses for one second before returning.
 *
 * @param message the message text to send as the prompt
 * @return the text of the response's result output
 * @throws InterruptedException if the thread is interrupted while pausing or backing off
 */
public String chatWithRetry(String message) throws InterruptedException {
    final int maxRetries = 3;
    int failedAttempts = 0;
    while (true) {
        try {
            ChatResponse response = chatModel.call(new Prompt(message));
            // Check rate limits
            RateLimit rateLimit = response.getMetadata().getRateLimit();
            if (rateLimit != null) {
                Long remaining = rateLimit.getRequestsRemaining();
                if (remaining != null && remaining < 5) {
                    // Proactively slow down
                    Thread.sleep(1000);
                }
            }
            return response.getResult().getOutput().getText();
        } catch (RuntimeException e) {
            // Only runtime failures are retried. InterruptedException is deliberately not
            // caught here: an interrupt during the proactive pause must propagate to the
            // caller rather than be swallowed and trigger a duplicate model call.
            failedAttempts++;
            if (failedAttempts >= maxRetries) {
                throw e;
            }
            // Exponential backoff: 2s after the first failure, 4s after the second
            Thread.sleep((1L << failedAttempts) * 1000L);
        }
    }
}
}@Service
public class UsageTrackingService {
private final AtomicLong totalPromptTokens = new AtomicLong(0);
private final AtomicLong totalCompletionTokens = new AtomicLong(0);
/**
 * Call the model with the given prompt and add the reported token counts to the
 * running totals.
 *
 * <p>Missing usage information is skipped rather than treated as an error: the usage
 * object and each boxed count may be null (EmptyUsage returns null for every count),
 * and passing a null Integer to {@code addAndGet(long)} would throw
 * NullPointerException on unboxing.
 *
 * @param chatModel the model to call
 * @param prompt the prompt to send
 * @return the text of the response's result output
 */
public String trackAndCall(ChatModel chatModel, Prompt prompt) {
    ChatResponse response = chatModel.call(prompt);
    // Track usage
    Usage usage = response.getMetadata().getUsage();
    if (usage != null) {
        Integer promptTokens = usage.getPromptTokens();
        if (promptTokens != null) {
            totalPromptTokens.addAndGet(promptTokens);
        }
        Integer completionTokens = usage.getCompletionTokens();
        if (completionTokens != null) {
            totalCompletionTokens.addAndGet(completionTokens);
        }
    }
    return response.getResult().getOutput().getText();
}
/**
 * Report the combined number of prompt and completion tokens recorded so far.
 *
 * @return the sum of the two running counters
 */
public long getTotalTokens() {
    long promptCount = totalPromptTokens.get();
    long completionCount = totalCompletionTokens.get();
    return promptCount + completionCount;
}
/**
 * Estimate the cost of all tokens recorded so far.
 *
 * @param promptCostPer1k the price per 1000 prompt tokens
 * @param completionCostPer1k the price per 1000 completion tokens
 * @return the prompt cost plus the completion cost
 */
public double getEstimatedCost(double promptCostPer1k, double completionCostPer1k) {
    double promptThousands = totalPromptTokens.get() / 1000.0;
    double completionThousands = totalCompletionTokens.get() / 1000.0;
    double promptCost = promptThousands * promptCostPer1k;
    double completionCost = completionThousands * completionCostPer1k;
    return promptCost + completionCost;
}
}ChatGenerationMetadata customMetadata = ChatGenerationMetadata.builder()
.finishReason("stop")
.build();
Generation generation = new Generation(
new AssistantMessage("Response text"),
customMetadata
);ChatResponse response = chatModel.call(prompt);
ChatResponseMetadata metadata = response.getMetadata();
// Get all metadata keys
Set<String> keys = metadata.keySet();
System.out.println("Available metadata keys: " + keys);
// Access custom metadata
for (String key : keys) {
Object value = metadata.get(key);
System.out.println(key + ": " + value);
}
// Check for specific keys
if (metadata.containsKey("provider")) {
String provider = metadata.get("provider");
System.out.println("Provider: " + provider);
}Flux<ChatResponse> stream = chatModel.stream(prompt);
stream.subscribe(
chunk -> {
// Metadata available in each chunk
ChatResponseMetadata metadata = chunk.getMetadata();
// Usually only final chunk has complete metadata
if (metadata != null && metadata.getUsage() != null) {
System.out.println("Total tokens: " +
metadata.getUsage().getTotalTokens());
}
}
);ChatResponse response = chatModel.call(prompt);
Usage usage = response.getMetadata().getUsage();
// Get provider-specific usage details; guard against a missing usage object.
// instanceof is false for null, so no further null check is needed below.
Object nativeUsage = (usage != null) ? usage.getNativeUsage() : null;
if (nativeUsage instanceof OpenAiUsage openAiUsage) {
    // Access OpenAI-specific fields
    int cachedTokens = openAiUsage.getCachedTokens();
    System.out.println("Cached tokens: " + cachedTokens);
}
@Service
public class MetadataLoggingService {
private static final Logger log = LoggerFactory.getLogger(
MetadataLoggingService.class
);
/**
 * Call the model and log the response metadata.
 *
 * <p>Logs the model name, response ID and total token count at INFO level, and the
 * remaining request count at DEBUG level when it is available. Missing usage
 * information is logged as {@code null} instead of causing a NullPointerException.
 *
 * @param chatModel the model to call
 * @param prompt the prompt to send
 * @return the text of the response's result output
 */
public String chatWithLogging(ChatModel chatModel, Prompt prompt) {
    ChatResponse response = chatModel.call(prompt);
    // Log metadata
    ChatResponseMetadata metadata = response.getMetadata();
    // The usage object may be absent, so do not chain getTotalTokens() onto it directly
    Usage usage = metadata.getUsage();
    Integer totalTokens = (usage != null) ? usage.getTotalTokens() : null;
    log.info("Chat completed - Model: {}, ID: {}, Tokens: {}",
        metadata.getModel(),
        metadata.getId(),
        totalTokens
    );
    // Log rate limits if available
    RateLimit rateLimit = metadata.getRateLimit();
    Long requestsRemaining = (rateLimit != null) ? rateLimit.getRequestsRemaining() : null;
    if (requestsRemaining != null) {
        log.debug("Rate limit - Requests remaining: {}",
            requestsRemaining
        );
    }
    return response.getResult().getOutput().getText();
}
}@Service
public class ComprehensiveMetadataService {
/**
 * Print each metadata section of a chat response to standard output, in order:
 * response ID and model, token usage (if present), rate limits (if present),
 * the generation's finish reason, and a prompt-metadata header (if present).
 *
 * @param response the chat response to inspect
 */
public void analyzeResponse(ChatResponse response) {
    final ChatResponseMetadata responseMetadata = response.getMetadata();
    System.out.println("=== Response Metadata ===");
    System.out.println("ID: " + responseMetadata.getId());
    System.out.println("Model: " + responseMetadata.getModel());

    printUsageSection(responseMetadata.getUsage());
    printRateLimitSection(responseMetadata.getRateLimit());

    final ChatGenerationMetadata generationMetadata = response.getResult().getMetadata();
    System.out.println("\n=== Generation Metadata ===");
    System.out.println("Finish reason: " + generationMetadata.getFinishReason());

    final PromptMetadata promptMetadata = responseMetadata.getPromptMetadata();
    if (promptMetadata != null) {
        System.out.println("\n=== Prompt Metadata ===");
        // Access prompt filter metadata
    }
}

/** Print the token counts, or nothing when no usage object is present. */
private void printUsageSection(Usage tokenUsage) {
    if (tokenUsage == null) {
        return;
    }
    System.out.println("\n=== Usage ===");
    System.out.println("Prompt tokens: " + tokenUsage.getPromptTokens());
    System.out.println("Completion tokens: " + tokenUsage.getCompletionTokens());
    System.out.println("Total tokens: " + tokenUsage.getTotalTokens());
}

/** Print the remaining request and token budgets, or nothing when no rate limit is present. */
private void printRateLimitSection(RateLimit limits) {
    if (limits == null) {
        return;
    }
    System.out.println("\n=== Rate Limits ===");
    System.out.println("Requests remaining: " + limits.getRequestsRemaining());
    System.out.println("Tokens remaining: " + limits.getTokensRemaining());
}
}