CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/maven-org-springframework-ai--spring-ai-model

Core model interfaces and abstractions for Spring AI framework providing portable API for chat, embeddings, images, audio, and tool calling across multiple AI providers

Overview
Eval results
Files

docs/reference/metadata.md

Metadata and Usage Tracking

Comprehensive metadata structures for tracking token usage, rate limits, model information, finish reasons, and provider-specific details across all model types.

Package Information

All chat metadata classes are located in the org.springframework.ai.chat.metadata package.

Required imports:

import org.springframework.ai.chat.metadata.ChatGenerationMetadata;
import org.springframework.ai.chat.metadata.ChatResponseMetadata;
import org.springframework.ai.chat.metadata.Usage;
import org.springframework.ai.chat.metadata.RateLimit;

Capabilities

Chat Response Metadata

ChatResponseMetadata

Metadata for chat responses.

/**
 * Response-level metadata for a chat call: response id, model name, rate limit,
 * token usage and prompt metadata, together with a builder for creating instances.
 * This is a signature listing; method bodies are omitted.
 */
public class ChatResponseMetadata extends AbstractResponseMetadata implements ResponseMetadata {
    /**
     * Get the unique ID for this response.
     *
     * @return the response ID
     */
    String getId();

    /**
     * Get the model name used for this response.
     *
     * @return the model name
     */
    String getModel();

    /**
     * Get rate limit information.
     *
     * @return the rate limit details
     */
    RateLimit getRateLimit();

    /**
     * Get token usage information.
     *
     * @return the usage details
     */
    Usage getUsage();

    /**
     * Get prompt metadata.
     *
     * @return the prompt metadata
     */
    PromptMetadata getPromptMetadata();

    /**
     * Create a new metadata builder.
     *
     * @return a new builder
     */
    static Builder builder();

    /**
     * Builder for constructing ChatResponseMetadata instances.
     * Every setter returns a Builder so calls can be chained; build() produces the result.
     */
    public static final class Builder {
        // NOTE(review): the parameter name suggests the map's entries are copied — confirm.
        Builder metadata(Map<String, Object> mapToCopy);
        // Presumably records one custom key/value entry — confirm against the implementation.
        Builder keyValue(String key, Object value);
        // The remaining setters presumably supply the values returned by the
        // same-named getters on ChatResponseMetadata — confirm.
        Builder id(String id);
        Builder model(String model);
        Builder rateLimit(RateLimit rateLimit);
        Builder usage(Usage usage);
        Builder promptMetadata(PromptMetadata promptMetadata);
        ChatResponseMetadata build();
    }
}

ChatGenerationMetadata Interface

Metadata for individual chat generations.

/**
 * Metadata attached to a single chat generation: the finish reason, the content
 * filters applied, and a string-keyed set of additional metadata entries.
 * Instances are created through the nested {@link Builder}.
 */
public interface ChatGenerationMetadata extends ResultMetadata {
    /**
     * Empty metadata constant, produced by building a fresh builder with nothing set.
     */
    ChatGenerationMetadata NULL = builder().build();

    /**
     * Get the finish reason for this generation.
     * Common values: "stop", "length", "tool_calls", "content_filter"
     *
     * @return the finish reason
     */
    String getFinishReason();

    /**
     * Get content filters applied to this generation.
     *
     * @return set of content filter names
     */
    Set<String> getContentFilters();

    /**
     * Get metadata value by key.
     *
     * @param key the metadata key
     * @param <T> the value type
     * @return the metadata value
     */
    <T> T get(String key);

    /**
     * Check if metadata contains a key.
     *
     * @param key the metadata key
     * @return true if key exists
     */
    boolean containsKey(String key);

    /**
     * Get metadata value with default.
     *
     * @param key the metadata key
     * @param defaultObject the default value
     * @param <T> the value type
     * @return the metadata value or default
     */
    <T> T getOrDefault(String key, T defaultObject);

    /**
     * Get all metadata entries.
     *
     * @return set of metadata entries
     */
    Set<Entry<String, Object>> entrySet();

    /**
     * Get all metadata keys.
     *
     * @return set of metadata keys
     */
    Set<String> keySet();

    /**
     * Check if metadata is empty.
     *
     * @return true if empty
     */
    boolean isEmpty();

    /**
     * Create a new metadata builder.
     *
     * @return a new builder
     */
    static Builder builder();

    /**
     * Builder for constructing ChatGenerationMetadata instances.
     * Each setter returns the builder so calls can be chained before build().
     */
    public interface Builder {
        /**
         * Set the finish reason.
         *
         * @param finishReason the finish reason
         * @return this builder
         */
        Builder finishReason(String finishReason);

        /**
         * Add metadata entry.
         *
         * @param key the metadata key
         * @param value the metadata value
         * @param <T> the value type
         * @return this builder
         */
        <T> Builder metadata(String key, T value);

        /**
         * Add metadata entries.
         *
         * @param metadata the metadata map
         * @return this builder
         */
        Builder metadata(Map<String, Object> metadata);

        /**
         * Add content filter.
         *
         * @param contentFilter the content filter name
         * @return this builder
         */
        Builder contentFilter(String contentFilter);

        /**
         * Add content filters.
         *
         * @param contentFilters the content filter names
         * @return this builder
         */
        Builder contentFilters(Set<String> contentFilters);

        /**
         * Build the ChatGenerationMetadata instance.
         *
         * @return the metadata
         */
        ChatGenerationMetadata build();
    }
}

DefaultChatGenerationMetadata

Default implementation of ChatGenerationMetadata.

/**
 * Default implementation of {@link ChatGenerationMetadata}.
 * Its members are not listed in this reference.
 */
public class DefaultChatGenerationMetadata implements ChatGenerationMetadata {
    // Default implementation
}

Usage Information

Usage Interface

Token usage information.

/**
 * Token usage reported for a model call: prompt, completion and total token
 * counts plus an optional provider-specific usage object.
 * The counts are boxed {@code Integer}s and may be {@code null}; the EmptyUsage
 * implementation listed in this document returns {@code null} from every getter,
 * so callers should null-check before doing arithmetic.
 */
public interface Usage {
    /**
     * Get the number of tokens used in the prompt.
     *
     * @return prompt token count
     */
    Integer getPromptTokens();

    /**
     * Get the number of tokens generated in the completion.
     *
     * @return completion token count
     */
    Integer getCompletionTokens();

    /**
     * Get the total number of tokens used (prompt + completion).
     *
     * @return total token count
     */
    Integer getTotalTokens();

    /**
     * Get provider-specific native usage information.
     *
     * @return the native usage object
     */
    Object getNativeUsage();
}

DefaultUsage

Default implementation of Usage.

/**
 * Default implementation of {@link Usage}, constructed from prompt and completion
 * token counts and, optionally, a provider-specific usage object.
 * This is a signature listing; constructor and method bodies are omitted.
 */
public class DefaultUsage implements Usage {
    /**
     * Construct a DefaultUsage.
     * NOTE(review): what getTotalTokens() and getNativeUsage() return for instances
     * built with this constructor is not shown here — confirm against the implementation.
     *
     * @param promptTokens the prompt token count
     * @param completionTokens the completion token count
     */
    public DefaultUsage(Integer promptTokens, Integer completionTokens);

    /**
     * Construct a DefaultUsage with native usage.
     *
     * @param promptTokens the prompt token count
     * @param completionTokens the completion token count
     * @param nativeUsage the provider-specific usage
     */
    public DefaultUsage(Integer promptTokens, Integer completionTokens, Object nativeUsage);

    @Override
    public Integer getPromptTokens();

    @Override
    public Integer getCompletionTokens();

    @Override
    public Integer getTotalTokens();

    @Override
    public Object getNativeUsage();
}

EmptyUsage

Empty usage implementation with all nulls.

/**
 * {@link Usage} placeholder that carries no data: every accessor yields {@code null}.
 */
public class EmptyUsage implements Usage {

    /** @return always {@code null}; no prompt token count is available */
    @Override
    public Integer getPromptTokens() { return null; }

    /** @return always {@code null}; no completion token count is available */
    @Override
    public Integer getCompletionTokens() { return null; }

    /** @return always {@code null}; no total token count is available */
    @Override
    public Integer getTotalTokens() { return null; }

    /** @return always {@code null}; there is no provider-specific usage object */
    @Override
    public Object getNativeUsage() { return null; }
}

Rate Limit Information

RateLimit Interface

API rate limit information.

/**
 * API rate limit information for requests and tokens: the limit, the amount
 * remaining and the time until reset, for each of the two.
 * All values are reference types and may be {@code null}; the EmptyRateLimit
 * implementation listed in this document returns {@code null} from every getter.
 */
public interface RateLimit {
    /**
     * Get the maximum number of requests allowed in the time window.
     *
     * @return the requests limit
     */
    Long getRequestsLimit();

    /**
     * Get the number of requests remaining in the current window.
     *
     * @return the remaining requests
     */
    Long getRequestsRemaining();

    /**
     * Get the time until the request limit resets.
     *
     * @return the reset duration
     */
    Duration getRequestsReset();

    /**
     * Get the maximum number of tokens allowed in the time window.
     *
     * @return the tokens limit
     */
    Long getTokensLimit();

    /**
     * Get the number of tokens remaining in the current window.
     *
     * @return the remaining tokens
     */
    Long getTokensRemaining();

    /**
     * Get the time until the token limit resets.
     *
     * @return the reset duration
     */
    Duration getTokensReset();
}

EmptyRateLimit

Empty rate limit implementation with all nulls.

/**
 * {@link RateLimit} placeholder that carries no data: every accessor yields {@code null}.
 */
public class EmptyRateLimit implements RateLimit {

    // --- request quota ---

    /** @return always {@code null}; no request limit is known */
    @Override
    public Long getRequestsLimit() { return null; }

    /** @return always {@code null}; no remaining-request count is known */
    @Override
    public Long getRequestsRemaining() { return null; }

    /** @return always {@code null}; no request reset time is known */
    @Override
    public Duration getRequestsReset() { return null; }

    // --- token quota ---

    /** @return always {@code null}; no token limit is known */
    @Override
    public Long getTokensLimit() { return null; }

    /** @return always {@code null}; no remaining-token count is known */
    @Override
    public Long getTokensRemaining() { return null; }

    /** @return always {@code null}; no token reset time is known */
    @Override
    public Duration getTokensReset() { return null; }
}

Prompt Metadata

PromptMetadata Interface

Metadata for prompts with filter information.

/**
 * Metadata for a prompt, exposed as an iterable sequence of
 * {@link PromptFilterMetadata} elements (usable directly in a for-each loop).
 */
public interface PromptMetadata extends Iterable<PromptFilterMetadata> {
    /**
     * Metadata for prompt filters.
     * Its members are not listed in this reference.
     */
    interface PromptFilterMetadata {
        // Filter-specific metadata
    }
}

Usage Examples

Accessing Response Metadata

ChatResponse response = chatModel.call(prompt);

// Response-level metadata travels with the ChatResponse
ChatResponseMetadata metadata = response.getMetadata();

// Read the model name and response id first, then report both
String modelName = metadata.getModel(), responseId = metadata.getId();

System.out.println("Model: " + modelName);
System.out.println("Response ID: " + responseId);

Token Usage Tracking

ChatResponse response = chatModel.call(prompt);

// Get usage information. A provider may not report usage at all, and
// implementations such as EmptyUsage return null for every count.
Usage usage = response.getMetadata().getUsage();

Integer promptTokens = (usage != null) ? usage.getPromptTokens() : null;
Integer completionTokens = (usage != null) ? usage.getCompletionTokens() : null;
Integer totalTokens = (usage != null) ? usage.getTotalTokens() : null;

System.out.println("Prompt tokens: " + promptTokens);
System.out.println("Completion tokens: " + completionTokens);
System.out.println("Total tokens: " + totalTokens);

// Calculate cost (example rates)
double promptCostPer1k = 0.03;   // $0.03 per 1k tokens
double completionCostPer1k = 0.06;  // $0.06 per 1k tokens

// Only do arithmetic when both counts are present: unboxing a null Integer
// throws NullPointerException.
if (promptTokens != null && completionTokens != null) {
    double cost = (promptTokens / 1000.0) * promptCostPer1k +
                  (completionTokens / 1000.0) * completionCostPer1k;

    System.out.println("Estimated cost: $" + String.format("%.4f", cost));
} else {
    System.out.println("Estimated cost: unavailable (token usage not reported)");
}

Rate Limit Monitoring

ChatResponse response = chatModel.call(prompt);

// Get rate limit info
RateLimit rateLimit = response.getMetadata().getRateLimit();

if (rateLimit != null) {
    // Every value may be null (EmptyRateLimit returns null for all of them)
    Long requestsRemaining = rateLimit.getRequestsRemaining();
    Long tokensRemaining = rateLimit.getTokensRemaining();
    Duration requestsReset = rateLimit.getRequestsReset();

    System.out.println("Requests remaining: " + requestsRemaining);
    System.out.println("Tokens remaining: " + tokensRemaining);

    // Calling toMinutes() on a null Duration would throw NullPointerException
    if (requestsReset != null) {
        System.out.println("Reset in: " + requestsReset.toMinutes() + " minutes");
    }

    // Check if approaching limits
    if (requestsRemaining != null && requestsRemaining < 10) {
        System.out.println("WARNING: Approaching rate limit!");
    }
}

Finish Reason Analysis

ChatResponse response = chatModel.call(prompt);

// Get finish reason from generation metadata
ChatGenerationMetadata genMetadata = response.getResult().getMetadata();
String finishReason = genMetadata.getFinishReason();

if (finishReason == null) {
    // A switch on a null String throws NullPointerException, so handle the
    // "no finish reason reported" case before switching.
    System.out.println("No finish reason reported");
} else {
    switch (finishReason) {
        case "stop" -> System.out.println("Completed normally");
        case "length" -> System.out.println("Stopped due to max token limit");
        case "tool_calls" -> System.out.println("Stopped to make tool calls");
        case "content_filter" -> System.out.println("Stopped by content filter");
        default -> System.out.println("Unknown finish reason: " + finishReason);
    }
}

Metadata-Based Retry Logic

/**
 * Example service that retries failed chat calls with exponential backoff and
 * slows down proactively when the reported request quota is nearly exhausted.
 */
@Service
public class RateLimitAwareChatService {
    private final ChatModel chatModel;

    /**
     * Create the service.
     *
     * @param chatModel the chat model to call; the final field must be assigned
     *                  here, otherwise the class does not compile
     */
    public RateLimitAwareChatService(ChatModel chatModel) {
        this.chatModel = chatModel;
    }

    /**
     * Send a message, retrying up to three times on runtime failures.
     *
     * @param message the user message to send
     * @return the text of the first generation
     * @throws InterruptedException if the thread is interrupted while sleeping;
     *         interruption is propagated immediately and never retried
     */
    public String chatWithRetry(String message) throws InterruptedException {
        int maxRetries = 3;
        int attempt = 0;

        while (attempt < maxRetries) {
            try {
                ChatResponse response = chatModel.call(new Prompt(message));

                // Check rate limits
                RateLimit rateLimit = response.getMetadata().getRateLimit();
                if (rateLimit != null) {
                    Long remaining = rateLimit.getRequestsRemaining();
                    if (remaining != null && remaining < 5) {
                        // Proactively slow down
                        Thread.sleep(1000);
                    }
                }

                return response.getResult().getOutput().getText();
            } catch (RuntimeException e) {
                // Catch only RuntimeException: a broader catch would also swallow the
                // InterruptedException raised by the sleep above and retry the call.
                attempt++;
                if (attempt >= maxRetries) {
                    throw e;
                }
                // Exponential backoff: 2s, 4s, ...
                Thread.sleep(1000L << attempt);
            }
        }

        throw new RuntimeException("Max retries exceeded");
    }
}

Usage Aggregation

/**
 * Example service that accumulates prompt and completion token counts across
 * chat calls and derives an estimated cost from the running totals.
 */
@Service
public class UsageTrackingService {
    private final AtomicLong totalPromptTokens = new AtomicLong(0);
    private final AtomicLong totalCompletionTokens = new AtomicLong(0);

    /**
     * Call the model and add the reported token usage to the running totals.
     * Missing usage, or a missing individual count, contributes zero instead of
     * failing with a NullPointerException on unboxing.
     *
     * @param chatModel the model to call
     * @param prompt the prompt to send
     * @return the text of the first generation
     */
    public String trackAndCall(ChatModel chatModel, Prompt prompt) {
        ChatResponse response = chatModel.call(prompt);

        // Track usage
        Usage usage = response.getMetadata().getUsage();
        if (usage != null) {
            totalPromptTokens.addAndGet(orZero(usage.getPromptTokens()));
            totalCompletionTokens.addAndGet(orZero(usage.getCompletionTokens()));
        }

        return response.getResult().getOutput().getText();
    }

    /**
     * @return the sum of all prompt and completion tokens tracked so far
     */
    public long getTotalTokens() {
        return totalPromptTokens.get() + totalCompletionTokens.get();
    }

    /**
     * Estimate the cost of all tracked calls.
     *
     * @param promptCostPer1k price per 1000 prompt tokens
     * @param completionCostPer1k price per 1000 completion tokens
     * @return the estimated cost in the same currency unit as the rates
     */
    public double getEstimatedCost(double promptCostPer1k, double completionCostPer1k) {
        return (totalPromptTokens.get() / 1000.0) * promptCostPer1k +
               (totalCompletionTokens.get() / 1000.0) * completionCostPer1k;
    }

    /** Treat an unreported (null) token count as zero. */
    private static long orZero(Integer count) {
        return (count != null) ? count : 0L;
    }
}

Building Custom Metadata

ChatGenerationMetadata customMetadata = ChatGenerationMetadata.builder()
    .finishReason("stop")
    .build();

Generation generation = new Generation(
    new AssistantMessage("Response text"),
    customMetadata
);

Accessing All Metadata Keys

ChatResponse response = chatModel.call(prompt);
ChatResponseMetadata metadata = response.getMetadata();

// Enumerate every key attached to this response
Set<String> keys = metadata.keySet();
System.out.println("Available metadata keys: " + keys);

// Print each entry as "key: value"
keys.forEach(key -> {
    Object value = metadata.get(key);
    System.out.println(key + ": " + value);
});

// Read one specific key only when it is present
boolean hasProvider = metadata.containsKey("provider");
if (hasProvider) {
    String provider = metadata.get("provider");
    System.out.println("Provider: " + provider);
}

Streaming Response Metadata

Flux<ChatResponse> stream = chatModel.stream(prompt);

stream.subscribe(chunk -> {
    // Every chunk carries metadata, but usually only the final one is complete
    ChatResponseMetadata metadata = chunk.getMetadata();

    // Skip chunks that have no usage to report
    if (metadata == null || metadata.getUsage() == null) {
        return;
    }

    System.out.println("Total tokens: " + metadata.getUsage().getTotalTokens());
});

Native Usage Information

ChatResponse response = chatModel.call(prompt);
Usage usage = response.getMetadata().getUsage();

// The native usage object is provider-specific, so test its runtime type before
// using it. NOTE(review): OpenAiUsage is not declared in this reference —
// confirm the type and accessor against the provider module in use.
if (usage.getNativeUsage() instanceof OpenAiUsage openAiUsage) {
    int cachedTokens = openAiUsage.getCachedTokens();
    System.out.println("Cached tokens: " + cachedTokens);
}

Logging Metadata

/**
 * Example service that logs response metadata (model, id, token total and,
 * when available, the remaining request quota) for every chat call.
 */
@Service
public class MetadataLoggingService {
    private static final Logger log = LoggerFactory.getLogger(
        MetadataLoggingService.class
    );

    /**
     * Call the model, log its metadata and return the generated text.
     *
     * @param chatModel the model to call
     * @param prompt the prompt to send
     * @return the text of the first generation
     */
    public String chatWithLogging(ChatModel chatModel, Prompt prompt) {
        ChatResponse response = chatModel.call(prompt);

        // Log metadata. Usage may be absent, so resolve the total defensively
        // instead of dereferencing getUsage() directly.
        ChatResponseMetadata metadata = response.getMetadata();
        Usage usage = metadata.getUsage();
        Integer totalTokens = (usage != null) ? usage.getTotalTokens() : null;
        log.info("Chat completed - Model: {}, ID: {}, Tokens: {}",
            metadata.getModel(),
            metadata.getId(),
            totalTokens
        );

        // Log rate limits if available
        RateLimit rateLimit = metadata.getRateLimit();
        if (rateLimit != null && rateLimit.getRequestsRemaining() != null) {
            log.debug("Rate limit - Requests remaining: {}",
                rateLimit.getRequestsRemaining()
            );
        }

        return response.getResult().getOutput().getText();
    }
}

Complete Metadata Example

/**
 * Example service that prints every metadata section of a chat response:
 * response info, usage, rate limits, generation metadata and prompt metadata.
 */
@Service
public class ComprehensiveMetadataService {

    /**
     * Print all metadata sections of the given response to standard output.
     *
     * @param response the chat response to inspect
     */
    public void analyzeResponse(ChatResponse response) {
        ChatResponseMetadata respMeta = response.getMetadata();

        printResponseInfo(respMeta);
        printUsage(respMeta.getUsage());
        printRateLimit(respMeta.getRateLimit());
        printGenerationInfo(response.getResult().getMetadata());
        printPromptInfo(respMeta.getPromptMetadata());
    }

    /** Print the response id and model name. */
    private void printResponseInfo(ChatResponseMetadata respMeta) {
        System.out.println("=== Response Metadata ===");
        System.out.println("ID: " + respMeta.getId());
        System.out.println("Model: " + respMeta.getModel());
    }

    /** Print the token counts; prints nothing when usage is absent. */
    private void printUsage(Usage usage) {
        if (usage == null) {
            return;
        }
        System.out.println("\n=== Usage ===");
        System.out.println("Prompt tokens: " + usage.getPromptTokens());
        System.out.println("Completion tokens: " + usage.getCompletionTokens());
        System.out.println("Total tokens: " + usage.getTotalTokens());
    }

    /** Print the remaining quota; prints nothing when rate limit info is absent. */
    private void printRateLimit(RateLimit rateLimit) {
        if (rateLimit == null) {
            return;
        }
        System.out.println("\n=== Rate Limits ===");
        System.out.println("Requests remaining: " + rateLimit.getRequestsRemaining());
        System.out.println("Tokens remaining: " + rateLimit.getTokensRemaining());
    }

    /** Print the finish reason of the first generation. */
    private void printGenerationInfo(ChatGenerationMetadata genMeta) {
        System.out.println("\n=== Generation Metadata ===");
        System.out.println("Finish reason: " + genMeta.getFinishReason());
    }

    /** Print the prompt metadata header; prints nothing when it is absent. */
    private void printPromptInfo(PromptMetadata promptMeta) {
        if (promptMeta == null) {
            return;
        }
        System.out.println("\n=== Prompt Metadata ===");
        // Access prompt filter metadata
    }
}

Install with Tessl CLI

npx tessl i tessl/maven-org-springframework-ai--spring-ai-model@1.1.1

docs

index.md

tile.json