LangChain4j integration for Azure OpenAI, providing chat, streaming, embeddings, image generation, audio transcription, and token-counting capabilities.
The token count estimator produces token-count estimates for cost calculation and request validation before any API call is made. It supports chat, embedding, and language models.
import dev.langchain4j.data.message.AiMessage;
import dev.langchain4j.data.message.ChatMessage;
import dev.langchain4j.data.message.SystemMessage;
import dev.langchain4j.data.message.UserMessage;
import dev.langchain4j.model.azure.AzureOpenAiChatModelName;
import dev.langchain4j.model.azure.AzureOpenAiEmbeddingModelName;
import dev.langchain4j.model.azure.AzureOpenAiLanguageModelName;
import dev.langchain4j.model.azure.AzureOpenAiTokenCountEstimator;
import java.util.ArrayList;
import java.util.List;
// Create estimator for specific model
AzureOpenAiTokenCountEstimator estimator = new AzureOpenAiTokenCountEstimator(
AzureOpenAiChatModelName.GPT_4
);
// Estimate tokens in plain text (tokenization runs locally -- no API call)
String text = "Hello, how are you today?";
int tokenCount = estimator.estimateTokenCountInText(text);
System.out.println("Token count: " + tokenCount); // e.g., 7
// Estimate tokens in a message (includes formatting overhead for role/structure)
ChatMessage message = UserMessage.from("What is the capital of France?");
int messageTokens = estimator.estimateTokenCountInMessage(message); // e.g., 11
// Estimate tokens in message list (includes conversation structure overhead)
// NOTE: the list total is more than the sum of per-text counts because of
// per-message and conversation framing overhead.
List<ChatMessage> messages = List.of(
SystemMessage.from("You are a helpful assistant."),
UserMessage.from("What is AI?"),
AiMessage.from("AI stands for Artificial Intelligence...")
);
int totalTokens = estimator.estimateTokenCountInMessages(messages);package dev.langchain4j.model.azure;
/**
 * Token count estimator for Azure OpenAI models.
 * Uses same tokenization algorithm as target model (cl100k_base for most models).
 * Thread-safe: Yes - instances are immutable and thread-safe, so a single
 * instance may be shared across threads.
 * Accuracy: Typically ±1-2 tokens for text, ±2-5 for messages, ±5-10 for message lists
 * (approximate guidance, not a contract -- always keep a safety margin).
 * Performance: Fast, local tokenization (no API calls).
 */
class AzureOpenAiTokenCountEstimator implements dev.langchain4j.model.Tokenizer {
/**
 * Creates estimator for a chat model; token counting uses that model's encoding.
 * @param modelName Chat model enum
 * @throws IllegalArgumentException if modelName is null
 */
AzureOpenAiTokenCountEstimator(AzureOpenAiChatModelName modelName);
/**
 * Creates estimator for an embedding model; token counting uses that model's encoding.
 * @param modelName Embedding model enum
 * @throws IllegalArgumentException if modelName is null
 */
AzureOpenAiTokenCountEstimator(AzureOpenAiEmbeddingModelName modelName);
/**
 * Creates estimator for a (completion-style) language model.
 * @param modelName Language model enum
 * @throws IllegalArgumentException if modelName is null
 */
AzureOpenAiTokenCountEstimator(AzureOpenAiLanguageModelName modelName);
/**
 * Creates estimator for a custom model name (e.g. a custom Azure deployment name).
 * Falls back to cl100k_base encoding if the model is unknown.
 * @param modelName Model name string
 * @throws IllegalArgumentException if modelName is null or empty
 */
AzureOpenAiTokenCountEstimator(String modelName);
/**
 * Estimates tokens in plain text, with no message/role overhead added.
 * @param text Text to estimate (can be empty, not null)
 * @return Token count (0 for empty string)
 * @throws NullPointerException if text is null
 */
int estimateTokenCountInText(String text);
/**
 * Estimates tokens in a single message including formatting overhead.
 * Includes tokens for role, name, and message structure, so the result is
 * larger than estimateTokenCountInText on the message's text alone.
 * @param message ChatMessage (UserMessage, AiMessage, SystemMessage, etc.)
 * @return Token count including ~3-5 tokens overhead
 * @throws NullPointerException if message is null
 */
int estimateTokenCountInMessage(dev.langchain4j.data.message.ChatMessage message);
/**
 * Estimates tokens in a message list including conversation structure.
 * Includes tokens for per-message formatting + conversation framing.
 * @param messages Iterable of messages (can be empty, not null)
 * @return Total token count including all overhead (~3 tokens base + per-message overhead)
 * @throws NullPointerException if messages is null or contains null elements
 */
int estimateTokenCountInMessages(Iterable<dev.langchain4j.data.message.ChatMessage> messages);
}AzureOpenAiTokenCountEstimator estimator = new AzureOpenAiTokenCountEstimator(
AzureOpenAiChatModelName.GPT_3_5_TURBO
);
List<ChatMessage> messages = buildConversation();
// Local estimate -- validates size before spending an API call.
int estimatedTokens = estimator.estimateTokenCountInMessages(messages);
// GPT-3.5-turbo: 4096 token limit, reserve 1000 for response
// (the context window is shared between input and output tokens)
int maxInputTokens = 4096 - 1000;
if (estimatedTokens > maxInputTokens) {
// Trim conversation or reject request
messages = trimConversation(messages, maxInputTokens);
}
Response<AiMessage> response = chatModel.generate(messages);AzureOpenAiTokenCountEstimator estimator = new AzureOpenAiTokenCountEstimator(
AzureOpenAiChatModelName.GPT_4
);
List<ChatMessage> messages = getConversation();
int inputTokens = estimator.estimateTokenCountInMessages(messages);
// Estimate output tokens (conservative: 1.5x input for conversations)
// NOTE(review): the 1.5x multiplier is a heuristic, not an API guarantee --
// tune it against observed completion lengths for your workload.
int estimatedOutputTokens = (int) (inputTokens * 1.5);
int totalTokens = inputTokens + estimatedOutputTokens;
// Calculate cost (example GPT-4 rates; check current pricing for your region)
double inputCost = inputTokens * 0.00003; // $0.03 per 1K input tokens
double outputCost = estimatedOutputTokens * 0.00006; // $0.06 per 1K output tokens
double totalCost = inputCost + outputCost;
System.out.printf("Estimated cost: $%.4f%n", totalCost);
// Implement budget check: reject before calling the API so over-budget
// requests incur no cost.
if (totalCost > userBudget) {
throw new IllegalStateException("Request exceeds user budget");
}class ConversationManager {
// Shared estimator; the spec documents instances as immutable and thread-safe.
private final AzureOpenAiTokenCountEstimator estimator;
// Full conversation history; index 0 is expected to hold the system message.
private final List<ChatMessage> history = new ArrayList<>();
private final int maxTokens = 8000; // Leave room for response
ConversationManager(String modelName) {
this.estimator = new AzureOpenAiTokenCountEstimator(modelName);
}
// Appends a message and immediately trims so history stays within maxTokens.
void addMessage(ChatMessage message) {
history.add(message);
trimIfNeeded();
}
private void trimIfNeeded() {
int tokens = estimator.estimateTokenCountInMessages(history);
// Trim oldest messages (keep system message)
// NOTE(review): assumes history.get(0) is the system message -- confirm
// callers always add it first, otherwise a user message gets pinned instead.
while (tokens > maxTokens && history.size() > 2) {
// Remove oldest user/assistant pair (index 1 and 2)
// (after the first remove(1), the old index-2 element shifts to index 1)
history.remove(1);
if (history.size() > 1) history.remove(1);
tokens = estimator.estimateTokenCountInMessages(history);
}
}
// Defensive copy so callers cannot mutate the internal history.
List<ChatMessage> getHistory() {
return new ArrayList<>(history);
}
}AzureOpenAiTokenCountEstimator estimator = new AzureOpenAiTokenCountEstimator(
AzureOpenAiEmbeddingModelName.TEXT_EMBEDDING_ADA_002
);
List<String> documents = loadDocuments();
int totalTokens = 0;
List<String> validDocs = new ArrayList<>();
for (String doc : documents) {
    int tokens = estimator.estimateTokenCountInText(doc);
    if (tokens > 8191) {
        // Document exceeds the embedding model's per-input limit; split or skip.
        System.err.printf("Document too large: %d tokens (max 8191)%n", tokens);
    } else {
        validDocs.add(doc);
        // Count tokens only for accepted documents so the summary below
        // ("Total tokens across N documents") matches the document count it
        // reports. (Previously rejected documents were included in the total.)
        totalTokens += tokens;
    }
}
System.out.printf("Total tokens across %d documents: %d%n",
validDocs.size(), totalTokens);AzureOpenAiTokenCountEstimator estimator = new AzureOpenAiTokenCountEstimator(
AzureOpenAiEmbeddingModelName.TEXT_EMBEDDING_3_SMALL
);
List<String> texts = getAllTexts();
List<List<String>> batches = new ArrayList<>();
List<String> currentBatch = new ArrayList<>();
int currentBatchTokens = 0;
int maxBatchTokens = 8000; // Safety margin under 8191 limit
int maxBatchSize = 16; // API limit
for (String text : texts) {
    int textTokens = estimator.estimateTokenCountInText(text);
    // Flush the current batch when adding this text would exceed either limit.
    // Guard on isEmpty() so an oversized text never causes an EMPTY batch to
    // be added (the original flushed unconditionally, so a first text larger
    // than maxBatchTokens appended an empty list to batches).
    if (!currentBatch.isEmpty() &&
            (currentBatchTokens + textTokens > maxBatchTokens ||
             currentBatch.size() >= maxBatchSize)) {
        batches.add(currentBatch);
        currentBatch = new ArrayList<>();
        currentBatchTokens = 0;
    }
    // NOTE(review): a single text above 8191 tokens still lands in its own
    // batch and will be rejected by the API -- split such texts upstream.
    currentBatch.add(text);
    currentBatchTokens += textTokens;
}
// Add final (partial) batch
if (!currentBatch.isEmpty()) {
    batches.add(currentBatch);
}
// Process batches
for (List<String> batch : batches) {
    embedBatch(batch);
}Estimation accuracy:
Why estimates may differ from actual counts: see the accuracy notes above (roughly ±1-10 tokens depending on whether you estimate plain text, a single message, or a message list).
Best practice: always leave a safety margin:
int estimated = estimator.estimateTokenCountInMessages(messages);
int safeLimit = maxTokens - 100; // 100-token safety margin
if (estimated > safeLimit) {
trimMessages();
}| Model | Context Window | Notes |
|---|---|---|
| GPT-3.5-turbo | 4,096 | Input + output total |
| GPT-3.5-turbo-16k | 16,384 | Input + output total |
| GPT-4 | 8,192 | Input + output total |
| GPT-4-32k | 32,768 | Input + output total |
| GPT-4-turbo | 128,000 | Input + output total |
| GPT-4o | 128,000 | Input + output total |
| Model | Max Tokens Per Input |
|---|---|
| text-embedding-ada-002 | 8,191 |
| text-embedding-3-small | 8,191 |
| text-embedding-3-large | 8,191 |
| Model | Context Window |
|---|---|
| gpt-3.5-turbo-instruct | 4,096 |
// Reserve tokens for response: the context window is shared between input
// and output, so the input must leave headroom for the completion.
int maxContextTokens = 4096;
int maxInputTokens = maxContextTokens - 1000; // Reserve 1000 for response
if (estimator.estimateTokenCountInMessages(messages) > maxInputTokens) {
messages = trimMessages(messages);
}
}// GOOD: Create once, reuse
// Instances are immutable and thread-safe (see spec above), so one shared
// static estimator suffices for the whole application.
private static final AzureOpenAiTokenCountEstimator ESTIMATOR =
new AzureOpenAiTokenCountEstimator(AzureOpenAiChatModelName.GPT_4);
// BAD: Creating repeatedly -- per-iteration construction is pure overhead
// compared to reusing the shared ESTIMATOR.
for (String text : texts) {
AzureOpenAiTokenCountEstimator estimator =
new AzureOpenAiTokenCountEstimator("gpt-4"); // Wasteful!
int tokens = estimator.estimateTokenCountInText(text);
}int tokens = estimator.estimateTokenCountInMessages(messages);
if (tokens > modelLimit) {
throw new IllegalArgumentException(
String.format("Message too long: %d tokens (limit: %d)",
tokens, modelLimit)
);
}class TokenBudget {
// Tokens still available to spend against the current budget window.
private int remaining;

/** Creates a budget holding {@code maxTokens} tokens. */
TokenBudget(int maxTokens) {
    remaining = maxTokens;
}

/** Returns true when {@code tokens} can be spent without overdrawing the budget. */
boolean canAfford(int tokens) {
    return tokens <= remaining;
}

/**
 * Deducts {@code tokens} from the budget.
 * @throws IllegalStateException when the budget cannot cover the request
 */
void spend(int tokens) {
    if (canAfford(tokens)) {
        remaining -= tokens;
        return;
    }
    throw new IllegalStateException("Insufficient token budget");
}

/** Current unspent token balance. */
int getRemaining() {
    return remaining;
}

/** Restores the budget to {@code maxTokens} (e.g. at the start of a new window). */
void reset(int maxTokens) {
    remaining = maxTokens;
}
}try {
int tokens = estimator.estimateTokenCountInText(text);
} catch (NullPointerException e) {
// Text or message was null
// NOTE(review): catching NullPointerException instead of checking
// `text != null` before the call is an anti-pattern -- prefer an up-front
// null check (e.g. Objects.requireNonNull) over exception-driven control flow.
System.err.println("Input cannot be null");
} catch (IllegalArgumentException e) {
// Invalid model name in constructor
// (per the spec above, IllegalArgumentException comes from the constructors,
// not from estimateTokenCountInText -- catch it where the estimator is built)
System.err.println("Invalid model: " + e.getMessage());
}Install with Tessl CLI
npx tessl i tessl/maven-dev-langchain4j--langchain4j-azure-open-ai@1.11.0