LangChain4j integration for Azure OpenAI providing chat, streaming, embeddings, image generation, audio transcription, and token counting capabilities
Chat models provide conversational AI capabilities using Azure-hosted OpenAI models like GPT-3.5-turbo and GPT-4. They support multi-turn conversations, function calling, structured output, and Azure-specific features.
import dev.langchain4j.model.azure.AzureOpenAiChatModel;
import dev.langchain4j.model.azure.AzureOpenAiStreamingChatModel;
import dev.langchain4j.model.azure.AzureOpenAiChatModelName;
import dev.langchain4j.data.message.ChatMessage;
import dev.langchain4j.data.message.UserMessage;
import dev.langchain4j.data.message.AiMessage;
import dev.langchain4j.data.message.SystemMessage;
import dev.langchain4j.data.message.ToolExecutionResultMessage;
import dev.langchain4j.model.chat.ChatLanguageModel;
import dev.langchain4j.model.chat.StreamingChatLanguageModel;
import dev.langchain4j.model.output.Response;
import dev.langchain4j.model.chat.ChatRequest;
import dev.langchain4j.model.chat.ChatResponse;
import dev.langchain4j.agent.tool.ToolSpecification;
Synchronous chat model that returns complete responses.
AzureOpenAiChatModel model = AzureOpenAiChatModel.builder()
.endpoint("https://your-resource.openai.azure.com/")
.apiKey("your-api-key")
.deploymentName("gpt-4")
.serviceVersion("2024-02-15-preview")
.temperature(0.7)
.build();
// Simple generation
String answer = model.generate("What is the capital of France?");
// Multi-turn conversation
List<ChatMessage> messages = List.of(
SystemMessage.from("You are a helpful assistant."),
UserMessage.from("What is the capital of France?")
);
Response<AiMessage> response = model.generate(messages);
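The builder also accepts an Azure AD credential instead of an API key via tokenCredential() (see the reference below); a minimal sketch, assuming the com.azure:azure-identity dependency is available and the caller has a suitable role on the Azure OpenAI resource:
import com.azure.core.credential.TokenCredential;
import com.azure.identity.DefaultAzureCredentialBuilder;
// Authenticate with Azure AD (managed identity, environment credentials, az login, ...)
TokenCredential credential = new DefaultAzureCredentialBuilder().build();
AzureOpenAiChatModel aadModel = AzureOpenAiChatModel.builder()
    .endpoint("https://your-resource.openai.azure.com/")
    .tokenCredential(credential) // replaces apiKey(...)
    .deploymentName("gpt-4")
    .serviceVersion("2024-02-15-preview")
    .build();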
package dev.langchain4j.model.azure;
/**
* Azure OpenAI chat model for synchronous chat completions.
* Thread-safe: Yes - all instances are immutable and thread-safe.
* Blocking: Yes - chat() method blocks until complete response received.
* Timeout: Controlled by timeout builder parameter (default 60 seconds).
* Retries: Automatically retries on network/timeout/429 errors per retry policy.
*/
class AzureOpenAiChatModel implements dev.langchain4j.model.chat.ChatModel {
/**
* Creates a new builder for configuring AzureOpenAiChatModel.
* @return New Builder instance with no defaults set
*/
static Builder builder();
/**
* Returns default request parameters set at build time.
* @return Immutable ChatRequestParameters or null if not set
*/
dev.langchain4j.model.chat.ChatRequestParameters defaultRequestParameters();
/**
* Returns capabilities supported by this model instance.
* Capabilities depend on the underlying model (e.g., GPT-4 supports vision).
* @return Immutable set of Capability enums
*/
java.util.Set<dev.langchain4j.model.chat.Capability> supportedCapabilities();
/**
* Executes a chat request synchronously.
* @param request ChatRequest with messages, tools, and parameters
* @return ChatResponse with AI message, token usage, and finish reason
* @throws dev.langchain4j.exception.ContentFilteredException if content violates Azure policies (not retried)
* @throws java.util.concurrent.TimeoutException if request exceeds timeout (retried per policy)
* @throws IllegalArgumentException if request is invalid or null (not retried)
* @throws RuntimeException for network/API errors (retried based on HTTP status)
*/
dev.langchain4j.model.chat.ChatResponse chat(dev.langchain4j.model.chat.ChatRequest request);
/**
* Returns registered chat model listeners.
* @return Immutable list of ChatModelListener instances
*/
java.util.List<dev.langchain4j.model.chat.listener.ChatModelListener> listeners();
/**
* Returns the model provider identifier.
* @return Always returns ModelProvider.AZURE
*/
dev.langchain4j.model.provider.ModelProvider provider();
/**
* Builder for AzureOpenAiChatModel configuration.
* All methods return Builder for fluent chaining.
* Thread-safe: No - builder instances are not thread-safe.
* Use pattern: Create builder, configure, call build() once.
*/
class Builder {
// Mandatory configuration
/**
* @param endpoint Azure OpenAI endpoint URL
* @throws IllegalArgumentException if null, empty, or malformed
*/
Builder endpoint(String endpoint);
/**
* @param serviceVersion API version (e.g., "2024-02-15-preview")
* @throws IllegalArgumentException if null or empty
*/
Builder serviceVersion(String serviceVersion);
/**
* @param deploymentName Your deployment name in Azure
* @throws IllegalArgumentException if null or empty
*/
Builder deploymentName(String deploymentName);
// Authentication (choose exactly one)
/**
* @param apiKey Azure OpenAI API key (32-char hex)
* @throws IllegalArgumentException if null or empty
*/
Builder apiKey(String apiKey);
/**
* @param apiKey OpenAI API key (starts with "sk-")
* @throws IllegalArgumentException if null or empty
*/
Builder nonAzureApiKey(String apiKey);
/**
* @param credential Azure AD TokenCredential
* @throws IllegalArgumentException if null
*/
Builder tokenCredential(com.azure.core.credential.TokenCredential credential);
// Generation parameters
/**
* Maximum tokens in completion.
* @param maxTokens 1 to model's max (e.g., 4096 for GPT-3.5, 128000 for GPT-4 Turbo)
* @default null (no limit, uses model's default)
* @deprecated Use maxCompletionTokens instead
* @throws IllegalArgumentException if < 1
*/
@Deprecated
Builder maxTokens(Integer maxTokens);
/**
* Maximum tokens in completion (preferred over maxTokens).
* @param maxCompletionTokens 1 to model's max
* @default null (no limit)
* @throws IllegalArgumentException if < 1
*/
Builder maxCompletionTokens(Integer maxCompletionTokens);
/**
* Sampling temperature for randomness.
* @param temperature 0.0 (deterministic) to 2.0 (very random)
* @default 1.0
* @throws IllegalArgumentException if not in range [0.0, 2.0]
*/
Builder temperature(Double temperature);
/**
* Nucleus sampling threshold (alternative to temperature).
* @param topP 0.0 to 1.0
* @default 1.0 (consider all tokens)
* @throws IllegalArgumentException if not in range [0.0, 1.0]
*/
Builder topP(Double topP);
/**
* Logit bias for token adjustment.
* @param logitBias Map of token ID (string) to bias value (-100 to 100)
* @default null (no bias)
* @throws IllegalArgumentException if any bias not in range [-100, 100]
*/
Builder logitBias(java.util.Map<String, Integer> logitBias);
/**
* End-user identifier for abuse monitoring.
* @param user Unique user ID string
* @default null
*/
Builder user(String user);
/**
* Stop sequences that halt generation.
* @param stop List of up to 4 stop strings
* @default null (no stop sequences)
* @throws IllegalArgumentException if more than 4 sequences
*/
Builder stop(java.util.List<String> stop);
/**
* Presence penalty for topic diversity.
* @param presencePenalty -2.0 to 2.0
* @default 0.0 (no penalty)
* @throws IllegalArgumentException if not in range [-2.0, 2.0]
*/
Builder presencePenalty(Double presencePenalty);
/**
* Frequency penalty for token repetition.
* @param frequencyPenalty -2.0 to 2.0
* @default 0.0 (no penalty)
* @throws IllegalArgumentException if not in range [-2.0, 2.0]
*/
Builder frequencyPenalty(Double frequencyPenalty);
/**
* Random seed for deterministic sampling.
* @param seed Any long value
* @default null (random)
*/
Builder seed(Long seed);
// Advanced features
/**
* Response format specification (JSON, text, JSON schema).
* @param responseFormat ResponseFormat instance
* @default null (text format)
*/
Builder responseFormat(dev.langchain4j.model.output.ResponseFormat responseFormat);
/**
* Enables strict JSON schema validation.
* @param strictJsonSchema true for strict validation
* @default false
*/
Builder strictJsonSchema(Boolean strictJsonSchema);
/**
* Azure data sources for RAG (Azure AI Search, Cosmos DB).
* @param dataSources List of AzureChatExtensionConfiguration
* @default null (no data sources)
*/
Builder dataSources(java.util.List<com.azure.ai.openai.models.AzureChatExtensionConfiguration> dataSources);
/**
* Azure enhancements (OCR, grounding).
* @param enhancements AzureChatEnhancementConfiguration
* @default null (no enhancements)
*/
Builder enhancements(com.azure.ai.openai.models.AzureChatEnhancementConfiguration enhancements);
/**
* Reasoning effort for reasoning models (o1-preview, o1-mini).
* @param reasoningEffort LOW, MEDIUM, or HIGH
* @default null (model default)
*/
Builder reasoningEffort(com.azure.ai.openai.models.ReasoningEffortValue reasoningEffort);
/**
* Default request parameters for all requests.
* @param defaultRequestParameters ChatRequestParameters
* @default null
*/
Builder defaultRequestParameters(dev.langchain4j.model.chat.ChatRequestParameters defaultRequestParameters);
/**
* Supported capabilities (used for framework feature detection).
* @param supportedCapabilities Set of Capability enums
* @default Auto-detected based on model
*/
Builder supportedCapabilities(java.util.Set<dev.langchain4j.model.chat.Capability> supportedCapabilities);
/**
* Varargs overload for supported capabilities.
*/
Builder supportedCapabilities(dev.langchain4j.model.chat.Capability... capabilities);
// HTTP configuration
/**
* Request timeout.
* @param timeout Positive duration
* @default 60 seconds
* @throws IllegalArgumentException if null or non-positive
*/
Builder timeout(java.time.Duration timeout);
/**
* Simple retry count (mutually exclusive with retryOptions).
* @param maxRetries 0 to 10
* @default 3
* @throws IllegalArgumentException if < 0 or > 10
*/
Builder maxRetries(Integer maxRetries);
/**
* Advanced retry options (mutually exclusive with maxRetries).
* @param retryOptions RetryOptions with exponential backoff
* @default 3 retries, 1s base delay, 10s max delay
*/
Builder retryOptions(com.azure.core.http.policy.RetryOptions retryOptions);
/**
* HTTP proxy configuration.
* @param proxyOptions ProxyOptions with host, port, auth
* @default null (no proxy)
*/
Builder proxyOptions(com.azure.core.http.ProxyOptions proxyOptions);
/**
* Custom HTTP client provider.
* @param httpClientProvider HttpClientProvider implementation
* @default Azure SDK default HTTP client
*/
Builder httpClientProvider(com.azure.core.http.HttpClientProvider httpClientProvider);
/**
* Pre-configured OpenAI client (overrides other HTTP config).
* @param client OpenAIClient instance
* @default null (builder creates client)
*/
Builder openAIClient(com.azure.ai.openai.OpenAIClient client);
/**
* Custom HTTP headers for all requests.
* @param customHeaders Map of header name to value
* @default Empty map
*/
Builder customHeaders(java.util.Map<String, String> customHeaders);
/**
* User-Agent suffix for identification.
* @param userAgentSuffix Suffix string (e.g., "MyApp/1.0")
* @default null
*/
Builder userAgentSuffix(String userAgentSuffix);
// Observability
/**
* Enables full HTTP request/response logging (sensitive data!).
* @param logRequestsAndResponses true to enable
* @default false
*/
Builder logRequestsAndResponses(Boolean logRequestsAndResponses);
/**
* Chat model event listeners.
* @param listeners List of ChatModelListener implementations
* @default Empty list
*/
Builder listeners(java.util.List<dev.langchain4j.model.chat.listener.ChatModelListener> listeners);
/**
* Builds the AzureOpenAiChatModel instance.
* @return Immutable, thread-safe AzureOpenAiChatModel
* @throws IllegalStateException if required parameters missing
* @throws IllegalArgumentException if configuration is invalid
*/
AzureOpenAiChatModel build();
}
}
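The chat(ChatRequest) method documented above can be used instead of generate() when tools, parameters, or metadata are needed; a minimal sketch against the model built earlier (the ChatResponse accessors shown follow the description above):
ChatRequest request = ChatRequest.builder()
    .messages(List.of(
        SystemMessage.from("You are a helpful assistant."),
        UserMessage.from("Summarize the Eiffel Tower in one sentence.")))
    .build();
ChatResponse chatResponse = model.chat(request);
System.out.println(chatResponse.aiMessage().text()); // generated reply
System.out.println(chatResponse.tokenUsage());       // prompt/completion token counts
System.out.println(chatResponse.finishReason());     // e.g. STOP or LENGTH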
Streaming chat model that returns tokens as they are generated, enabling real-time response display.
AzureOpenAiStreamingChatModel model = AzureOpenAiStreamingChatModel.builder()
.endpoint("https://your-resource.openai.azure.com/")
.apiKey("your-api-key")
.deploymentName("gpt-4")
.serviceVersion("2024-02-15-preview")
.temperature(0.7)
.build();
// Stream tokens as they arrive
model.generate("Tell me a story", new dev.langchain4j.model.output.StreamingResponseHandler<dev.langchain4j.data.message.AiMessage>() {
@Override
public void onNext(String token) {
System.out.print(token); // Called on Azure SDK thread
}
@Override
public void onComplete(dev.langchain4j.model.output.Response<dev.langchain4j.data.message.AiMessage> response) {
System.out.println("\n\nComplete!"); // Called on Azure SDK thread
}
@Override
public void onError(Throwable error) {
error.printStackTrace(); // Called on Azure SDK thread
}
});
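Because the callbacks run on Azure SDK threads, callers that need the complete answer usually bridge them to a future; a minimal sketch reusing the streaming model above (the accumulation approach is an assumption, not part of the API):
import java.util.concurrent.CompletableFuture;
CompletableFuture<String> futureAnswer = new CompletableFuture<>();
StringBuilder buffer = new StringBuilder(); // tokens for one request arrive sequentially
model.generate("Tell me a story", new dev.langchain4j.model.StreamingResponseHandler<AiMessage>() {
    @Override
    public void onNext(String token) {
        buffer.append(token);
    }
    @Override
    public void onComplete(Response<AiMessage> response) {
        futureAnswer.complete(buffer.toString());
    }
    @Override
    public void onError(Throwable error) {
        futureAnswer.completeExceptionally(error);
    }
});
String story = futureAnswer.join(); // blocks the caller until streaming finishes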
package dev.langchain4j.model.azure;
/**
* Azure OpenAI streaming chat model for token-by-token responses.
* Thread-safe: Yes - all instances are immutable and thread-safe.
* Non-blocking: Yes - chat() returns immediately, callbacks invoked asynchronously.
* Handler threading: Callbacks invoked on Azure SDK's event loop threads (not application threads).
* Handler requirements: Must be thread-safe if accessing shared state.
*/
class AzureOpenAiStreamingChatModel implements dev.langchain4j.model.chat.StreamingChatModel {
static Builder builder();
dev.langchain4j.model.chat.ChatRequestParameters defaultRequestParameters();
java.util.Set<dev.langchain4j.model.chat.Capability> supportedCapabilities();
/**
* Executes streaming chat request asynchronously.
* Returns immediately; handler callbacks invoked as tokens arrive.
* @param request ChatRequest with messages, tools, parameters
* @param handler StreamingChatResponseHandler for tokens and completion
* Callbacks invoked on Azure SDK threads - must be thread-safe.
* Do not block in callbacks to avoid impacting SDK thread pool.
*/
void chat(dev.langchain4j.model.chat.ChatRequest request,
dev.langchain4j.model.chat.StreamingChatResponseHandler handler);
java.util.List<dev.langchain4j.model.chat.listener.ChatModelListener> listeners();
dev.langchain4j.model.provider.ModelProvider provider();
/**
* Builder identical to AzureOpenAiChatModel.Builder except:
* - openAIAsyncClient() instead of openAIClient()
* - Default timeout is 120 seconds (longer for streaming)
*/
class Builder {
// Same methods as AzureOpenAiChatModel.Builder
/**
* Pre-configured async OpenAI client for streaming.
* @param client OpenAIAsyncClient instance
* @default null (builder creates client)
*/
Builder openAIAsyncClient(com.azure.ai.openai.OpenAIAsyncClient client);
/**
* @default 120 seconds (longer for streaming)
*/
@Override
Builder timeout(java.time.Duration timeout);
AzureOpenAiStreamingChatModel build();
}
}
See the Configuration Documentation for complete details on the options below.
Control model behavior and output quality.
/**
* Controls randomness in sampling.
* - 0.0: Deterministic (always picks most likely token)
* - 0.3: Focused and consistent
* - 0.7: Balanced (good default for most tasks)
* - 1.0: Default, moderately creative
* - 1.5: Very creative
* - 2.0: Maximum randomness (experimental)
* @range 0.0 to 2.0
* @default 1.0
* Adjust either temperature or topP, but not both (recommended)
*/
.temperature(0.7)
/**
* Alternative to temperature. Consider only top P probability mass.
* - 0.1: Very focused (top 10% probable tokens)
* - 0.5: Moderately focused
* - 0.9: Balanced
* - 1.0: Consider all tokens (default)
* @range 0.0 to 1.0
* @default 1.0
* Adjust either topP or temperature, but not both (recommended)
*/
.topP(0.9)
/**
* Maximum tokens in completion.
* Does NOT include prompt tokens.
* Generation stops when limit reached or natural completion.
* @param maxCompletionTokens 1 to model_max
* Model limits:
* - GPT-3.5-turbo: 4096 total (prompt + completion)
* - GPT-3.5-turbo-16k: 16384 total
* - GPT-4: 8192 total
* - GPT-4-32k: 32768 total
* - GPT-4-turbo: 128000 total
* - GPT-4o: 128000 total
* @default null (no limit, model determines)
* @throws IllegalArgumentException if < 1
*/
.maxCompletionTokens(2000)
/**
* Presence penalty: penalizes tokens that have already appeared at least once.
* Encourages the model to move on to new topics.
* - Negative: Encourages repetition
* - 0.0: No penalty
* - Positive: Encourages diversity
* @range -2.0 to 2.0
* @default 0.0
*/
.presencePenalty(0.6)
/**
* Frequency penalty: Penalizes REPEATED tokens.
* Discourages word/phrase repetition.
* - Negative: Allows repetition
* - 0.0: No penalty
* - Positive: Discourages repetition
* @range -2.0 to 2.0
* @default 0.0
*/
.frequencyPenalty(0.5)
/**
* Stop generation when any sequence is encountered.
* @param stop List of up to 4 stop strings
* Common uses:
* - Stop at specific markers: ["\n\n", "###"]
* - Stop at completion indicators: ["END", "DONE"]
* - Stop at dialogue markers: ["\nUser:", "\nAssistant:"]
* @default null (no stop sequences)
* @throws IllegalArgumentException if more than 4 sequences
*/
.stop(List.of("\n\n###\n\n", "END"))
/**
* Random seed for deterministic output.
* Same seed + same input = same output (within model version).
* Useful for:
* - Testing and reproducibility
* - A/B testing prompts
* - Debugging response variations
* @param seed Any long value
* @default null (non-deterministic)
* Note: Requires temperature=0.0 or topP<1.0 for best determinism
*/
.seed(42L)
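A minimal reproducibility sketch combining seed with temperature 0.0, using the same placeholder endpoint and deployment as the earlier examples:
// Two calls with identical prompt, parameters, and seed should return the same text
// (best-effort determinism, within a single model version).
AzureOpenAiChatModel deterministic = AzureOpenAiChatModel.builder()
    .endpoint("https://your-resource.openai.azure.com/")
    .apiKey("your-api-key")
    .deploymentName("gpt-4")
    .serviceVersion("2024-02-15-preview")
    .temperature(0.0)
    .seed(42L)
    .build();
String first = deterministic.generate("List three prime numbers.");
String second = deterministic.generate("List three prime numbers.");
System.out.println(first.equals(second)); // usually true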
Control output structure for parsing and validation.
/**
* Guarantees valid JSON output.
* Model MUST include "json" in prompt or system message.
* Response will be valid JSON (object or array).
*/
import dev.langchain4j.model.output.ResponseFormat;
.responseFormat(ResponseFormat.JSON)
// Example prompt: "Generate a JSON object with user information"
/**
* Guarantees output matches JSON schema.
* Enables type-safe parsing and validation.
* Requires strictJsonSchema(true) for enforcement.
*/
String jsonSchema = """
{
"type": "object",
"properties": {
"name": {"type": "string"},
"age": {"type": "integer"},
"email": {"type": "string", "format": "email"}
},
"required": ["name", "age"],
"additionalProperties": false
}
""";
.responseFormat(ResponseFormat.builder()
.type("json_schema")
.jsonSchema(jsonSchema)
.build())
.strictJsonSchema(true)
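With strict schema validation enabled, the reply can be mapped directly onto a matching type; a minimal sketch assuming Jackson (2.12+) is on the classpath, with UserInfo as a hypothetical target record:
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
// Hypothetical record mirroring the schema above
record UserInfo(String name, int age, String email) {}
String json = model.generate("Generate a JSON object describing a fictional user.");
try {
    UserInfo user = new ObjectMapper().readValue(json, UserInfo.class);
    System.out.println(user.name() + " (" + user.age() + ")");
} catch (JsonProcessingException e) {
    // With strictJsonSchema(true) this usually indicates an empty or filtered response rather than malformed JSON
    throw new IllegalStateException("Response did not contain the expected JSON", e);
}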
Models can call functions (tools) to retrieve data or perform actions.
import dev.langchain4j.agent.tool.ToolSpecification;
import dev.langchain4j.agent.tool.JsonSchemaProperty;
import dev.langchain4j.agent.tool.ToolExecutionRequest;
import dev.langchain4j.data.message.ToolExecutionResultMessage;
// Define tool
ToolSpecification weatherTool = ToolSpecification.builder()
.name("get_weather")
.description("Get current weather for a location")
.addParameter("location", JsonSchemaProperty.STRING,
JsonSchemaProperty.description("City name"))
.addParameter("unit", JsonSchemaProperty.STRING,
JsonSchemaProperty.enums("celsius", "fahrenheit"))
.build();
// Include in request
ChatRequest request = ChatRequest.builder()
.messages(messages)
.toolSpecifications(List.of(weatherTool))
.build();
ChatResponse response = model.chat(request);
// Check if model wants to call tool
if (response.aiMessage().hasToolExecutionRequests()) {
for (ToolExecutionRequest req : response.aiMessage().toolExecutionRequests()) {
String toolName = req.name();
String arguments = req.arguments(); // JSON string
// Execute tool
String result = executeWeatherTool(arguments);
// Add result to conversation
messages.add(ToolExecutionResultMessage.from(req, result));
}
// Continue conversation with tool results
ChatResponse finalResponse = model.chat(ChatRequest.builder()
.messages(messages)
.build());
}
Parallel function calling: newer models may return several tool execution requests in a single response; iterate over toolExecutionRequests() to handle all of them, as shown in the sketch below.
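A minimal alternative to the sequential loop above that executes several tool calls concurrently; executeTool is a hypothetical dispatcher, and messages must be a mutable list:
import java.util.concurrent.CompletableFuture;
if (response.aiMessage().hasToolExecutionRequests()) {
    List<CompletableFuture<ToolExecutionResultMessage>> futures =
        response.aiMessage().toolExecutionRequests().stream()
            .map(req -> CompletableFuture.supplyAsync(
                () -> ToolExecutionResultMessage.from(req, executeTool(req.name(), req.arguments()))))
            .toList();
    futures.forEach(f -> messages.add(f.join())); // wait for every tool, preserve request order
    ChatResponse followUp = model.chat(ChatRequest.builder().messages(messages).build());
}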
/**
* Integrate Azure data sources for retrieval-augmented generation.
* Supported: Azure AI Search, Azure Cosmos DB, Azure SQL, custom endpoints.
*/
import com.azure.ai.openai.models.AzureChatExtensionConfiguration;
import com.azure.ai.openai.models.AzureSearchChatExtensionConfiguration;
AzureChatExtensionConfiguration searchConfig = new AzureSearchChatExtensionConfiguration(
searchEndpoint,
indexName,
searchKey
);
.dataSources(List.of(searchConfig))
/**
* Enable Azure enhancements like OCR and grounding.
* - OCR: Extract text from images in messages
* - Grounding: Citation and source attribution
*/
import com.azure.ai.openai.models.AzureChatEnhancementConfiguration;
import com.azure.ai.openai.models.AzureChatOCREnhancementConfiguration;
import com.azure.ai.openai.models.AzureChatGroundingEnhancementConfiguration;
AzureChatEnhancementConfiguration enhancements = new AzureChatEnhancementConfiguration()
.setOcr(new AzureChatOCREnhancementConfiguration().setEnabled(true))
.setGrounding(new AzureChatGroundingEnhancementConfiguration().setEnabled(true));
.enhancements(enhancements)
/**
* Control reasoning effort for o1-preview and o1-mini models.
* Higher effort = more thorough reasoning = higher latency and cost.
*/
import com.azure.ai.openai.models.ReasoningEffortValue;
.reasoningEffort(ReasoningEffortValue.MEDIUM)
// Options: LOW, MEDIUM, HIGH
// - LOW: Fast, suitable for simple tasks
// - MEDIUM: Balanced (default)
// - HIGH: Thorough, for complex reasoning tasks
package dev.langchain4j.model.azure;
/**
* Predefined Azure OpenAI chat model names.
* Use with AzureOpenAiChatModelName.GPT_4.modelName()
*/
enum AzureOpenAiChatModelName {
/** gpt-35-turbo: GPT-3.5 Turbo, 4K context, 0613 or newer */
GPT_3_5_TURBO,
/** gpt-35-turbo-0301: March 2023 snapshot */
GPT_3_5_TURBO_0301,
/** gpt-35-turbo-0613: June 2023, function calling support */
GPT_3_5_TURBO_0613,
/** gpt-35-turbo-1106: November 2023, improved function calling */
GPT_3_5_TURBO_1106,
/** gpt-35-turbo-16k: 16K context window */
GPT_3_5_TURBO_16K,
/** gpt-35-turbo-16k-0613: 16K context, June 2023 */
GPT_3_5_TURBO_16K_0613,
/** gpt-4: GPT-4, 8K context */
GPT_4,
/** gpt-4-0613: June 2023 snapshot */
GPT_4_0613,
/** gpt-4-0125-preview: January 2024 preview */
GPT_4_0125_PREVIEW,
/** gpt-4-1106-preview: November 2023, 128K context */
GPT_4_1106_PREVIEW,
/** gpt-4-turbo: GPT-4 Turbo, 128K context, latest */
GPT_4_TURBO,
/** gpt-4-turbo-2024-04-09: April 2024 snapshot */
GPT_4_TURBO_2024_04_09,
/** gpt-4-32k: 32K context window */
GPT_4_32K,
/** gpt-4-32k-0613: 32K context, June 2023 */
GPT_4_32K_0613,
/** gpt-4-vision-preview: GPT-4 with vision, multimodal */
GPT_4_VISION_PREVIEW,
/** gpt-4o: GPT-4 Omni, multimodal, 128K context, fastest */
GPT_4_O;
/**
* Returns model name string for API requests.
* @return Model name (e.g., "gpt-4")
*/
String modelName();
/**
* Returns base model type without version.
* @return Model type (e.g., "gpt-4")
*/
String modelType();
/**
* Returns version suffix.
* @return Version (e.g., "0613") or empty string
*/
String modelVersion();
String toString();
}
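Azure deployment names are user-defined, so the enum only substitutes for deploymentName() when the deployment was created with the model's default name; a minimal sketch under that assumption:
AzureOpenAiChatModel gpt4o = AzureOpenAiChatModel.builder()
    .endpoint("https://your-resource.openai.azure.com/")
    .apiKey("your-api-key")
    .deploymentName(AzureOpenAiChatModelName.GPT_4_O.modelName()) // "gpt-4o"
    .serviceVersion("2024-02-15-preview")
    .build();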
Both chat() and generate() can throw the exceptions below; a typical handling pattern:
import dev.langchain4j.exception.ContentFilteredException;
import java.util.concurrent.TimeoutException;
try {
String response = model.generate(userMessage);
} catch (ContentFilteredException e) {
// Content violated Azure safety policies
// NOT retried automatically
// Check Azure OpenAI Studio for content filter settings
System.err.println("Content filtered: " + e.getMessage());
// Handle: prompt user to modify input
} catch (TimeoutException e) {
// Request exceeded timeout duration
// Retried automatically per retry policy
System.err.println("Request timed out");
// Handle: increase timeout or simplify request
} catch (IllegalArgumentException e) {
// Invalid request or configuration
// NOT retried
System.err.println("Invalid request: " + e.getMessage());
// Handle: fix code or validate input
} catch (RuntimeException e) {
// Network, API, or auth error
// Retry behavior depends on HTTP status
System.err.println("Error: " + e.getMessage());
// Handle: check connectivity, auth, quotas
}
Azure OpenAI applies content filters to input (prompts) and output (completions).
Filter categories: hate, sexual, violence, and self-harm.
Filter levels (configured in Azure OpenAI Studio): each category can be set to a low, medium, or high severity threshold, or disabled where permitted.
ContentFilteredException details: thrown when either the prompt or the completion is blocked; the exception message (see the catch block above) describes the violation.
Configure filters in Azure OpenAI Studio per deployment.
Install with Tessl CLI
npx tessl i tessl/maven-dev-langchain4j--langchain4j-azure-open-ai