LangChain4j OpenAI Integration providing Java access to OpenAI APIs including chat models, embeddings, image generation, audio transcription, and moderation.
Advanced features include experimental capabilities, internal utilities, and extensibility mechanisms. This includes the OpenAI Responses API for prompt caching and advanced streaming, SPI factories for custom builder creation, and internal components like the streaming response builder.
These features are designed for specialized use cases and may require deeper understanding of the library internals. Some features are marked as experimental and may change in future versions.
Experimental streaming chat model using OpenAI's Responses API, which provides advanced features like explicit prompt caching, safety identifiers, and enhanced configuration options.
@Experimental
public class OpenAiResponsesStreamingChatModel implements StreamingChatModel {
public static Builder builder();
// Core streaming method
public void doChat(ChatRequest chatRequest, StreamingChatResponseHandler handler);
// Configuration and metadata
public ChatRequestParameters defaultRequestParameters();
public List<ChatModelListener> listeners();
public ModelProvider provider();
}public static class Builder {
// Core configuration
public Builder apiKey(String apiKey);
public Builder baseUrl(String baseUrl);
public Builder organizationId(String organizationId);
public Builder modelName(String modelName);
// Generation parameters
public Builder temperature(Double temperature);
public Builder topP(Double topP);
public Builder maxOutputTokens(Integer maxOutputTokens);
public Builder maxToolCalls(Integer maxToolCalls);
public Builder parallelToolCalls(Boolean parallelToolCalls);
// Responses API specific features
public Builder previousResponseId(String previousResponseId);
public Builder topLogprobs(Integer topLogprobs);
public Builder truncation(String truncation);
public Builder include(List<String> include);
public Builder serviceTier(String serviceTier);
public Builder safetyIdentifier(String safetyIdentifier);
public Builder promptCacheKey(String promptCacheKey);
public Builder promptCacheRetention(String promptCacheRetention);
public Builder reasoningEffort(String reasoningEffort);
public Builder textVerbosity(String textVerbosity);
public Builder streamIncludeObfuscation(Boolean streamIncludeObfuscation);
public Builder store(Boolean store);
public Builder strict(Boolean strict);
// HTTP configuration
public Builder httpClientBuilder(HttpClientBuilder httpClientBuilder);
// Logging
public Builder logRequests(Boolean logRequests);
public Builder logResponses(Boolean logResponses);
// Listeners
public Builder listeners(List<ChatModelListener> listeners);
public Builder listeners(ChatModelListener... listeners);
public OpenAiResponsesStreamingChatModel build();
}import dev.langchain4j.model.openai.OpenAiResponsesStreamingChatModel;
// Create model with prompt caching
OpenAiResponsesStreamingChatModel model = OpenAiResponsesStreamingChatModel.builder()
.apiKey(System.getenv("OPENAI_API_KEY"))
.modelName("gpt-4o")
.promptCacheKey("my-app-system-prompt-v1")
.promptCacheRetention("ephemeral") // or "persistent"
.strict(true)
.build();
// Use with streaming handler
model.chat("What is quantum computing?", handler);// Configure explicit prompt caching
OpenAiResponsesStreamingChatModel cachedModel = OpenAiResponsesStreamingChatModel.builder()
.apiKey(apiKey)
.modelName("gpt-4o")
.promptCacheKey("system-prompt-key-v1") // stable key: reuse across requests is what enables cache hits
.promptCacheRetention("persistent") // Cache persists across sessions
.build();
// First request: Full cost
List<ChatMessage> messages = List.of(
SystemMessage.from("Very long system prompt with detailed instructions..."),
UserMessage.from("Question 1")
);
cachedModel.doChat(ChatRequest.builder().messages(messages).build(), handler1);
// Subsequent requests with same cache key: Reduced cost for cached portion
messages = List.of(
SystemMessage.from("Very long system prompt with detailed instructions..."),
UserMessage.from("Question 2")
);
cachedModel.doChat(ChatRequest.builder().messages(messages).build(), handler2);// Continue from a previous response
String previousResponseId = "response-id-from-previous-call";
OpenAiResponsesStreamingChatModel continuationModel =
OpenAiResponsesStreamingChatModel.builder()
.apiKey(apiKey)
.modelName("gpt-4o")
.previousResponseId(previousResponseId)
.build();
// Continue the generation
continuationModel.chat("Continue the story...", handler);
Internal utility for accumulating streaming responses from OpenAI into complete chat responses. Thread-safe implementation for building responses from streaming chunks.
@Internal
public class OpenAiStreamingResponseBuilder {
// Constructors
public OpenAiStreamingResponseBuilder();
public OpenAiStreamingResponseBuilder(boolean returnThinking);
public OpenAiStreamingResponseBuilder(boolean returnThinking, boolean accumulateToolCallId);
// Append streaming chunks
public void append(ChatCompletionResponse response);
public void append(CompletionResponse response);
public void append(ParsedAndRawResponse<ChatCompletionResponse> response);
// Build final response
public ChatResponse build();
}// Typically used internally by streaming models
OpenAiStreamingResponseBuilder builder = new OpenAiStreamingResponseBuilder();
// As chunks arrive from SSE stream
for (ChatCompletionResponse chunk : streamChunks) {
builder.append(chunk);
}
// Build final complete response
ChatResponse finalResponse = builder.build();Service Provider Interface factories for customizing model builder creation. These allow advanced users to provide custom builder implementations through Java's ServiceLoader mechanism.
// Chat model builder factory
@Internal
public interface OpenAiChatModelBuilderFactory
extends Supplier<OpenAiChatModel.OpenAiChatModelBuilder> {
OpenAiChatModel.OpenAiChatModelBuilder get();
}
// Streaming chat model builder factory
@Internal
public interface OpenAiStreamingChatModelBuilderFactory
extends Supplier<OpenAiStreamingChatModel.OpenAiStreamingChatModelBuilder> {
OpenAiStreamingChatModel.OpenAiStreamingChatModelBuilder get();
}
// Language model builder factory
@Internal
public interface OpenAiLanguageModelBuilderFactory
extends Supplier<OpenAiLanguageModel.OpenAiLanguageModelBuilder> {
OpenAiLanguageModel.OpenAiLanguageModelBuilder get();
}
// Streaming language model builder factory
@Internal
public interface OpenAiStreamingLanguageModelBuilderFactory
extends Supplier<OpenAiStreamingLanguageModel.OpenAiStreamingLanguageModelBuilder> {
OpenAiStreamingLanguageModel.OpenAiStreamingLanguageModelBuilder get();
}
// Embedding model builder factory
@Internal
public interface OpenAiEmbeddingModelBuilderFactory
extends Supplier<OpenAiEmbeddingModel.OpenAiEmbeddingModelBuilder> {
OpenAiEmbeddingModel.OpenAiEmbeddingModelBuilder get();
}
// Image model builder factory
@Internal
public interface OpenAiImageModelBuilderFactory
extends Supplier<OpenAiImageModel.OpenAiImageModelBuilder> {
OpenAiImageModel.OpenAiImageModelBuilder get();
}
// Moderation model builder factory
@Internal
public interface OpenAiModerationModelBuilderFactory
extends Supplier<OpenAiModerationModel.OpenAiModerationModelBuilder> {
OpenAiModerationModel.OpenAiModerationModelBuilder get();
}
// Audio transcription model builder factory
@Internal
public interface OpenAiAudioTranscriptionModelBuilderFactory
extends Supplier<OpenAiAudioTranscriptionModel.Builder> {
OpenAiAudioTranscriptionModel.Builder get();
}// Create custom builder factory
/**
 * SPI factory that supplies a pre-configured {@code OpenAiChatModel} builder.
 * Discovered at runtime via {@code java.util.ServiceLoader}; see the
 * META-INF/services registration note below this class.
 */
public class CustomChatModelBuilderFactory implements OpenAiChatModelBuilderFactory {
@Override
public OpenAiChatModel.OpenAiChatModelBuilder get() {
// Return customized builder with defaults
// (callers can still override any of these before calling build())
return OpenAiChatModel.builder()
.timeout(Duration.ofMinutes(5))
.maxRetries(5)
.logRequests(true)
.logResponses(true);
}
}
// Register via META-INF/services/dev.langchain4j.model.openai.spi.OpenAiChatModelBuilderFactory
// File content: com.example.CustomChatModelBuilderFactoryimport dev.langchain4j.http.client.HttpClientBuilder;
import java.net.http.HttpClient;
import java.time.Duration;
// Create custom HTTP client
// Custom HTTP client builder that honors the configured timeouts.
// NOTE(review): the real dev.langchain4j HttpClientBuilder interface may
// declare additional methods / a different build() return type — confirm
// against the library before copying this example verbatim.
HttpClientBuilder customHttpClient = new HttpClientBuilder() {
    // Capture configured values instead of silently ignoring them.
    private Duration connectTimeout = Duration.ofSeconds(30); // default
    private Duration readTimeout;

    @Override
    public HttpClientBuilder connectTimeout(Duration timeout) {
        this.connectTimeout = timeout;
        return this;
    }

    @Override
    public HttpClientBuilder readTimeout(Duration timeout) {
        // java.net.http applies read timeouts per-request, not on the client;
        // the value is stored so request-sending code can apply it.
        this.readTimeout = timeout;
        return this;
    }

    @Override
    public HttpClient build() {
        return HttpClient.newBuilder()
                .connectTimeout(connectTimeout) // honors connectTimeout(...) above
                .followRedirects(HttpClient.Redirect.NORMAL)
                .build();
    }
};
// Use with model
OpenAiChatModel model = OpenAiChatModel.builder()
.apiKey(apiKey)
.httpClientBuilder(customHttpClient)
.build();import java.util.function.Supplier;
// Headers that change per request
Supplier<Map<String, String>> dynamicHeaders = () -> {
Map<String, String> headers = new HashMap<>();
headers.put("X-Request-ID", UUID.randomUUID().toString());
headers.put("X-Timestamp", String.valueOf(System.currentTimeMillis()));
headers.put("X-User-Agent", "MyApp/1.0");
return headers;
};
OpenAiChatModel model = OpenAiChatModel.builder()
.apiKey(apiKey)
.customHeaders(dynamicHeaders)
.build();// Use with OpenAI-compatible APIs (Azure, local deployments, etc.)
OpenAiChatModel azureModel = OpenAiChatModel.builder()
.baseUrl("https://your-resource.openai.azure.com/")
.apiKey(azureApiKey)
.modelName("your-deployment-name")
.customHeaders(Map.of(
"api-key", azureApiKey,
"api-version", "2024-02-01"
))
.build();
// Local OpenAI-compatible server
OpenAiChatModel localModel = OpenAiChatModel.builder()
.baseUrl("http://localhost:8080/v1/")
.apiKey("not-needed-for-local")
.modelName("local-model")
.build();import dev.langchain4j.model.chat.listener.ChatModelListener;
import dev.langchain4j.model.chat.listener.ChatModelRequestContext;
import dev.langchain4j.model.chat.listener.ChatModelResponseContext;
import dev.langchain4j.model.chat.listener.ChatModelErrorContext;
/**
 * Observability listener that logs request, response, and error events for
 * every model call. Attach via OpenAiChatModel.builder().listeners(...).
 */
public class CustomChatModelListener implements ChatModelListener {
// Called before the request is sent to the OpenAI API.
@Override
public void onRequest(ChatModelRequestContext requestContext) {
System.out.println("Request to model: " + requestContext.model());
System.out.println("Messages: " + requestContext.messages().size());
// attributes() is a mutable map shared across the request lifecycle;
// printing it here shows whatever earlier listeners have stored.
System.out.println("Request ID: " + requestContext.attributes());
}
// Called after a successful response arrives.
@Override
public void onResponse(ChatModelResponseContext responseContext) {
System.out.println("Response received");
System.out.println("Tokens: " + responseContext.response().metadata().tokenUsage());
// NOTE(review): assumes requestTimestamp()/responseTimestamp() exist and
// return epoch milliseconds — confirm against the listener-context API.
System.out.println("Duration: " +
(responseContext.responseTimestamp() - responseContext.requestTimestamp()) + "ms");
}
// Called when the request fails; the original request is still accessible.
@Override
public void onError(ChatModelErrorContext errorContext) {
System.err.println("Error occurred: " + errorContext.error().getMessage());
System.err.println("Request messages: " + errorContext.request().messages().size());
}
}
// Use with model
OpenAiChatModel model = OpenAiChatModel.builder()
.apiKey(apiKey)
.listeners(List.of(new CustomChatModelListener()))
.build();
/**
 * Wraps OpenAiChatModel with application-level retry logic and exponential
 * backoff. The model's built-in retries are disabled so this class has full
 * control over the retry schedule.
 */
public class RetryableModel {
    private final OpenAiChatModel model;
    private final int maxRetries;      // total number of attempts (>= 1)
    private final Duration retryDelay; // base delay; doubled after each failed attempt

    public RetryableModel(String apiKey, int maxRetries, Duration retryDelay) {
        if (maxRetries < 1) {
            throw new IllegalArgumentException("maxRetries must be >= 1, got " + maxRetries);
        }
        this.model = OpenAiChatModel.builder()
                .apiKey(apiKey)
                .maxRetries(0) // disable built-in retries; this class manages them
                .build();
        this.maxRetries = maxRetries;
        this.retryDelay = retryDelay;
    }

    /**
     * Calls the model, retrying up to {@code maxRetries} times.
     * The delay grows exponentially: retryDelay * 2^attempt.
     *
     * @throws RuntimeException when all attempts fail; the last error is the cause
     */
    public Response<AiMessage> generateWithRetry(String prompt) {
        Exception lastException = null;
        for (int attempt = 0; attempt < maxRetries; attempt++) {
            try {
                return model.generate(prompt);
            } catch (Exception e) {
                lastException = e;
                System.err.println("Attempt " + (attempt + 1) + " failed: " + e.getMessage());
                if (attempt < maxRetries - 1) {
                    try {
                        // True exponential backoff: base * 2^attempt
                        // (the original multiplied linearly despite its comment)
                        Thread.sleep(retryDelay.toMillis() * (1L << attempt));
                    } catch (InterruptedException ie) {
                        Thread.currentThread().interrupt(); // preserve interrupt status
                        throw new RuntimeException(ie);
                    }
                }
            }
        }
        throw new RuntimeException("All retry attempts failed", lastException);
    }
}public class CircuitBreakerModel {
// Circuit breaker around OpenAiChatModel: after `threshold` consecutive
// failures the breaker opens and rejects requests until `resetTimeout`
// elapses, then permits a single trial request (HALF_OPEN) to probe recovery.
// NOTE(review): the check-then-act on `state` in generate() is not atomic;
// under heavy concurrency a few extra calls may slip through while the
// breaker transitions — acceptable for an example, verify before production.
private final OpenAiChatModel model;
private final AtomicInteger failureCount = new AtomicInteger(0);
private final AtomicLong lastFailureTime = new AtomicLong(0);
// Number of consecutive failures that trips the breaker.
private final int threshold;
// How long the breaker stays OPEN before probing recovery.
private final Duration resetTimeout;
private enum State {
CLOSED, // Normal operation
OPEN, // Failing, reject requests
HALF_OPEN // Testing if recovered
}
// volatile gives cross-thread visibility of state changes (not atomicity).
private volatile State state = State.CLOSED;
public CircuitBreakerModel(String apiKey, int threshold, Duration resetTimeout) {
this.model = OpenAiChatModel.builder()
.apiKey(apiKey)
.build();
this.threshold = threshold;
this.resetTimeout = resetTimeout;
}
// Delegates to the model unless the breaker is OPEN. Once the reset timeout
// has passed, moves to HALF_OPEN and lets this request through as a probe.
public Response<AiMessage> generate(String prompt) {
if (state == State.OPEN) {
if (System.currentTimeMillis() - lastFailureTime.get() > resetTimeout.toMillis()) {
state = State.HALF_OPEN;
} else {
throw new RuntimeException("Circuit breaker is OPEN");
}
}
try {
Response<AiMessage> response = model.generate(prompt);
onSuccess();
return response;
} catch (Exception e) {
onFailure();
throw e;
}
}
// Any success fully closes the breaker and clears the failure count.
private void onSuccess() {
failureCount.set(0);
state = State.CLOSED;
}
// Records a failure; opens the breaker once `threshold` failures accumulate.
private void onFailure() {
int failures = failureCount.incrementAndGet();
lastFailureTime.set(System.currentTimeMillis());
if (failures >= threshold) {
state = State.OPEN;
System.err.println("Circuit breaker opened after " + failures + " failures");
}
}
}Reduce costs by caching long system prompts:
// Use Responses API for explicit caching
OpenAiResponsesStreamingChatModel model = OpenAiResponsesStreamingChatModel.builder()
.apiKey(apiKey)
.modelName("gpt-4o")
.promptCacheKey("my-system-prompt-v1")
.promptCacheRetention("persistent")
.build();
// First request caches the system prompt
// Subsequent requests reuse cached prompt at reduced costTrack safety-related requests:
OpenAiResponsesStreamingChatModel model = OpenAiResponsesStreamingChatModel.builder()
.apiKey(apiKey)
.modelName("gpt-4o")
.safetyIdentifier("user-12345-session-abc")
.build();Continue long responses:
// Get response ID from first response
String responseId = getResponseId(firstResponse);
// Create continuation model
OpenAiResponsesStreamingChatModel continuationModel =
OpenAiResponsesStreamingChatModel.builder()
.apiKey(apiKey)
.modelName("gpt-4o")
.previousResponseId(responseId)
.build();// Reuse HTTP connections
HttpClient sharedClient = HttpClient.newBuilder()
.version(HttpClient.Version.HTTP_2)
.connectTimeout(Duration.ofSeconds(30))
.build();
HttpClientBuilder builder = createBuilderWithClient(sharedClient);
// Share across multiple models
OpenAiChatModel model1 = OpenAiChatModel.builder()
.apiKey(apiKey)
.httpClientBuilder(builder)
.build();
OpenAiEmbeddingModel model2 = OpenAiEmbeddingModel.builder()
.apiKey(apiKey)
.httpClientBuilder(builder)
.build();// Process multiple requests efficiently
/**
 * Processes many prompts concurrently on a fixed-size thread pool.
 * Call {@link #shutdown()} when finished to release the pool threads
 * (the original example leaked the executor).
 */
public class BatchProcessor {
    private final OpenAiChatModel model;
    private final ExecutorService executor;

    public BatchProcessor(String apiKey, int parallelism) {
        this.model = OpenAiChatModel.builder()
                .apiKey(apiKey)
                .build();
        this.executor = Executors.newFixedThreadPool(parallelism);
    }

    /**
     * Submits every prompt to the pool and blocks until all complete.
     * Results are returned in the same order as the input prompts.
     *
     * @throws java.util.concurrent.CompletionException if any request fails
     *         (thrown by {@code CompletableFuture.join}, wrapping the cause)
     */
    public List<String> processBatch(List<String> prompts) {
        List<CompletableFuture<String>> futures = prompts.stream()
                .map(prompt -> CompletableFuture.supplyAsync(
                        () -> model.generate(prompt),
                        executor
                ))
                .collect(Collectors.toList());
        return futures.stream()
                .map(CompletableFuture::join)
                .collect(Collectors.toList());
    }

    /** Releases the worker threads. The processor cannot be reused afterwards. */
    public void shutdown() {
        executor.shutdown();
    }
}Prefer standard OpenAiChatModel over experimental features unless you specifically need advanced capabilities.
Features marked @Experimental may change: pin your dependency version and review release notes before upgrading.
For prompt caching: use stable, versioned cache keys (e.g. "system-prompt-v1") so repeated requests actually hit the cache.
Implement robust error handling: wrap calls with retries, timeouts, and fallbacks (see the retry and circuit-breaker patterns above).
Test advanced features separately: keep experimental-API tests isolated so upstream changes surface quickly.
// Before: Standard streaming model
OpenAiStreamingChatModel standardModel = OpenAiStreamingChatModel.builder()
.apiKey(apiKey)
.modelName("gpt-4o")
.build();
// After: Responses API with caching
OpenAiResponsesStreamingChatModel responsesModel =
OpenAiResponsesStreamingChatModel.builder()
.apiKey(apiKey)
.modelName("gpt-4o")
.promptCacheKey("system-prompt-key")
.build();
// Same usage pattern
responsesModel.chat(prompt, handler);
Monitor for deprecation warnings:
Enable detailed logging:
OpenAiChatModel model = OpenAiChatModel.builder()
.apiKey(apiKey)
.logRequests(true)
.logResponses(true)
.logger(LoggerFactory.getLogger("OpenAI"))
.build();For custom base URLs:
Install with Tessl CLI
npx tessl i tessl/maven-dev-langchain4j--langchain4j-open-ai