CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/maven-dev-langchain4j--langchain4j-open-ai

LangChain4j OpenAI Integration providing Java access to OpenAI APIs including chat models, embeddings, image generation, audio transcription, and moderation.

Overview
Eval results
Files

request-response.mddocs/

Request and Response Metadata

OpenAI-specific extensions to standard LangChain4j request parameters and response metadata provide access to advanced features like reasoning effort, service tiers, parallel tool calls, and detailed token usage breakdowns. These classes enable fine-grained control over model behavior and access to comprehensive response information.

Request parameters can be set at both the model level (as defaults) and per-request (for specific calls), with per-request parameters overriding defaults. Response metadata includes OpenAI-specific fields like system fingerprints, creation timestamps, and service tier information.

Capabilities

OpenAiChatRequestParameters

OpenAI-specific chat request parameters that extend the standard LangChain4j parameters with additional OpenAI features.

public class OpenAiChatRequestParameters extends DefaultChatRequestParameters {
    public static final OpenAiChatRequestParameters EMPTY;

    public static Builder builder();

    // Standard parameters (inherited from DefaultChatRequestParameters)
    public String modelName();
    public Double temperature();
    public Double topP();
    public Double frequencyPenalty();
    public Double presencePenalty();
    public Integer maxOutputTokens();
    public List<String> stopSequences();
    public List<ToolSpecification> toolSpecifications();
    public ToolChoice toolChoice();
    public ResponseFormat responseFormat();

    // OpenAI-specific parameters
    public Integer maxCompletionTokens();
    public Map<String, Integer> logitBias();
    public Boolean parallelToolCalls();
    public Integer seed();
    public String user();
    public Boolean store();
    public Map<String, String> metadata();
    public String serviceTier();
    public String reasoningEffort();
    public Map<String, Object> customParameters();

    // Combining parameters
    public OpenAiChatRequestParameters overrideWith(ChatRequestParameters other);
    public OpenAiChatRequestParameters defaultedBy(ChatRequestParameters defaults);
}

Builder

public static class Builder extends DefaultChatRequestParameters.Builder<Builder> {
    // Standard parameters
    public Builder modelName(String modelName);
    public Builder modelName(OpenAiChatModelName modelName);
    public Builder temperature(Double temperature);
    public Builder topP(Double topP);
    public Builder frequencyPenalty(Double frequencyPenalty);
    public Builder presencePenalty(Double presencePenalty);
    public Builder maxOutputTokens(Integer maxOutputTokens);
    public Builder stopSequences(List<String> stopSequences);
    public Builder toolSpecifications(List<ToolSpecification> toolSpecifications);
    public Builder toolChoice(ToolChoice toolChoice);
    public Builder responseFormat(ResponseFormat responseFormat);

    // OpenAI-specific parameters
    public Builder maxCompletionTokens(Integer maxCompletionTokens);
    public Builder logitBias(Map<String, Integer> logitBias);
    public Builder parallelToolCalls(Boolean parallelToolCalls);
    public Builder seed(Integer seed);
    public Builder user(String user);
    public Builder store(Boolean store);
    public Builder metadata(Map<String, String> metadata);
    public Builder serviceTier(String serviceTier);
    public Builder reasoningEffort(String reasoningEffort);
    public Builder customParameters(Map<String, Object> customParameters);

    // Override with another instance
    public Builder overrideWith(ChatRequestParameters other);

    public OpenAiChatRequestParameters build();
}

Basic Usage Example

import dev.langchain4j.model.openai.OpenAiChatRequestParameters;
import dev.langchain4j.model.openai.OpenAiChatModelName;

// Create request parameters
OpenAiChatRequestParameters params = OpenAiChatRequestParameters.builder()
    .modelName(OpenAiChatModelName.GPT_4_O)
    .temperature(0.7)
    .maxCompletionTokens(500)
    .seed(42)  // Deterministic output
    .build();

// Use with model
OpenAiChatModel model = OpenAiChatModel.builder()
    .apiKey(apiKey)
    .defaultRequestParameters(params)
    .build();

Per-Request Parameters Example

// Set default parameters
OpenAiChatRequestParameters defaults = OpenAiChatRequestParameters.builder()
    .temperature(0.7)
    .maxCompletionTokens(1000)
    .build();

OpenAiChatModel model = OpenAiChatModel.builder()
    .apiKey(apiKey)
    .defaultRequestParameters(defaults)
    .build();

// Override for specific request
OpenAiChatRequestParameters requestParams = OpenAiChatRequestParameters.builder()
    .temperature(0.3)  // More focused for this request
    .seed(123)  // Deterministic output
    .build();

// Build the request with per-call parameter overrides
ChatRequest request = ChatRequest.builder()
    .messages(UserMessage.from("Explain quantum computing"))
    .parameters(requestParams)
    .build();

// Use the public chat(...) API — doChat(...) is an internal template method
// implementors override, not a client-facing entry point
ChatResponse response = model.chat(request);

Reasoning Model Configuration Example

// Configure for o3-mini reasoning model
OpenAiChatRequestParameters reasoningParams = OpenAiChatRequestParameters.builder()
    .modelName(OpenAiChatModelName.O3_MINI)
    .reasoningEffort("high")  // More thorough reasoning
    .maxCompletionTokens(10000)  // Reasoning needs more tokens
    .build();

OpenAiChatModel reasoningModel = OpenAiChatModel.builder()
    .apiKey(apiKey)
    .defaultRequestParameters(reasoningParams)
    .timeout(Duration.ofMinutes(5))
    .build();

Tool Calling Configuration Example

// Configure tool calling behavior
OpenAiChatRequestParameters toolParams = OpenAiChatRequestParameters.builder()
    .parallelToolCalls(true)  // Allow parallel execution
    .maxCompletionTokens(2000)
    .build();

ChatRequest request = ChatRequest.builder()
    .messages(UserMessage.from("What's the weather in Paris and London?"))
    .toolSpecifications(List.of(weatherTool))
    .parameters(toolParams)
    .build();

Parameter Combination Example

// Default parameters for all requests
OpenAiChatRequestParameters defaults = OpenAiChatRequestParameters.builder()
    .temperature(0.7)
    .maxCompletionTokens(1000)
    .user("app-user-123")
    .build();

// Request-specific overrides
OpenAiChatRequestParameters overrides = OpenAiChatRequestParameters.builder()
    .temperature(0.3)  // Override temperature
    .seed(42)  // Add seed
    .build();

// Combine: overrides take precedence
OpenAiChatRequestParameters combined = defaults.overrideWith(overrides);

// Result has:
// - temperature: 0.3 (from overrides)
// - maxCompletionTokens: 1000 (from defaults)
// - user: "app-user-123" (from defaults)
// - seed: 42 (from overrides)

OpenAiChatResponseMetadata

OpenAI-specific chat response metadata extending standard response metadata with additional OpenAI fields.

public class OpenAiChatResponseMetadata extends ChatResponseMetadata {
    public static Builder builder();

    // Standard metadata (inherited)
    public String id();
    public String modelName();
    public FinishReason finishReason();

    // OpenAI-specific token usage
    public OpenAiTokenUsage tokenUsage();

    // OpenAI-specific fields
    public Long created();
    public String serviceTier();
    public String systemFingerprint();

    // Raw response access
    public SuccessfulHttpResponse rawHttpResponse();
    public List<ServerSentEvent> rawServerSentEvents();

    // Create modified copy
    public Builder toBuilder();
}

Builder

public static class Builder extends ChatResponseMetadata.Builder<Builder> {
    // Standard fields
    public Builder id(String id);
    public Builder modelName(String modelName);
    public Builder tokenUsage(TokenUsage tokenUsage);
    public Builder finishReason(FinishReason finishReason);

    // OpenAI-specific fields
    public Builder created(Long created);
    public Builder serviceTier(String serviceTier);
    public Builder systemFingerprint(String systemFingerprint);
    public Builder rawHttpResponse(SuccessfulHttpResponse rawHttpResponse);
    public Builder rawServerSentEvents(List<ServerSentEvent> rawServerSentEvents);

    public OpenAiChatResponseMetadata build();
}

Basic Usage Example

// Execute via the public chat(...) API — doChat(...) is an internal
// template method, not meant to be called by client code
ChatResponse response = model.chat(request);
OpenAiChatResponseMetadata metadata = (OpenAiChatResponseMetadata) response.metadata();

// Standard fields inherited from ChatResponseMetadata
System.out.println("Response ID: " + metadata.id());
System.out.println("Model: " + metadata.modelName());
System.out.println("Finish reason: " + metadata.finishReason());

// Token usage (OpenAI-specific subclass with detailed breakdowns)
OpenAiTokenUsage usage = metadata.tokenUsage();
System.out.println("Input tokens: " + usage.inputTokenCount());
System.out.println("Output tokens: " + usage.outputTokenCount());
System.out.println("Total tokens: " + usage.totalTokenCount());

// OpenAI-specific fields; created() is Unix seconds, hence * 1000 for Date
System.out.println("Created: " + new Date(metadata.created() * 1000));
System.out.println("Service tier: " + metadata.serviceTier());
System.out.println("System fingerprint: " + metadata.systemFingerprint());

Detailed Token Analysis Example

OpenAiChatResponseMetadata metadata = (OpenAiChatResponseMetadata) response.metadata();
OpenAiTokenUsage usage = metadata.tokenUsage();

// Basic counts
System.out.println("=== Token Usage ===");
System.out.println("Input: " + usage.inputTokenCount());
System.out.println("Output: " + usage.outputTokenCount());
System.out.println("Total: " + usage.totalTokenCount());

// Cached tokens (if using prompt caching)
if (usage.inputTokensDetails() != null) {
    Integer cached = usage.inputTokensDetails().cachedTokens();
    if (cached != null && cached > 0) {
        System.out.println("\n=== Caching ===");
        System.out.println("Cached tokens: " + cached);
        System.out.println("New tokens: " + (usage.inputTokenCount() - cached));
        System.out.println("Cache savings: " +
            String.format("%.1f%%", (cached * 100.0 / usage.inputTokenCount())));
    }
}

// Reasoning tokens (for o1/o3 models)
if (usage.outputTokensDetails() != null) {
    Integer reasoning = usage.outputTokensDetails().reasoningTokens();
    if (reasoning != null && reasoning > 0) {
        System.out.println("\n=== Reasoning ===");
        System.out.println("Reasoning tokens: " + reasoning);
        System.out.println("Response tokens: " + (usage.outputTokenCount() - reasoning));
        System.out.println("Reasoning ratio: " +
            String.format("%.1f%%", (reasoning * 100.0 / usage.outputTokenCount())));
    }
}

System Fingerprint Tracking Example

public class SystemFingerprintTracker {

    // Last fingerprint observed; stays null until a non-null fingerprint arrives.
    private String lastFingerprint;

    /**
     * Records the system fingerprint of a response and warns when it changes.
     *
     * <p>A changed fingerprint indicates the backend model configuration changed,
     * so outputs may no longer be reproducible against earlier responses.
     *
     * @param metadata response metadata; {@code systemFingerprint()} may be null
     *                 for models that do not report one
     */
    public void trackResponse(OpenAiChatResponseMetadata metadata) {
        String currentFingerprint = metadata.systemFingerprint();

        // Skip responses without a fingerprint. Without this guard, a null
        // first fingerprint keeps lastFingerprint null, so "Initial" is logged
        // on every call and a later real change is never detected.
        if (currentFingerprint == null) {
            return;
        }

        if (lastFingerprint == null) {
            lastFingerprint = currentFingerprint;
            System.out.println("Initial system fingerprint: " + currentFingerprint);
        } else if (!lastFingerprint.equals(currentFingerprint)) {
            System.out.println("WARNING: System fingerprint changed!");
            System.out.println("Old: " + lastFingerprint);
            System.out.println("New: " + currentFingerprint);
            System.out.println("Model behavior may have changed.");
            lastFingerprint = currentFingerprint;
        }
    }
}

Parameter Details

Max Completion Tokens

Maximum tokens for the model's response:

  • Replaces deprecated maxTokens
  • Does not include prompt tokens
  • Setting too low may truncate responses
  • Range: 1 to model's maximum

Logit Bias

Modify likelihood of specific tokens appearing:

  • Map of token ID to bias value
  • Range: -100 to 100; values between -1 and 1 nudge likelihood, while -100 effectively bans and 100 effectively forces the token
  • Use for precise output control
  • Example: Discourage certain words
Map<String, Integer> bias = Map.of(
    "1234", -10,  // Discourage token 1234
    "5678", 5     // Encourage token 5678
);

OpenAiChatRequestParameters params = OpenAiChatRequestParameters.builder()
    .logitBias(bias)
    .build();

Parallel Tool Calls

Allow multiple tool calls in one response:

  • true: Model can call multiple tools simultaneously
  • false: Model makes at most one tool call per response
  • Improves efficiency for independent operations
  • Default: model-dependent

Seed

Integer for deterministic sampling:

  • Same seed + parameters = same output
  • Useful for testing and reproducibility
  • Not guaranteed 100% deterministic across versions
  • Range: any integer

User Identifier

Track end-user for monitoring and abuse detection:

  • String identifier for the user
  • Helps OpenAI detect and mitigate abuse
  • Recommended for production applications
  • Not visible to users

Store

Control conversation storage:

  • true: Store the completion for use in OpenAI's model distillation and evals products
  • false: Don't store
  • Default varies by API tier
  • Privacy consideration

Metadata

Custom metadata for the request:

  • Map of string key-value pairs
  • Helps with tracking and analytics
  • Not used by the model
  • Returned in usage tracking
Map<String, String> metadata = Map.of(
    "user_id", "12345",
    "session_id", "abc-def-ghi",
    "application", "my-app"
);

Service Tier

OpenAI service tier selection:

  • "auto": Automatic selection (default)
  • "default": Standard tier
  • Future: May support additional tiers
  • Affects latency and availability

Reasoning Effort

Control reasoning depth for o1/o3 models:

  • "low": Faster, less thorough
  • "medium": Balanced
  • "high": Slower, more thorough
  • Only applicable to reasoning models

Custom Parameters

Additional custom parameters for API requests:

  • Map of string keys to object values
  • For experimental or undocumented features
  • Use with caution
  • May break with API changes

Response Metadata Details

ID

Unique identifier for the response:

  • Format: chatcmpl-...
  • Use for support and debugging
  • Reference in API documentation

Model Name

Actual model that processed the request:

  • May differ from requested model
  • Includes version/snapshot information
  • Use for auditing and tracking

Created

Unix timestamp of response creation:

  • Seconds since epoch
  • Convert to Date: new Date(created * 1000)
  • Use for latency tracking

Service Tier

Service tier used for the request:

  • Matches request or auto-selected
  • Affects billing in tiered pricing
  • Track for cost analysis

System Fingerprint

Identifier for model configuration:

  • Changes when model behavior changes
  • Monitor for unexpected changes
  • Use for reproducibility tracking

Finish Reason

Why generation stopped:

  • STOP: Natural completion
  • LENGTH: Max tokens reached
  • TOOL_EXECUTION: Tool call made
  • CONTENT_FILTER: Content filtered
  • OTHER: Other reason

Best Practices

Default vs Per-Request Parameters

// Good: Set stable defaults at model level
OpenAiChatModel model = OpenAiChatModel.builder()
    .apiKey(apiKey)
    .defaultRequestParameters(
        OpenAiChatRequestParameters.builder()
            .temperature(0.7)
            .maxCompletionTokens(1000)
            .user("app-user")
            .build()
    )
    .build();

// Override when needed
ChatRequest specificRequest = ChatRequest.builder()
    .messages(messages)
    .parameters(
        OpenAiChatRequestParameters.builder()
            .temperature(0.3)  // Override for this request
            .build()
    )
    .build();

Monitoring Token Usage

// Analyzes a chat response: logs token usage, alerts on high usage, and
// records cost metrics.
// NOTE(review): relies on `logger` and `metrics` fields and a
// `calculateCost(String, OpenAiTokenUsage)` helper that are not shown in
// this snippet — they must be supplied by the surrounding application code.
public class ResponseAnalyzer {
    public void analyzeResponse(ChatResponse response) {
        // Cast is safe only for responses produced by an OpenAI model;
        // other providers return different metadata subclasses.
        OpenAiChatResponseMetadata metadata =
            (OpenAiChatResponseMetadata) response.metadata();

        OpenAiTokenUsage usage = metadata.tokenUsage();

        // Log usage
        logger.info("Request {} used {} tokens",
            metadata.id(),
            usage.totalTokenCount());

        // Alert on high usage (10000 is an example threshold — tune per app)
        if (usage.totalTokenCount() > 10000) {
            logger.warn("High token usage detected: {} tokens",
                usage.totalTokenCount());
        }

        // Track costs using the actual model name reported by the API
        // (may differ from the requested model, e.g. include a snapshot suffix)
        double cost = calculateCost(metadata.modelName(), usage);
        metrics.recordCost(cost);
    }
}

Deterministic Testing

@Test
public void testDeterministicOutput() {
    // Minimize nondeterminism: temperature 0 plus a fixed seed.
    // NOTE(review): OpenAI documents seeded sampling as best-effort, not
    // guaranteed — this test can be flaky across backend changes (monitor
    // systemFingerprint for configuration drift).
    OpenAiChatRequestParameters params = OpenAiChatRequestParameters.builder()
        .temperature(0.0)  // Minimum randomness
        .seed(42)  // Fixed seed
        .build();

    String prompt = "What is 2+2?";

    // Multiple calls should give same result
    // NOTE(review): confirm a `generate(String, params)` overload exists in
    // the LangChain4j version in use; current versions expose
    // chat(ChatRequest) instead.
    String result1 = model.generate(prompt, params);
    String result2 = model.generate(prompt, params);

    assertEquals(result1, result2);
}

Feature Detection

public class ModelCapabilities {

    /** Whether the given model family supports parallel tool calls. */
    public boolean supportsParallelToolCalls(String modelName) {
        boolean gpt4Family = modelName.startsWith("gpt-4");
        boolean gpt35Family = modelName.startsWith("gpt-3.5");
        return gpt4Family || gpt35Family;
    }

    /** Whether the given model is a reasoning model that accepts reasoningEffort. */
    public boolean supportsReasoningEffort(String modelName) {
        return modelName.startsWith("o1") || modelName.startsWith("o3");
    }

    /**
     * Builds request parameters tailored to the given model's capabilities:
     * enables parallel tool calls where supported and sets a medium reasoning
     * effort for reasoning models.
     */
    public OpenAiChatRequestParameters configureForModel(String modelName) {
        OpenAiChatRequestParameters.Builder params =
            OpenAiChatRequestParameters.builder().modelName(modelName);

        if (supportsParallelToolCalls(modelName)) {
            params.parallelToolCalls(true);
        }
        if (supportsReasoningEffort(modelName)) {
            params.reasoningEffort("medium");
        }

        return params.build();
    }
}

Install with Tessl CLI

npx tessl i tessl/maven-dev-langchain4j--langchain4j-open-ai

docs

advanced-features.md

audio-transcription-models.md

chat-models.md

embedding-models.md

image-models.md

index.md

language-models.md

model-catalog.md

moderation-models.md

request-response.md

token-management.md

README.md

tile.json