Core classes and interfaces of LangChain4j, providing foundational abstractions for LLM interaction, RAG, embeddings, agents, and observability.
Package: dev.langchain4j.model.chat
Thread-Safety: Implementation-dependent, typically thread-safe
Primary Interfaces: ChatModel, StreamingChatModel
Chat models provide conversational AI capabilities with support for multi-turn conversations, tool calling, and multimodal inputs.
package dev.langchain4j.model.chat;

import java.util.List;

import dev.langchain4j.data.message.ChatMessage;
import dev.langchain4j.model.chat.request.ChatRequest;
import dev.langchain4j.model.chat.response.ChatResponse;

/**
 * Synchronous chat model interface.
 * <p>
 * Thread-safety is implementation-dependent; consult the provider's
 * documentation before sharing one instance across threads.
 */
public interface ChatModel {

    /**
     * Sends a chat request with full control over messages and parameters.
     *
     * @param request complete chat request with messages and parameters
     * @return complete chat response with AI message, token usage, and finish reason
     */
    ChatResponse chat(ChatRequest request);

    /**
     * Chats using an explicit message history.
     *
     * @param messages conversation history
     * @return complete chat response
     */
    ChatResponse chat(List<ChatMessage> messages);

    /**
     * Simple text chat (most convenient form).
     *
     * @param userMessage the user's text message
     * @return the AI's text response
     */
    String chat(String userMessage);
}
package dev.langchain4j.model.chat;
import java.util.List;

import dev.langchain4j.data.message.ChatMessage;
import dev.langchain4j.model.chat.request.ChatRequest;
import dev.langchain4j.model.chat.response.StreamingChatResponseHandler;
import dev.langchain4j.model.chat.response.StreamingHandle;

/**
 * Streaming chat model interface for token-by-token responses.
 * <p>
 * Thread-safety is implementation-dependent; consult the provider's
 * documentation before sharing one instance across threads.
 */
public interface StreamingChatModel {

    /**
     * Streams a chat response with full control over messages and parameters.
     *
     * @param request complete chat request
     * @param handler callback invoked with partial and complete responses
     * @return handle that can be used to cancel the stream
     */
    StreamingHandle chat(ChatRequest request, StreamingChatResponseHandler handler);

    /**
     * Streams a chat response for an explicit message history.
     *
     * @param messages conversation history
     * @param handler  callback invoked with partial and complete responses
     * @return handle that can be used to cancel the stream
     */
    StreamingHandle chat(List<ChatMessage> messages, StreamingChatResponseHandler handler);
}
package dev.langchain4j.model.chat.request;
import java.util.List;

import dev.langchain4j.data.message.ChatMessage;

/**
 * Complete chat request with messages and parameters.
 * <p>
 * Immutable and therefore thread-safe; instances are assembled via
 * {@link #builder()}.
 */
public class ChatRequest {

    private final List<ChatMessage> messages;
    private final ChatRequestParameters parameters;

    /** Creates a new builder for assembling a {@code ChatRequest}. */
    public static Builder builder() { /* ... */ }

    /** Returns the messages of this request. */
    public List<ChatMessage> messages() { /* ... */ }

    /** Returns the model configuration parameters of this request. */
    public ChatRequestParameters parameters() { /* ... */ }
}
package dev.langchain4j.model.chat.response;
import dev.langchain4j.data.message.AiMessage;
import dev.langchain4j.model.output.TokenUsage;
import dev.langchain4j.model.output.FinishReason;

/**
 * Complete chat response.
 * <p>
 * Immutable and therefore thread-safe.
 */
public class ChatResponse {

    private final AiMessage aiMessage;
    private final TokenUsage tokenUsage; // may be null — not every provider reports usage
    private final FinishReason finishReason;

    /** Returns the AI-generated message. */
    public AiMessage aiMessage() { /* ... */ }

    /** Returns the token usage, or {@code null} when the provider does not report it. */
    public TokenUsage tokenUsage() { /* ... */ }

    /** Returns the reason generation finished. */
    public FinishReason finishReason() { /* ... */ }
}
package dev.langchain4j.model.chat.request;
import java.util.List;

/**
 * Model configuration parameters.
 * <p>
 * Immutable and therefore thread-safe.
 */
public interface ChatRequestParameters {

    /** Provider-specific model identifier. */
    String modelName();

    /** Sampling temperature, 0.0-2.0; higher values produce more creative output. */
    Double temperature();

    /** Nucleus-sampling probability mass, 0.0-1.0. */
    Double topP();

    /** Maximum number of output tokens. */
    Integer maxTokens();

    /** Sequences that stop generation when produced. */
    List<String> stopSequences();

    /** -2.0 to 2.0; positive values penalize repeated topics. */
    Double presencePenalty();

    /** -2.0 to 2.0; positive values penalize repeated tokens. */
    Double frequencyPenalty();
}
import dev.langchain4j.model.chat.ChatModel;
// Initialize from a provider-specific module (e.g., OpenAiChatModel)
ChatModel model = /* provider-specific initialization */;

// Simple text chat
String response = model.chat("What is the capital of France?");
System.out.println(response); // "The capital of France is Paris."
import dev.langchain4j.model.chat.request.ChatRequest;
import dev.langchain4j.model.chat.request.DefaultChatRequestParameters;
import dev.langchain4j.model.chat.response.ChatResponse;
import dev.langchain4j.data.message.UserMessage;

// Build a request with explicit model parameters
ChatRequest request = ChatRequest.builder()
        .messages(UserMessage.from("Explain quantum computing briefly"))
        .parameters(DefaultChatRequestParameters.builder()
                .temperature(0.7) // moderately creative (range 0.0-2.0)
                .maxTokens(100)   // cap the output length
                .build())
        .build();

ChatResponse response = model.chat(request);
String text = response.aiMessage().text();

// Check token usage (may be null — some providers do not report it)
if (response.tokenUsage() != null) {
    System.out.println("Input tokens: " + response.tokenUsage().inputTokenCount());
    System.out.println("Output tokens: " + response.tokenUsage().outputTokenCount());
}
import dev.langchain4j.model.chat.StreamingChatModel;
import dev.langchain4j.model.chat.response.StreamingChatResponseHandler;
import dev.langchain4j.model.chat.response.PartialResponse;
import dev.langchain4j.model.chat.response.StreamingHandle;
StreamingChatModel streamingModel = /* provider-specific initialization */;
StreamingHandle handle = streamingModel.chat(request, new StreamingChatResponseHandler() {
@Override
public void onPartialResponse(PartialResponse response) {
// Process each token as it arrives
System.out.print(response.partialText());
}
@Override
public void onCompleteResponse(ChatResponse response) {
// Finalize processing
System.out.println("\n\nComplete!");
}
@Override
public void onError(Throwable error) {
System.err.println("Error: " + error.getMessage());
}
});
// Can cancel streaming if needed
// handle.cancel();import dev.langchain4j.data.message.*;
import java.util.ArrayList;
List<ChatMessage> conversation = new ArrayList<>();
// Turn 1
conversation.add(UserMessage.from("What is 2+2?"));
ChatResponse response1 = model.chat(conversation);
conversation.add(response1.aiMessage()); // "2+2 equals 4."
// Turn 2
conversation.add(UserMessage.from("What about 2*3?"));
ChatResponse response2 = model.chat(conversation);
conversation.add(response2.aiMessage()); // "2*3 equals 6."
// Turn 3 - Model has full context
conversation.add(UserMessage.from("Add those two results together"));
ChatResponse response3 = model.chat(conversation);
System.out.println(response3.aiMessage().text()); // "4 + 6 equals 10."Check your provider's documentation for:
import dev.langchain4j.exception.*;

// Catch the most specific exceptions first; LangChain4jException is the common supertype
try {
    String response = model.chat(message);
} catch (AuthenticationException e) {
    // Invalid credentials - do not retry
    handleAuthError(e);
} catch (RateLimitException e) {
    // Rate limit - retry after delay
    retryWithBackoff(e);
} catch (LangChain4jException e) {
    // Other errors
    handleError(e);
}

ChatResponse response = model.chat(request);
TokenUsage usage = response.tokenUsage();
if (usage != null) {
    logUsage(usage.inputTokenCount(), usage.outputTokenCount());
} else {
    // Some models don't provide token counts
    log.warn("Token usage not available");
}

// For long responses, streaming provides better UX
// User sees output immediately instead of waiting for the full response
streamingModel.chat(request, handler);

// Keep conversation history within model's context window
// Implement sliding window or summarization if needed.
// Copy the tail into a fresh list: subList() returns a view backed by the
// original list, which would otherwise keep all pruned messages reachable.
if (conversation.size() > MAX_MESSAGES) {
    conversation = new ArrayList<>(
            conversation.subList(conversation.size() - MAX_MESSAGES, conversation.size()));
}

| Pitfall | Solution |
|---|---|
| Not handling exceptions | Always catch at least LangChain4jException |
| Ignoring null TokenUsage | Check if (tokenUsage != null) |
| Exceeding context window | Implement message pruning or summarization |
| Not validating inputs | Sanitize user inputs before sending |
| Reusing instances unsafely | Check thread-safety in provider docs |
Install with Tessl CLI
npx tessl i tessl/maven-dev-langchain4j--langchain4j-core@1.11.0