Core classes and interfaces of LangChain4j, providing foundational abstractions for LLM interaction, RAG, embeddings, agents, and observability.
Package: dev.langchain4j.model.language
Thread-Safety: Implementation-dependent, typically thread-safe
Primary Interfaces: LanguageModel, StreamingLanguageModel
Language models provide simple text completion without the conversation structure of chat models. Use them when you don't need multi-turn context or message types.
```java
package dev.langchain4j.model.language;

import dev.langchain4j.model.output.Response;

/**
 * Language model interface for text completion.
 * Simpler than ChatModel - no conversation structure.
 * Thread-Safety: Implementation-dependent, typically thread-safe.
 */
public interface LanguageModel {

    /**
     * Generates a text completion for a prompt.
     *
     * @param prompt Input prompt (non-null)
     * @return Response with the generated text
     */
    Response<String> generate(String prompt);
}
```

```java
package dev.langchain4j.model.language;
import dev.langchain4j.model.output.StreamingResponseHandler;
import dev.langchain4j.model.output.StreamingHandle;

/**
 * Streaming language model for token-by-token generation.
 * Thread-Safety: Implementation-dependent, typically thread-safe.
 */
public interface StreamingLanguageModel {

    /**
     * Generates a streaming text completion.
     *
     * @param prompt  Input prompt (non-null)
     * @param handler Callback for partial and complete responses
     * @return Handle that can be used to cancel streaming
     */
    StreamingHandle generate(String prompt, StreamingResponseHandler<String> handler);
}
```

```java
import dev.langchain4j.model.language.LanguageModel;
import dev.langchain4j.model.output.Response;

// Initialize from a provider-specific module
LanguageModel model = /* provider-specific initialization */;

// Generate text
Response<String> response = model.generate("Complete this sentence: The capital of France is");
String completion = response.content();
System.out.println(completion); // "Paris."

// Access token usage if available
if (response.tokenUsage() != null) {
    System.out.println("Tokens: " + response.tokenUsage().totalTokenCount());
}
```

```java
import dev.langchain4j.model.language.StreamingLanguageModel;
import dev.langchain4j.model.output.Response;
import dev.langchain4j.model.output.StreamingResponseHandler;

StreamingLanguageModel streamingModel = /* provider-specific initialization */;

streamingModel.generate(
    "Write a short poem about AI:",
    new StreamingResponseHandler<String>() {

        @Override
        public void onPartialResponse(String partialResponse) {
            // Print each token as it arrives
            System.out.print(partialResponse);
        }

        @Override
        public void onCompleteResponse(Response<String> response) {
            System.out.println("\n\nComplete!");
            if (response.tokenUsage() != null) {
                System.out.println("Total tokens: " + response.tokenUsage().totalTokenCount());
            }
        }

        @Override
        public void onError(Throwable error) {
            System.err.println("Error: " + error.getMessage());
        }
    }
);
```
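The returned handle can stop generation early, for example when a user cancels a request. A minimal sketch, assuming `StreamingHandle` exposes a `cancel()` method; this section declares the type but not its methods, so verify the actual API:

```java
// Hypothetical sketch: cancel() is an assumed method name on StreamingHandle
StreamingHandle handle = streamingModel.generate("Write a long essay about AI:", handler);

// Later, e.g. when the user navigates away:
handle.cancel(); // assumed API - check the real StreamingHandle interface
```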
"Translate to French: Hello",
"Translate to Spanish: Hello",
"Translate to German: Hello"
);
for (String prompt : prompts) {
Response<String> response = model.generate(prompt);
System.out.println(prompt + " -> " + response.content());
}String prompt = "The benefits of regular exercise include:";
Response<String> response = model.generate(prompt);
System.out.println(response.content());
```

```java
String template = "Write a product description for: %s\n\nDescription:";
for (String product : products) {
    String prompt = String.format(template, product);
    Response<String> response = model.generate(prompt);
    saveDescription(product, response.content());
}
```

```java
List<String> texts = loadTexts();
for (String text : texts) {
    String prompt = String.format("Translate to French: %s\n\nTranslation:", text);
    Response<String> response = model.generate(prompt);
    saveTranslation(text, response.content());
}
```

```java
String prompt = """
    Write a Java function that:
    - Takes a list of integers
    - Returns the sum of even numbers
    Code:
    """;

Response<String> response = model.generate(prompt);
System.out.println(response.content());
```

```java
// ❌ BAD: Vague prompt
String vague = "Tell me about dogs";
// ✅ GOOD: Specific prompt
String specific = "List 5 common dog breeds and their typical characteristics:\n\n1.";// For models that support it, use stop sequences
// to prevent over-generation. Stop sequences are configured through
// provider-specific model builders; this snippet also truncates client-side.
String prompt = "Q: What is 2+2?\nA:";

Response<String> response = model.generate(prompt);

// Keep only the first line of the answer
String answer = response.content().split("\n")[0];
```

```java
StringBuilder fullResponse = new StringBuilder();
streamingModel.generate(prompt, new StreamingResponseHandler<String>() {

    @Override
    public void onPartialResponse(String partial) {
        fullResponse.append(partial);
        updateUI(partial); // Real-time UI updates
    }

    @Override
    public void onCompleteResponse(Response<String> response) {
        finalizeOutput(fullResponse.toString());
    }

    @Override
    public void onError(Throwable error) {
        handleError(error);
    }
});
```

| Pitfall | Solution |
|---|---|
| Using for conversations | Use `ChatModel` for multi-turn conversations |
| Not handling exceptions | Wrap calls in try-catch with proper error handling |
| Ignoring null `TokenUsage` | Check `if (tokenUsage != null)` before reading counts |
| Unclear prompts | Be specific and provide examples |
| Not setting max tokens | Long completions may be truncated |
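The first three pitfalls can be handled together. A minimal sketch; the exception type caught here is illustrative, since concrete exception classes are provider-specific:

```java
try {
    Response<String> response = model.generate(prompt);
    // Guard against null TokenUsage before reading counts
    if (response.tokenUsage() != null) {
        System.out.println("Tokens: " + response.tokenUsage().totalTokenCount());
    }
    System.out.println(response.content());
} catch (RuntimeException e) {
    // Provider-specific failures (network errors, rate limits) surface here
    System.err.println("Generation failed: " + e.getMessage());
}
```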
```java
// Synchronous: wait for the complete response
// Best for: short completions, batch processing
Response<String> response = model.generate(prompt);

// Streaming: process tokens as they arrive
// Best for: long responses, real-time UI, better UX
streamingModel.generate(prompt, handler);
```

```java
// For independent prompts, consider parallel processing
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.stream.Collectors;

ExecutorService executor = Executors.newFixedThreadPool(10);

List<CompletableFuture<String>> futures = prompts.stream()
    .map(prompt -> CompletableFuture.supplyAsync(() -> {
        return model.generate(prompt).content();
    }, executor))
    .collect(Collectors.toList());

// Wait for all completions
List<String> results = futures.stream()
    .map(CompletableFuture::join)
    .collect(Collectors.toList());

executor.shutdown();
```
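Because thread-safety is implementation-dependent (see above), confirm that the concrete model implementation is safe to share across threads before reusing a single instance in a pool like this.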
Install with Tessl CLI:

```bash
npx tessl i tessl/maven-dev-langchain4j--langchain4j-core@1.11.0
```