Core classes and interfaces of LangChain4j, providing foundational abstractions for LLM interaction, RAG, embeddings, agents, and observability.
Package: dev.langchain4j.model.listener
Thread-Safety: Listeners should be thread-safe
Use Case: Monitoring, logging, metrics, debugging LLM interactions
Observability in LangChain4j provides comprehensive monitoring of model interactions, tool executions, and RAG operations through event-based listeners.
LangChain4j uses an event-driven observability model where listeners are notified of lifecycle events:
package dev.langchain4j.model.listener;
import dev.langchain4j.model.chat.request.ChatRequest;
import dev.langchain4j.model.chat.response.ChatResponse;
/**
 * Base listener for model lifecycle events.
 * Receives callbacks before a request is sent, after a successful response,
 * and when a call fails. All methods are no-op defaults, so implementations
 * override only the callbacks they care about.
 * Thread-Safety: Implementations should be thread-safe; a single listener
 * instance may be invoked concurrently from multiple threads.
 */
public interface ModelListener {
/**
 * Called before the request is sent to the model.
 * @param event the request event (model id, request payload, timestamp, attributes)
 */
default void onRequest(RequestEvent event) { }
/**
 * Called after a successful response.
 * @param event the response event (request, response, duration, timestamp)
 */
default void onResponse(ResponseEvent event) { }
/**
 * Called when an error occurs during the model call.
 * @param event the error event (request, throwable, duration, timestamp)
 */
default void onError(ErrorEvent event) { }
}package dev.langchain4j.model.listener;
import dev.langchain4j.model.chat.request.ChatRequest;

import java.time.Instant;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
/**
 * Event fired before a model request is sent.
 * Immutability: Immutable, thread-safe — all fields are final and the
 * attribute map is defensively copied into an unmodifiable view.
 */
public class RequestEvent {

    private final String modelId;
    private final ChatRequest request;
    private final Instant timestamp;
    private final Map<String, Object> attributes;

    /**
     * Creates a new request event.
     *
     * @param modelId    identifier of the model the request targets
     * @param request    the outgoing chat request
     * @param timestamp  when the request was initiated
     * @param attributes arbitrary metadata; may be null (treated as empty)
     */
    public RequestEvent(String modelId, ChatRequest request, Instant timestamp,
                        Map<String, Object> attributes) {
        this.modelId = modelId;
        this.request = request;
        this.timestamp = timestamp;
        // Defensive copy keeps the event immutable even if the caller later
        // mutates the map it passed in.
        this.attributes = attributes == null
                ? Map.of()
                : Collections.unmodifiableMap(new HashMap<>(attributes));
    }

    public String modelId() { return modelId; }
    public ChatRequest request() { return request; }
    public Instant timestamp() { return timestamp; }
    public Map<String, Object> attributes() { return attributes; }
}package dev.langchain4j.model.listener;
import dev.langchain4j.model.chat.response.ChatResponse;
import java.time.Duration;
/**
 * Event fired after a successful model response.
 * Immutability: Immutable, thread-safe — all fields are final and set once
 * at construction.
 *
 * NOTE(review): this compilation unit also references ChatRequest and Instant;
 * confirm the corresponding imports are present in the real source file.
 */
public class ResponseEvent {

    private final String modelId;
    private final ChatRequest request;
    private final ChatResponse response;
    private final Duration duration;
    private final Instant timestamp;

    /**
     * Creates a new response event.
     *
     * @param modelId   identifier of the model that produced the response
     * @param request   the request that triggered this response
     * @param response  the model's response
     * @param duration  elapsed time between sending the request and receiving the response
     * @param timestamp when the response was received
     */
    public ResponseEvent(String modelId, ChatRequest request, ChatResponse response,
                         Duration duration, Instant timestamp) {
        this.modelId = modelId;
        this.request = request;
        this.response = response;
        this.duration = duration;
        this.timestamp = timestamp;
    }

    public String modelId() { return modelId; }
    public ChatRequest request() { return request; }
    public ChatResponse response() { return response; }
    public Duration duration() { return duration; }
    public Instant timestamp() { return timestamp; }
}package dev.langchain4j.model.listener;
/**
 * Event fired when a model call fails.
 * Immutability: Immutable, thread-safe — all fields are final and set once
 * at construction.
 *
 * NOTE(review): this compilation unit also references ChatRequest, Duration
 * and Instant; confirm the corresponding imports are present in the real
 * source file.
 */
public class ErrorEvent {

    private final String modelId;
    private final ChatRequest request;
    private final Throwable error;
    private final Duration duration;
    private final Instant timestamp;

    /**
     * Creates a new error event.
     *
     * @param modelId   identifier of the model the failed request targeted
     * @param request   the request that failed
     * @param error     the cause of the failure
     * @param duration  elapsed time before the failure was observed
     * @param timestamp when the failure was observed
     */
    public ErrorEvent(String modelId, ChatRequest request, Throwable error,
                      Duration duration, Instant timestamp) {
        this.modelId = modelId;
        this.request = request;
        this.error = error;
        this.duration = duration;
        this.timestamp = timestamp;
    }

    public String modelId() { return modelId; }
    public ChatRequest request() { return request; }
    public Throwable error() { return error; }
    public Duration duration() { return duration; }
    public Instant timestamp() { return timestamp; }
}import dev.langchain4j.model.listener.ModelListener;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Logs model lifecycle events through SLF4J.
 * Request/response summaries go to INFO; full request payloads to DEBUG.
 * Stateless, therefore thread-safe.
 */
public class LoggingListener implements ModelListener {

    private static final Logger log = LoggerFactory.getLogger(LoggingListener.class);

    @Override
    public void onRequest(RequestEvent event) {
        log.info("Request to model: {}", event.modelId());
        // Full payload only at DEBUG to keep INFO output compact.
        log.debug("Request details: {}", event.request());
    }

    @Override
    public void onResponse(ResponseEvent event) {
        log.info("Response from model: {} in {}ms", event.modelId(), event.duration().toMillis());
        // Some providers do not report token usage; skip the line in that case.
        TokenUsage usage = event.response().tokenUsage();
        if (usage != null) {
            log.info("Token usage - Input: {}, Output: {}",
                    usage.inputTokenCount(), usage.outputTokenCount());
        }
    }

    @Override
    public void onError(ErrorEvent event) {
        // Trailing Throwable argument makes SLF4J log the full stack trace.
        log.error("Error from model: {} after {}ms - {}",
                event.modelId(), event.duration().toMillis(),
                event.error().getMessage(), event.error());
    }
}import io.micrometer.core.instrument.MeterRegistry;
import io.micrometer.core.instrument.Timer;
/**
 * Publishes model metrics (latency, token usage, finish reasons, errors)
 * to a Micrometer {@code MeterRegistry}.
 * Thread-safe: Micrometer meters support concurrent updates.
 */
public class MetricsListener implements ModelListener {

    private final MeterRegistry meterRegistry;
    private final Timer requestTimer;

    /**
     * @param meterRegistry registry the meters are created in and registered with
     */
    public MetricsListener(MeterRegistry meterRegistry) {
        this.meterRegistry = meterRegistry;
        this.requestTimer = Timer.builder("llm.request")
                .description("LLM request duration")
                .register(meterRegistry);
    }

    @Override
    public void onResponse(ResponseEvent event) {
        // Record request duration
        requestTimer.record(event.duration());
        // Record token usage (may be absent depending on provider)
        if (event.response().tokenUsage() != null) {
            meterRegistry.counter("llm.tokens.input",
                    "model", event.modelId()
            ).increment(event.response().tokenUsage().inputTokenCount());
            meterRegistry.counter("llm.tokens.output",
                    "model", event.modelId()
            ).increment(event.response().tokenUsage().outputTokenCount());
        }
        // Record finish reason; guard against null to avoid an NPE on toString()
        // (finish reason is not guaranteed to be populated on every response).
        Object finishReason = event.response().finishReason();
        if (finishReason != null) {
            meterRegistry.counter("llm.finish_reason",
                    "model", event.modelId(),
                    "reason", finishReason.toString()
            ).increment();
        }
    }

    @Override
    public void onError(ErrorEvent event) {
        // Record errors, tagged by model and exception type
        meterRegistry.counter("llm.errors",
                "model", event.modelId(),
                "error_type", event.error().getClass().getSimpleName()
        ).increment();
    }
}import java.util.concurrent.atomic.AtomicReference;
/**
 * Tracks the cumulative cost of model calls from reported token usage and a
 * static per-model price table.
 * Thread-safe: the running total is held in an {@code AtomicReference}.
 *
 * NOTE(review): this compilation unit also references Map and TokenUsage;
 * confirm the corresponding imports are present in the real source file.
 */
public class CostTrackingListener implements ModelListener {

    // Original version used `log` without declaring a logger — compile error.
    private static final Logger log = LoggerFactory.getLogger(CostTrackingListener.class);

    // Running total in USD.
    private final AtomicReference<Double> totalCost = new AtomicReference<>(0.0);

    // Costs per 1K tokens (example OpenAI pricing)
    private static final Map<String, Pricing> PRICING = Map.of(
            "gpt-4", new Pricing(0.03, 0.06),
            "gpt-3.5-turbo", new Pricing(0.0005, 0.0015)
    );

    @Override
    public void onResponse(ResponseEvent event) {
        TokenUsage usage = event.response().tokenUsage();
        if (usage == null) return; // provider reported no usage — nothing to price
        String modelId = event.modelId();
        Pricing pricing = PRICING.get(modelId);
        if (pricing == null) return; // unknown model — no pricing data
        // Calculate cost from per-1K-token rates
        double inputCost = (usage.inputTokenCount() / 1000.0) * pricing.inputCostPer1K;
        double outputCost = (usage.outputTokenCount() / 1000.0) * pricing.outputCostPer1K;
        double requestCost = inputCost + outputCost;
        // Log the value returned by updateAndGet: re-reading totalCost afterwards
        // could race with a concurrent update and log someone else's total.
        double newTotal = totalCost.updateAndGet(current -> current + requestCost);
        log.info("Request cost: ${}, Total cost: ${}",
                String.format("%.6f", requestCost),
                String.format("%.6f", newTotal));
    }

    /** @return cumulative cost in USD since creation or the last reset */
    public double getTotalCost() {
        return totalCost.get();
    }

    /** Resets the cumulative cost to zero. */
    public void resetCost() {
        totalCost.set(0.0);
    }

    /** Immutable per-model price pair, in USD per 1K tokens. */
    private static class Pricing {
        final double inputCostPer1K;
        final double outputCostPer1K;
        Pricing(double inputCostPer1K, double outputCostPer1K) {
            this.inputCostPer1K = inputCostPer1K;
            this.outputCostPer1K = outputCostPer1K;
        }
    }
}/**
* Detailed debugging output
*/
public class DebugListener implements ModelListener {

    @Override
    public void onRequest(RequestEvent event) {
        System.out.println("=== REQUEST ===");
        System.out.println("Model: " + event.modelId());
        System.out.println("Time: " + event.timestamp());
        System.out.println("Messages: " + event.request().messages().size());
        // One line per message, with the text capped at 100 chars.
        for (ChatMessage message : event.request().messages()) {
            System.out.println(" - " + message.type() + ": " + abbreviate(message.text(), 100));
        }
        ChatRequestParameters params = event.request().parameters();
        if (params != null) {
            System.out.println("Parameters:");
            System.out.println(" Temperature: " + params.temperature());
            System.out.println(" Max tokens: " + params.maxTokens());
        }
    }

    @Override
    public void onResponse(ResponseEvent event) {
        System.out.println("=== RESPONSE ===");
        System.out.println("Duration: " + event.duration().toMillis() + "ms");
        System.out.println("Response: " + abbreviate(event.response().aiMessage().text(), 200));
        TokenUsage usage = event.response().tokenUsage();
        if (usage != null) {
            System.out.println("Tokens: " +
                    usage.inputTokenCount() + " in, " +
                    usage.outputTokenCount() + " out");
        }
        System.out.println("Finish reason: " + event.response().finishReason());
    }

    @Override
    public void onError(ErrorEvent event) {
        // Errors go to stderr, matching the stack trace below.
        System.err.println("=== ERROR ===");
        System.err.println("Model: " + event.modelId());
        System.err.println("Duration: " + event.duration().toMillis() + "ms");
        System.err.println("Error: " + event.error().getMessage());
        event.error().printStackTrace();
    }

    /** Shortens text to at most maxLength chars, appending "..." when cut; null passes through. */
    private String abbreviate(String text, int maxLength) {
        if (text == null) {
            return null;
        }
        return text.length() <= maxLength ? text : text.substring(0, maxLength) + "...";
    }
}// Provider-specific models typically accept listeners
// Example (actual API varies by provider):
// Listeners are invoked for every request made through this model instance,
// in the order they appear in the list.
OpenAiChatModel model = OpenAiChatModel.builder()
.apiKey(apiKey)
.modelName("gpt-4")
.listeners(List.of(
new LoggingListener(),
new MetricsListener(meterRegistry),
new CostTrackingListener()
))
.build();
// Or add listener after creation (if supported)
model.addListener(new DebugListener());/**
* Combines multiple listeners
*/
public class CompositeListener implements ModelListener {

    // Original version used `log` without declaring a logger — compile error.
    private static final Logger log = LoggerFactory.getLogger(CompositeListener.class);

    private final List<ModelListener> listeners;

    /**
     * Builds a composite that notifies the given listeners in order.
     * A failure in one listener is logged and does not stop the others.
     *
     * @param listeners listeners to notify, in iteration order
     */
    public CompositeListener(ModelListener... listeners) {
        // Clone the varargs array so a caller keeping a reference to it
        // cannot change which listeners this composite notifies.
        this.listeners = Arrays.asList(listeners.clone());
    }

    @Override
    public void onRequest(RequestEvent event) {
        for (ModelListener listener : listeners) {
            try {
                listener.onRequest(event);
            } catch (Exception e) {
                // Isolate failures: one broken listener must not starve the rest.
                log.error("Listener error in onRequest", e);
            }
        }
    }

    @Override
    public void onResponse(ResponseEvent event) {
        for (ModelListener listener : listeners) {
            try {
                listener.onResponse(event);
            } catch (Exception e) {
                log.error("Listener error in onResponse", e);
            }
        }
    }

    @Override
    public void onError(ErrorEvent event) {
        for (ModelListener listener : listeners) {
            try {
                listener.onError(event);
            } catch (Exception e) {
                log.error("Listener error in onError", e);
            }
        }
    }
}import java.util.concurrent.ExecutorService;
/**
 * Decorator that runs another listener's callbacks on an {@code ExecutorService},
 * so a slow listener never blocks the thread making the model call.
 * Thread-safe if the wrapped listener is thread-safe.
 */
public class AsyncListener implements ModelListener {

    private final ModelListener delegate;
    private final ExecutorService executor;

    public AsyncListener(ModelListener delegate, ExecutorService executor) {
        this.delegate = delegate;
        this.executor = executor;
    }

    @Override
    public void onRequest(RequestEvent event) {
        dispatch(() -> delegate.onRequest(event));
    }

    @Override
    public void onResponse(ResponseEvent event) {
        dispatch(() -> delegate.onResponse(event));
    }

    @Override
    public void onError(ErrorEvent event) {
        dispatch(() -> delegate.onError(event));
    }

    // Hands the callback to the executor; the caller returns immediately.
    private void dispatch(Runnable task) {
        executor.submit(task);
    }
}// ✅ GOOD: Quick logging
@Override
public void onResponse(ResponseEvent event) {
// Cheap, non-blocking work is safe to run inline on the calling thread.
log.info("Response in {}ms", event.duration().toMillis());
}
// ❌ BAD: Slow operations block main thread
@Override
public void onResponse(ResponseEvent event) {
// Listeners run synchronously on the request path, so a blocking call here
// adds its full latency to every model call.
database.save(event); // Blocking database call!
}
// ✅ BETTER: Use async wrapper
ModelListener asyncListener = new AsyncListener(dbListener, executor);// ✅ GOOD: Catch exceptions in listeners
@Override
public void onResponse(ResponseEvent event) {
try {
processEvent(event);
} catch (Exception e) {
// Swallow after logging: a listener failure must never break the model
// call or prevent other listeners from being notified.
log.error("Listener error", e);
// Don't rethrow - prevents breaking other listeners
}
}// ✅ GOOD: Thread-safe counters
// AtomicLong makes the increment a single atomic operation.
private final AtomicLong requestCount = new AtomicLong();
@Override
public void onRequest(RequestEvent event) {
requestCount.incrementAndGet();
}
// ❌ BAD: Not thread-safe
private long requestCount = 0;
@Override
public void onRequest(RequestEvent event) {
// ++ is a read-modify-write of three steps; concurrent callers lose updates.
requestCount++; // Race condition!
}
}// ✅ GOOD: Structured logging with MDC
@Override
public void onRequest(RequestEvent event) {
MDC.put("model_id", event.modelId());
MDC.put("request_id", UUID.randomUUID().toString());
log.info("LLM request started");
// NOTE(review): MDC.clear() wipes ALL MDC entries on this thread, including
// any set by the surrounding application; prefer MDC.remove("model_id") /
// MDC.remove("request_id") for just the keys added above. Clearing here also
// means onResponse/onError logs carry no request context — confirm intended.
MDC.clear();
}/**
* Comprehensive monitoring
*/
public class MonitoringListener implements ModelListener {

    private final AtomicLong totalRequests = new AtomicLong();
    private final AtomicLong successfulRequests = new AtomicLong();
    private final AtomicLong failedRequests = new AtomicLong();
    private final LongAdder totalInputTokens = new LongAdder();
    private final LongAdder totalOutputTokens = new LongAdder();
    private final DoubleAdder totalCost = new DoubleAdder();
    // NOTE(review): grows without bound (one entry per successful request);
    // for long-running processes prefer a sliding window or a histogram.
    private final List<Long> latencies = new CopyOnWriteArrayList<>();

    @Override
    public void onRequest(RequestEvent event) {
        totalRequests.incrementAndGet();
    }

    @Override
    public void onResponse(ResponseEvent event) {
        successfulRequests.incrementAndGet();
        // Track latency
        latencies.add(event.duration().toMillis());
        // Track tokens (token usage may be absent for some providers)
        if (event.response().tokenUsage() != null) {
            totalInputTokens.add(event.response().tokenUsage().inputTokenCount());
            totalOutputTokens.add(event.response().tokenUsage().outputTokenCount());
            // Calculate cost (example pricing); calculateCost defined elsewhere
            double cost = calculateCost(event.response().tokenUsage(), event.modelId());
            totalCost.add(cost);
        }
    }

    @Override
    public void onError(ErrorEvent event) {
        failedRequests.incrementAndGet();
    }

    /** @return a snapshot of the counters accumulated so far */
    public Stats getStats() {
        // average().orElse(0.0) yields a double; the original declared this as
        // `long avgLatency`, which does not compile.
        double avgLatency = latencies.stream()
                .mapToLong(Long::longValue)
                .average()
                .orElse(0.0);
        return new Stats(
                totalRequests.get(),
                successfulRequests.get(),
                failedRequests.get(),
                totalInputTokens.sum(),
                totalOutputTokens.sum(),
                totalCost.sum(),
                avgLatency
        );
    }
}| Pitfall | Solution |
|---|---|
| Slow listeners block requests | Use async wrapper |
| Not thread-safe | Use atomic types |
| Throwing exceptions | Catch and log, don't rethrow |
| No error handling | Wrap in try-catch |
| Storing too much data | Use sampling or aggregation |
Install with Tessl CLI
npx tessl i tessl/maven-dev-langchain4j--langchain4j-core@1.11.0