This package provides a deprecated integration module that enables Java applications to interact with GitHub Models through the LangChain4j framework. It offers chat models (both synchronous and streaming), embedding models, and support for AI Services with tool integration, JSON schema responses, and responsible AI features. The module wraps the Azure AI Inference SDK to expose a unified API for the language models hosted on GitHub Models, covering chat completion, embedding generation, and content filtering. As of version 1.10.0 the module is deprecated and slated for removal; users should migrate to the langchain4j-openai-official module for better functionality and integration. The library is designed as a reusable foundational component for LLM-powered Java applications that rely on GitHub-hosted AI models, offering builder-pattern configuration, proxy options, custom timeouts, and model service versioning.
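Before the streaming guide below, a brief orientation sketch of the synchronous chat model and the embedding model mentioned above. The model names are illustrative, and the convenience chat(String) call assumes the standard LangChain4j ChatModel interface:
import dev.langchain4j.data.embedding.Embedding;
import dev.langchain4j.model.github.GitHubModelsChatModel;
import dev.langchain4j.model.github.GitHubModelsEmbeddingModel;
import dev.langchain4j.model.output.Response;
// Blocking chat model: returns the complete answer in one call.
GitHubModelsChatModel chatModel = GitHubModelsChatModel.builder()
.gitHubToken(System.getenv("GITHUB_TOKEN"))
.modelName("gpt-4o-mini")
.build();
String answer = chatModel.chat("What is LangChain4j?");
// Embedding model for vector generation.
GitHubModelsEmbeddingModel embeddingModel = GitHubModelsEmbeddingModel.builder()
.gitHubToken(System.getenv("GITHUB_TOKEN"))
.modelName("text-embedding-3-small")
.build();
Response<Embedding> embedding = embeddingModel.embed("Hello, world");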
Practical guide for using GitHubModelsStreamingChatModel with real-time token delivery.
GitHubModelsStreamingChatModel model = GitHubModelsStreamingChatModel.builder()
.gitHubToken(System.getenv("GITHUB_TOKEN"))
.modelName("gpt-4o")
.build();
ChatRequest request = ChatRequest.builder()
.messages(UserMessage.from("Tell me a story about a dragon"))
.build();
model.chat(request, new StreamingChatResponseHandler() {
@Override
public void onPartialResponse(String token) {
System.out.print(token);
System.out.flush();
}
@Override
public void onCompleteResponse(ChatResponse response) {
System.out.println("\n[Complete]");
System.out.println("Tokens used: " + response.metadata().tokenUsage().totalTokenCount());
}
@Override
public void onError(Throwable error) {
System.err.println("Error: " + error.getMessage());
}
});

Accumulating the full response while streaming:
StringBuilder fullResponse = new StringBuilder();
model.chat(request, new StreamingChatResponseHandler() {
@Override
public void onPartialResponse(String token) {
fullResponse.append(token);
System.out.print(token);
}
@Override
public void onCompleteResponse(ChatResponse response) {
String complete = fullResponse.toString();
saveResponse(complete);
}
@Override
public void onError(Throwable error) {
System.err.println("Failed after: " + fullResponse.length() + " chars");
}
});

Updating a JavaFX UI from streaming callbacks:
import javafx.application.Platform;
import javafx.scene.control.TextArea;
TextArea outputArea = new TextArea();
model.chat(request, new StreamingChatResponseHandler() {
@Override
public void onPartialResponse(String token) {
// Update UI on JavaFX thread
Platform.runLater(() -> {
outputArea.appendText(token);
});
}
@Override
public void onCompleteResponse(ChatResponse response) {
Platform.runLater(() -> {
statusLabel.setText("Complete");
submitButton.setDisable(false); // JavaFX uses setDisable, not setEnabled
});
}
@Override
public void onError(Throwable error) {
Platform.runLater(() -> {
showErrorDialog(error.getMessage());
});
}
});

Updating a Swing UI from streaming callbacks:
import javax.swing.JOptionPane;
import javax.swing.JTextArea;
import javax.swing.SwingUtilities;
JTextArea textArea = new JTextArea();
model.chat(request, new StreamingChatResponseHandler() {
@Override
public void onPartialResponse(String token) {
SwingUtilities.invokeLater(() -> {
textArea.append(token);
textArea.setCaretPosition(textArea.getDocument().getLength());
});
}
@Override
public void onCompleteResponse(ChatResponse response) {
SwingUtilities.invokeLater(() -> {
statusBar.setText("Complete");
});
}
@Override
public void onError(Throwable error) {
SwingUtilities.invokeLater(() -> {
JOptionPane.showMessageDialog(null, error.getMessage());
});
}
});

Streaming over HTTP with Spring WebFlux server-sent events (SSE):
@GetMapping(value = "/chat/stream", produces = MediaType.TEXT_EVENT_STREAM_VALUE)
public Flux<String> streamChat(@RequestParam String message) {
return Flux.create(emitter -> {
ChatRequest request = ChatRequest.builder()
.messages(UserMessage.from(message))
.build();
model.chat(request, new StreamingChatResponseHandler() {
@Override
public void onPartialResponse(String token) {
emitter.next(token);
}
@Override
public void onCompleteResponse(ChatResponse response) {
emitter.complete();
}
@Override
public void onError(Throwable error) {
emitter.error(error);
}
});
});
}

Multi-turn conversation with streaming:
List<ChatMessage> conversation = new ArrayList<>();
conversation.add(SystemMessage.from("You are a helpful assistant."));
// First turn
conversation.add(UserMessage.from("Hello!"));
StringBuilder response1 = new StringBuilder();
model.chat(ChatRequest.builder().messages(conversation).build(),
new StreamingChatResponseHandler() {
@Override
public void onPartialResponse(String token) {
response1.append(token);
System.out.print(token);
}
@Override
public void onCompleteResponse(ChatResponse response) {
conversation.add(response.aiMessage());
System.out.println("\n");
// Second turn
conversation.add(UserMessage.from("Tell me a joke"));
StringBuilder response2 = new StringBuilder();
model.chat(ChatRequest.builder().messages(conversation).build(),
new StreamingChatResponseHandler() {
@Override
public void onPartialResponse(String token) {
response2.append(token);
System.out.print(token);
}
@Override
public void onCompleteResponse(ChatResponse resp) {
conversation.add(resp.aiMessage());
}
@Override
public void onError(Throwable error) { /* omitted for brevity */ }
});
}
@Override
public void onError(Throwable error) { /* omitted for brevity */ }
});

Streaming with tool (function) calling:
ToolSpecification tool = ToolSpecification.builder()
.name("calculate")
.description("Perform calculation")
.parameters(JsonObjectSchema.builder()
.addStringProperty("expression", "Math expression")
.required("expression")
.build())
.build();
ChatRequest request = ChatRequest.builder()
.messages(UserMessage.from("What is 25 * 17?"))
.toolSpecifications(tool)
.build();
model.chat(request, new StreamingChatResponseHandler() {
@Override
public void onPartialResponse(String token) {
System.out.print(token);
}
@Override
public void onCompleteResponse(ChatResponse response) {
if (response.aiMessage().hasToolExecutionRequests()) {
for (ToolExecutionRequest toolRequest : response.aiMessage().toolExecutionRequests()) {
System.out.println("\nTool: " + toolRequest.name());
System.out.println("Args: " + toolRequest.arguments());
// Execute tool and continue
String result = executeTool(toolRequest);
List<ChatMessage> messages = new ArrayList<>();
messages.add(UserMessage.from("What is 25 * 17?"));
messages.add(response.aiMessage());
messages.add(ToolExecutionResultMessage.from(toolRequest, result));
// Stream final response
model.chat(ChatRequest.builder().messages(messages).build(),
new StreamingChatResponseHandler() {
@Override
public void onPartialResponse(String token) {
System.out.print(token);
}
@Override
public void onCompleteResponse(ChatResponse finalResponse) { /* omitted for brevity */ }
@Override
public void onError(Throwable error) { /* omitted for brevity */ }
});
}
}
}
@Override
public void onError(Throwable error) {
System.err.println("Error: " + error.getMessage());
}
});

Detecting responses blocked by content filtering:
model.chat(request, new StreamingChatResponseHandler() {
@Override
public void onPartialResponse(String token) {
System.out.print(token);
}
@Override
public void onCompleteResponse(ChatResponse response) {
if (response.metadata().finishReason() == FinishReason.CONTENT_FILTER) {
System.out.println("\nContent was filtered by responsible AI policies");
String filterMsg = response.aiMessage().text();
System.out.println("Filter message: " + filterMsg);
} else {
System.out.println("\nComplete");
}
}
@Override
public void onError(Throwable error) {
System.err.println("Error: " + error.getMessage());
}
});

Retrying on error:
int maxRetries = 3;
AtomicInteger attempt = new AtomicInteger(0);
StreamingChatResponseHandler retryingHandler = new StreamingChatResponseHandler() {
@Override
public void onPartialResponse(String token) {
System.out.print(token);
}
@Override
public void onCompleteResponse(ChatResponse response) {
System.out.println("\nSuccess on attempt " + attempt.get());
}
@Override
public void onError(Throwable error) {
if (attempt.incrementAndGet() < maxRetries) {
System.err.println("\nRetrying... (attempt " + attempt.get() + ")");
model.chat(request, this);
} else {
System.err.println("\nFailed after " + maxRetries + " attempts");
}
}
};
model.chat(request, retryingHandler);

Handling timeouts:
GitHubModelsStreamingChatModel model = GitHubModelsStreamingChatModel.builder()
.gitHubToken(token)
.modelName("gpt-4o")
.timeout(Duration.ofSeconds(30))
.build();
// Timeout errors delivered to onError()
model.chat(request, new StreamingChatResponseHandler() {
@Override
public void onPartialResponse(String token) { }
@Override
public void onCompleteResponse(ChatResponse response) { }
@Override
public void onError(Throwable error) {
if (error instanceof TimeoutException) {
System.err.println("Request timed out");
}
}
});

Measuring streaming throughput:
AtomicInteger tokenCount = new AtomicInteger(0);
AtomicLong startTime = new AtomicLong(System.currentTimeMillis());
model.chat(request, new StreamingChatResponseHandler() {
@Override
public void onPartialResponse(String token) {
int count = tokenCount.incrementAndGet();
System.out.print(token);
if (count % 10 == 0) {
long elapsed = System.currentTimeMillis() - startTime.get();
double tokensPerSec = (count * 1000.0) / elapsed;
System.err.println("\n[" + count + " tokens, " +
String.format("%.1f", tokensPerSec) + " tokens/sec]");
}
}
@Override
public void onCompleteResponse(ChatResponse response) {
long elapsed = System.currentTimeMillis() - startTime.get();
System.out.println("\nTotal time: " + elapsed + "ms");
}
@Override
public void onError(Throwable error) { /* omitted for brevity */ }
});

Cooperative cancellation (the stream cannot be stopped, but further tokens can be ignored):
AtomicBoolean cancelled = new AtomicBoolean(false);
StringBuilder accumulated = new StringBuilder();
model.chat(request, new StreamingChatResponseHandler() {
@Override
public void onPartialResponse(String token) {
if (cancelled.get()) {
// Note: Can't actually stop the stream, but can ignore tokens
return;
}
accumulated.append(token);
System.out.print(token);
}
@Override
public void onCompleteResponse(ChatResponse response) {
if (cancelled.get()) {
System.out.println("\nCancelled, partial: " + accumulated.toString());
}
}
@Override
public void onError(Throwable error) { /* omitted for brevity */ }
});
// Set flag from another thread to stop processing tokens
// cancelButton.setOnAction(e -> cancelled.set(true));

Buffering tokens and flushing on a fixed interval:
StringBuilder buffer = new StringBuilder();
ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(1);
ScheduledFuture<?> flushTask = scheduler.scheduleAtFixedRate(() -> {
synchronized (buffer) {
if (buffer.length() > 0) {
System.out.print(buffer.toString());
buffer.setLength(0);
}
}
}, 0, 100, TimeUnit.MILLISECONDS);
model.chat(request, new StreamingChatResponseHandler() {
@Override
public void onPartialResponse(String token) {
synchronized (buffer) {
buffer.append(token);
}
}
@Override
public void onCompleteResponse(ChatResponse response) {
flushTask.cancel(false);
synchronized (buffer) {
if (buffer.length() > 0) {
System.out.print(buffer.toString());
}
}
scheduler.shutdown();
}
@Override
public void onError(Throwable error) {
flushTask.cancel(false);
scheduler.shutdown();
}
});

Recommended configuration for streaming:
GitHubModelsStreamingChatModel model = GitHubModelsStreamingChatModel.builder()
.gitHubToken(token)
.modelName("gpt-4o-mini") // Faster model for streaming
.temperature(0.8)
.maxTokens(2000)
.timeout(Duration.ofSeconds(90)) // Longer timeout for streaming
.build();

Choosing a model per use case:
// Fast streaming for chat
GitHubModelsStreamingChatModel chatModel = GitHubModelsStreamingChatModel.builder()
.gitHubToken(token)
.modelName("gpt-4o-mini")
.build();
// High quality for important content
GitHubModelsStreamingChatModel contentModel = GitHubModelsStreamingChatModel.builder()
.gitHubToken(token)
.modelName("gpt-4o")
.temperature(0.7)
.build();

Important: Handler callbacks execute on background threads. When updating UI components, marshal back to the UI thread (see the Android sketch below):
JavaFX: Platform.runLater()
Swing: SwingUtilities.invokeLater()
Android: runOnUiThread() or Handler
Never block in handler callbacks; keep processing fast to maintain streaming performance.
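The JavaFX and Swing patterns are shown in the examples above; for Android, a minimal sketch along the same lines (outputView and statusView are assumed TextView fields, not part of this module):
import android.os.Handler;
import android.os.Looper;
// Post UI updates to the Android main thread via a Handler.
Handler mainHandler = new Handler(Looper.getMainLooper());
model.chat(request, new StreamingChatResponseHandler() {
@Override
public void onPartialResponse(String token) {
mainHandler.post(() -> outputView.append(token));
}
@Override
public void onCompleteResponse(ChatResponse response) {
mainHandler.post(() -> statusView.setText("Complete"));
}
@Override
public void onError(Throwable error) {
mainHandler.post(() -> statusView.setText("Error: " + error.getMessage()));
}
});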
Install with Tessl CLI
npx tessl i tessl/maven-dev-langchain4j--langchain4j-github-models@1.11.0