This package provides a deprecated integration module that enables Java applications to interact with GitHub Models through the LangChain4j framework. It offers chat models (both synchronous and streaming), embedding models, and support for AI services with tool integration, JSON schema responses, and responsible AI features. The module wraps Azure AI Inference SDK to provide a unified API for accessing various language models hosted on GitHub Models, including chat completion capabilities, embeddings generation, and content filtering management. As of version 1.10.0, this module has been marked for deprecation and future removal, with users recommended to migrate to the langchain4j-openai-official module for enhanced functionality and better integration. The library is designed for reusability as a foundational component in LLM-powered Java applications that need to leverage GitHub-hosted AI models, offering builder patterns for configuration, support for proxy options, custom timeouts, and comprehensive model service versioning capabilities.
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Recommended patterns and practices for using langchain4j-github-models effectively.
✅ DO:
// ✅ Good
.gitHubToken(System.getenv("GITHUB_TOKEN"))
// ✅ Good
.gitHubToken(secretManager.getSecret("github-token"))

❌ DON'T:
// ❌ Bad
.gitHubToken("ghp_hardcoded_token_12345")

✅ DO:
// ✅ Good
logger.error("API call failed: correlationId={}", correlationId);

❌ DON'T:
// ❌ Bad
logger.error("API call failed with token: {}", token);

✅ DO: Create once, reuse many times
public class ChatService {
private final GitHubModelsChatModel model;
public ChatService() {
this.model = GitHubModelsChatModel.builder()
.gitHubToken(System.getenv("GITHUB_TOKEN"))
.modelName("gpt-4o")
.build();
}
public String chat(String message) {
return model.chat(createRequest(message))
.aiMessage().text();
}
}

❌ DON'T: Create new instances for every request
// ❌ Bad - creates new model every time
public String chat(String message) {
GitHubModelsChatModel model = GitHubModelsChatModel.builder()
.gitHubToken(System.getenv("GITHUB_TOKEN"))
.modelName("gpt-4o")
.build();
return model.chat(request).aiMessage().text();
}

✅ DO: Use enum constants for model names
// ✅ Good - type-safe, autocomplete
.modelName(GitHubModelsChatModelName.GPT_4_O)

⚠️ OK: Use strings when flexibility needed
// ⚠️ OK - flexible but error-prone
.modelName("gpt-4o")

✅ DO: Match timeout to expected response time
// Interactive UI
.timeout(Duration.ofSeconds(30))
// Background processing
.timeout(Duration.ofSeconds(120))
// Streaming
.timeout(Duration.ofSeconds(90))

❌ DON'T: Use extreme values
// ❌ Too short - likely to fail
.timeout(Duration.ofSeconds(5))
// ❌ Too long - poor UX
.timeout(Duration.ofMinutes(30))

✅ DO: Wrap API calls in try-catch
try {
ChatResponse response = model.chat(request);
return response.aiMessage().text();
} catch (HttpResponseException e) {
logger.error("Chat failed: {}", e.getMessage());
return "I apologize, but I'm having trouble responding right now.";
}

❌ DON'T: Let exceptions propagate to users
✅ DO: Handle different finish reasons
ChatResponse response = model.chat(request);
switch (response.metadata().finishReason()) {
case STOP:
return response.aiMessage().text();
case LENGTH:
logger.warn("Response truncated");
return response.aiMessage().text() + "...";
case CONTENT_FILTER:
logger.info("Content filtered");
return "Response unavailable due to content policy.";
default:
return response.aiMessage().text();
}

✅ DO: Retry transient failures with backoff
int maxRetries = 3;
for (int i = 0; i < maxRetries; i++) {
try {
return model.chat(request);
} catch (HttpResponseException e) {
if (i == maxRetries - 1 || !isRetryable(e)) {
throw e;
}
Thread.sleep(1000 * (long) Math.pow(2, i));
}
}

✅ DO: Match model to task
// Simple, high-volume tasks
.modelName(GitHubModelsChatModelName.GPT_4_O_MINI)
// Complex reasoning
.modelName(GitHubModelsChatModelName.GPT_4_O)
// Vision tasks
.modelName(GitHubModelsChatModelName.PHI_3_5_VISION_INSTRUCT)

✅ DO: Set reasonable max_tokens
// Short answers
.maxTokens(100)
// Paragraphs
.maxTokens(500)
// Articles
.maxTokens(2000)

❌ DON'T: Request more tokens than needed
// ❌ Wastes tokens and time
.maxTokens(4000) // When you only need 200

✅ DO: Let the model handle batching
// ✅ Good - automatic batching
List<TextSegment> allSegments = loadSegments(); // e.g., 100
Response<List<Embedding>> response = model.embedAll(allSegments);

❌ DON'T: Process one at a time
// ❌ Bad - inefficient
for (TextSegment segment : segments) {
model.embedAll(Arrays.asList(segment)); // 100 separate calls!
}

✅ DO: Use streaming for better UX
// ✅ Good - user sees progress
GitHubModelsStreamingChatModel streamingModel = ...
streamingModel.chat(request, handler);

⚠️ OK: Use synchronous for short responses
// ⚠️ OK for quick responses
GitHubModelsChatModel model = ...
ChatResponse response = model.chat(request);

✅ DO: Provide clear instructions
SystemMessage.from("You are a helpful assistant that answers in 2-3 sentences.")
UserMessage.from("Explain what photosynthesis is.")

❌ DON'T: Use vague prompts
UserMessage.from("Tell me about plants.")

✅ DO: Include relevant context
SystemMessage.from("Answer based on this document: " + documentText)
UserMessage.from("What is the main conclusion?")

✅ DO: Set behavior with system messages
ChatRequest.builder()
.messages(
SystemMessage.from("You are a technical support agent. Be concise and helpful."),
UserMessage.from("My app crashed.")
)
.build();

✅ DO: Specify desired length in prompt and max_tokens
SystemMessage.from("Answer in exactly 3 bullet points.")
// Also set
.maxTokens(200)

✅ DO: Separate test and production tokens
public static String getToken() {
String env = System.getenv("APP_ENV");
if ("test".equals(env)) {
return System.getenv("GITHUB_TOKEN_TEST");
}
return System.getenv("GITHUB_TOKEN");
}

✅ DO: Use mocks for unit tests
@Test
public void testChatService() {
ChatCompletionsClient mockClient = mock(ChatCompletionsClient.class);
when(mockClient.complete(any())).thenReturn(mockResponse());
GitHubModelsChatModel model = GitHubModelsChatModel.builder()
.chatCompletionsClient(mockClient)
.modelName("test")
.build();
// Test your code
}

✅ DO: Have integration tests with real API
@Test
@Tag("integration")
public void testRealAPI() {
GitHubModelsChatModel model = GitHubModelsChatModel.builder()
.gitHubToken(System.getenv("GITHUB_TOKEN_TEST"))
.modelName("gpt-4o-mini") // Use cheaper model
.build();
ChatResponse response = model.chat(request);
assertNotNull(response.aiMessage().text());
}

✅ DO: Track requests with correlation IDs
Map<String, String> headers = new HashMap<>();
headers.put("X-Correlation-ID", UUID.randomUUID().toString());
GitHubModelsChatModel.builder()
.customHeaders(headers)
.build();

✅ DO: Implement listeners for observability
public class MetricsListener implements ChatModelListener {
@Override
public void onRequest(ChatModelRequestContext context) {
metrics.increment("chat.requests");
}
@Override
public void onResponse(ChatModelResponseContext context) {
int tokens = context.response().metadata()
.tokenUsage().totalTokenCount();
metrics.gauge("chat.tokens", tokens);
}
@Override
public void onError(ChatModelErrorContext context) {
metrics.increment("chat.errors");
}
}

✅ DO: Log with appropriate levels
logger.info("Chat request processed: correlationId={}, tokens={}",
correlationId, tokenCount);
logger.warn("Response truncated: maxTokens={}", maxTokens);
logger.error("Chat failed: correlationId={}, error={}",
correlationId, e.getMessage());

✅ DO: Configure by environment
public static GitHubModelsChatModel createModel() {
String env = System.getenv("APP_ENV");
boolean isProduction = "production".equals(env);
return GitHubModelsChatModel.builder()
.gitHubToken(System.getenv("GITHUB_TOKEN"))
.modelName("gpt-4o")
.timeout(isProduction ? Duration.ofSeconds(60) : Duration.ofMinutes(5))
.maxRetries(isProduction ? 5 : 1)
.logRequestsAndResponses(!isProduction)
.build();
}

✅ DO: Implement health checks
public boolean isModelHealthy() {
try {
ChatResponse response = model.chat(ChatRequest.builder()
.messages(UserMessage.from("test"))
.build());
return response != null;
} catch (Exception e) {
logger.error("Health check failed", e);
return false;
}
}

✅ DO: Handle failures gracefully
public String chat(String message) {
try {
return model.chat(request).aiMessage().text();
} catch (HttpResponseException e) {
logger.error("Primary model failed, trying fallback", e);
try {
return fallbackModel.chat(request).aiMessage().text();
} catch (Exception fallbackError) {
logger.error("Fallback also failed", fallbackError);
return "Service temporarily unavailable.";
}
}
}

✅ DO: Implement application-level rate limiting
RateLimiter rateLimiter = RateLimiter.create(10.0); // 10 requests/second
public ChatResponse chat(ChatRequest request) {
if (!rateLimiter.tryAcquire()) {
throw new RateLimitException("Rate limit exceeded");
}
return model.chat(request);
}

// ❌ BAD - creates model every iteration
for (String message : messages) {
GitHubModelsChatModel model = GitHubModelsChatModel.builder()
.gitHubToken(token)
.modelName("gpt-4o")
.build();
model.chat(request);
}
// ✅ GOOD - reuse model
GitHubModelsChatModel model = GitHubModelsChatModel.builder()
.gitHubToken(token)
.modelName("gpt-4o")
.build();
for (String message : messages) {
model.chat(request);
}

// ❌ BAD - ignores truncation
String response = model.chat(request).aiMessage().text();
// ✅ GOOD - handles truncation
ChatResponse response = model.chat(request);
if (response.metadata().finishReason() == FinishReason.LENGTH) {
logger.warn("Response was truncated");
}

// ❌ BAD
.temperature(0.7523)
.maxTokens(1247)
// ✅ GOOD
.temperature(0.7) // Balanced creativity
.maxTokens(1000) // Approximately 750 words

// ❌ BAD
try {
model.chat(request);
} catch (Exception e) {
// Silently ignored!
}
// ✅ GOOD
try {
model.chat(request);
} catch (Exception e) {
logger.error("Chat failed", e);
throw new ServiceException("Chat service unavailable", e);
}

✅ DO:
// Configuration
public class ModelConfig {
public static GitHubModelsChatModel createChatModel() {
return GitHubModelsChatModel.builder()
.gitHubToken(System.getenv("GITHUB_TOKEN"))
.modelName("gpt-4o")
.temperature(0.7)
.maxTokens(1000)
.build();
}
}
// Service logic
public class ChatService {
private final GitHubModelsChatModel model;
public ChatService() {
this.model = ModelConfig.createChatModel();
}
public String chat(String message) {
return model.chat(createRequest(message))
.aiMessage().text();
}
}

✅ DO: Inject models as dependencies
@Service
public class ChatService {
private final GitHubModelsChatModel model;
@Autowired
public ChatService(GitHubModelsChatModel model) {
this.model = model;
}
}

✅ DO: Create reusable error handlers
public class ModelErrorHandler {
public static String handleError(Exception e) {
if (e instanceof HttpResponseException) {
HttpResponseException httpError = (HttpResponseException) e;
// Handle based on status code
}
return "Service temporarily unavailable.";
}
}

Security:
Configuration:
Error Handling:
Performance:
Production:
Install with Tessl CLI
npx tessl i tessl/maven-dev-langchain4j--langchain4j-github-modelsdocs