This package provides a deprecated integration module that enables Java applications to interact with GitHub Models through the LangChain4j framework. It offers chat models (both synchronous and streaming), embedding models, and support for AI services with tool integration, JSON schema responses, and responsible AI features. The module wraps Azure AI Inference SDK to provide a unified API for accessing various language models hosted on GitHub Models, including chat completion capabilities, embeddings generation, and content filtering management. As of version 1.10.0, this module has been marked for deprecation and future removal, with users recommended to migrate to the langchain4j-openai-official module for enhanced functionality and better integration. The library is designed for reusability as a foundational component in LLM-powered Java applications that need to leverage GitHub-hosted AI models, offering builder patterns for configuration, support for proxy options, custom timeouts, and comprehensive model service versioning capabilities.
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Practical guide for using GitHubModelsEmbeddingModel for text embeddings and semantic search.
GitHubModelsEmbeddingModel model = GitHubModelsEmbeddingModel.builder()
.gitHubToken(System.getenv("GITHUB_TOKEN"))
.modelName(GitHubModelsEmbeddingModelName.TEXT_EMBEDDING_3_SMALL)
.build();
List<TextSegment> segments = Arrays.asList(
TextSegment.from("Hello world")
);
Response<List<Embedding>> response = model.embedAll(segments);
Embedding embedding = response.content().get(0);
System.out.println("Dimension: " + embedding.dimension());
System.out.println("Vector: " + Arrays.toString(embedding.vector()));

List<TextSegment> segments = Arrays.asList(
TextSegment.from("Machine learning is a subset of AI"),
TextSegment.from("Deep learning uses neural networks"),
TextSegment.from("Natural language processing handles text")
);
Response<List<Embedding>> response = model.embedAll(segments);
List<Embedding> embeddings = response.content();
for (int i = 0; i < embeddings.size(); i++) {
System.out.println("Segment " + i + " dimension: " +
embeddings.get(i).dimension());
}
// Check token usage
TokenUsage usage = response.tokenUsage();
System.out.println("Tokens used: " + usage.inputTokenCount());

GitHubModelsEmbeddingModel model = GitHubModelsEmbeddingModel.builder()
.gitHubToken(token)
.modelName(GitHubModelsEmbeddingModelName.TEXT_EMBEDDING_3_SMALL)
.build();
// Embed documents
List<TextSegment> documents = Arrays.asList(
TextSegment.from("Paris is the capital of France"),
TextSegment.from("Berlin is the capital of Germany"),
TextSegment.from("Tokyo is the capital of Japan")
);
Response<List<Embedding>> docResponse = model.embedAll(documents);
List<Embedding> docEmbeddings = docResponse.content();
// Embed query
List<TextSegment> query = Arrays.asList(
TextSegment.from("What is the capital of France?")
);
Response<List<Embedding>> queryResponse = model.embedAll(query);
Embedding queryEmbedding = queryResponse.content().get(0);
// Find most similar
int bestMatch = -1;
double bestSimilarity = -1.0;
for (int i = 0; i < docEmbeddings.size(); i++) {
double similarity = cosineSimilarity(queryEmbedding, docEmbeddings.get(i));
System.out.println("Document " + i + " similarity: " + similarity);
if (similarity > bestSimilarity) {
bestSimilarity = similarity;
bestMatch = i;
}
}
System.out.println("\nBest match: Document " + bestMatch);
System.out.println("Text: " + documents.get(bestMatch).text());

/**
 * Computes the cosine similarity between two embedding vectors.
 *
 * @param a the first embedding
 * @param b the second embedding; must have the same dimension as {@code a}
 * @return a value in [-1, 1], where 1 means the vectors point in the same
 *         direction; returns 0.0 if either vector has zero magnitude
 *         (guards against NaN from division by zero)
 * @throws IllegalArgumentException if the vectors differ in dimension
 */
public static double cosineSimilarity(Embedding a, Embedding b) {
    float[] vectorA = a.vector();
    float[] vectorB = b.vector();
    if (vectorA.length != vectorB.length) {
        throw new IllegalArgumentException("Vectors must have same dimension");
    }
    double dotProduct = 0.0;
    double normA = 0.0;
    double normB = 0.0;
    for (int i = 0; i < vectorA.length; i++) {
        dotProduct += vectorA[i] * vectorB[i];
        normA += vectorA[i] * vectorA[i];
        normB += vectorB[i] * vectorB[i];
    }
    double denominator = Math.sqrt(normA) * Math.sqrt(normB);
    // An all-zero vector has no direction; similarity is defined as 0 here
    // rather than returning NaN.
    if (denominator == 0.0) {
        return 0.0;
    }
    return dotProduct / denominator;
}

GitHubModelsEmbeddingModel model = GitHubModelsEmbeddingModel.builder()
.gitHubToken(token)
.modelName(GitHubModelsEmbeddingModelName.TEXT_EMBEDDING_3_LARGE)
.build();
List<Document> documents = loadDocuments();
List<TextSegment> segments = documents.stream()
.map(doc -> TextSegment.from(doc.getText()))
.collect(Collectors.toList());
Response<List<Embedding>> response = model.embedAll(segments);
List<Embedding> embeddings = response.content();
// Store in vector database
for (int i = 0; i < documents.size(); i++) {
String id = documents.get(i).getId();
String text = documents.get(i).getText();
float[] vector = embeddings.get(i).vector();
vectorDB.insert(id, text, vector);
}

// Embed query
String userQuery = "How does machine learning work?";
Embedding queryEmbedding = model.embedAll(Arrays.asList(
TextSegment.from(userQuery)
)).content().get(0);
// Search vector DB
List<SearchResult> results = vectorDB.search(
queryEmbedding.vector(),
5 // topK: number of results to return
);
for (SearchResult result : results) {
System.out.println("Score: " + result.getScore());
System.out.println("Text: " + result.getText());
}

List<TextSegment> allDocuments = loadManyDocuments(); // e.g., 1000 docs
List<Embedding> allEmbeddings = new ArrayList<>();
// Model automatically batches into groups of 16
Response<List<Embedding>> response = model.embedAll(allDocuments);
allEmbeddings.addAll(response.content());
System.out.println("Generated " + allEmbeddings.size() + " embeddings");
System.out.println("Total tokens: " + response.tokenUsage().inputTokenCount());

List<TextSegment> allDocuments = loadManyDocuments();
List<Embedding> allEmbeddings = new ArrayList<>();
int chunkSize = 100;
int processed = 0;
for (int i = 0; i < allDocuments.size(); i += chunkSize) {
int end = Math.min(i + chunkSize, allDocuments.size());
List<TextSegment> chunk = allDocuments.subList(i, end);
System.out.println("Processing documents " + i + " to " + end);
Response<List<Embedding>> response = model.embedAll(chunk);
allEmbeddings.addAll(response.content());
processed += chunk.size();
double progress = (processed * 100.0) / allDocuments.size();
System.out.println("Progress: " + String.format("%.1f", progress) + "%");
}

The model automatically splits requests larger than 16 segments:
// These 50 segments will be processed in 4 API calls:
// Batch 1: 16 segments
// Batch 2: 16 segments
// Batch 3: 16 segments
// Batch 4: 2 segments
List<TextSegment> segments = new ArrayList<>();
for (int i = 0; i < 50; i++) {
segments.add(TextSegment.from("Document " + i));
}
// Automatically batched
Response<List<Embedding>> response = model.embedAll(segments);
List<Embedding> embeddings = response.content(); // All 50 embeddings
System.out.println("Embeddings: " + embeddings.size()); // 50

// Default: 3072 dimensions
GitHubModelsEmbeddingModel largeModel = GitHubModelsEmbeddingModel.builder()
.gitHubToken(token)
.modelName("text-embedding-3-large")
.build();
// Custom: 512 dimensions (saves storage, slight quality reduction)
GitHubModelsEmbeddingModel reducedModel = GitHubModelsEmbeddingModel.builder()
.gitHubToken(token)
.modelName("text-embedding-3-large")
.dimensions(512)
.build();
Response<List<Embedding>> response = reducedModel.embedAll(segments);
Embedding emb = response.content().get(0);
System.out.println("Dimension: " + emb.dimension()); // 512

// Vector DB requires 768-dimensional vectors
GitHubModelsEmbeddingModel model = GitHubModelsEmbeddingModel.builder()
.gitHubToken(token)
.modelName("text-embedding-3-large")
.dimensions(768)
.build();

Note: Only OpenAI text-embedding-3 models support custom dimensions. Cohere models have fixed dimensions (1024).
// General purpose, balanced
.modelName(GitHubModelsEmbeddingModelName.TEXT_EMBEDDING_3_SMALL) // 1536 dim
// Highest quality
.modelName(GitHubModelsEmbeddingModelName.TEXT_EMBEDDING_3_LARGE) // 3072 dim
// Multilingual (100+ languages)
.modelName(GitHubModelsEmbeddingModelName.COHERE_EMBED_V3_MULTILINGUAL) // 1024 dim
// English only (optimized)
.modelName(GitHubModelsEmbeddingModelName.COHERE_EMBED_V3_ENGLISH) // 1024 dim

| Model | Dimensions | Custom Dims | Best For |
|---|---|---|---|
| TEXT_EMBEDDING_3_SMALL | 1536 | Yes | General use, balanced |
| TEXT_EMBEDDING_3_LARGE | 3072 | Yes | Maximum quality |
| COHERE_EMBED_V3_ENGLISH | 1024 | No | English-only apps |
| COHERE_EMBED_V3_MULTILINGUAL | 1024 | No | Multilingual apps |
List<TextSegment> documents = loadDocuments();
Response<List<Embedding>> response = model.embedAll(documents);
List<Embedding> embeddings = response.content();
// Use k-means or hierarchical clustering on embeddings
// Example with simple distance-based grouping:
Map<Integer, List<Integer>> clusters = new HashMap<>();
// Implement clustering algorithm using cosine similarity
for (int i = 0; i < embeddings.size(); i++) {
for (int j = i + 1; j < embeddings.size(); j++) {
double similarity = cosineSimilarity(embeddings.get(i), embeddings.get(j));
if (similarity > 0.8) {
// Group similar documents
addToCluster(clusters, i, j);
}
}
}

double DUPLICATE_THRESHOLD = 0.95;
List<TextSegment> documents = loadDocuments();
Response<List<Embedding>> response = model.embedAll(documents);
List<Embedding> embeddings = response.content();
Set<Integer> duplicates = new HashSet<>();
for (int i = 0; i < embeddings.size(); i++) {
if (duplicates.contains(i)) continue;
for (int j = i + 1; j < embeddings.size(); j++) {
if (duplicates.contains(j)) continue;
double similarity = cosineSimilarity(embeddings.get(i), embeddings.get(j));
if (similarity > DUPLICATE_THRESHOLD) {
System.out.println("Potential duplicate found:");
System.out.println(" Doc " + i + ": " + documents.get(i).text());
System.out.println(" Doc " + j + ": " + documents.get(j).text());
System.out.println(" Similarity: " + similarity);
duplicates.add(j);
}
}
}

// User has viewed/liked these documents
List<TextSegment> likedDocs = Arrays.asList(
TextSegment.from("Introduction to machine learning"),
TextSegment.from("Neural network architectures")
);
// Get average embedding of liked content
List<Embedding> likedEmbeddings = model.embedAll(likedDocs).content();
float[] avgVector = averageEmbeddings(likedEmbeddings);
// Find similar content from catalog
List<TextSegment> catalog = loadCatalog();
List<Embedding> catalogEmbeddings = model.embedAll(catalog).content();
List<RecommendationScore> recommendations = new ArrayList<>();
for (int i = 0; i < catalogEmbeddings.size(); i++) {
double similarity = cosineSimilarity(
new Embedding(avgVector),
catalogEmbeddings.get(i)
);
recommendations.add(new RecommendationScore(i, similarity));
}
// Sort and return top recommendations
recommendations.sort((a, b) -> Double.compare(b.score, a.score));
List<RecommendationScore> topRecommendations = recommendations.subList(0, 10);

GitHubModelsEmbeddingModel model = GitHubModelsEmbeddingModel.builder()
.gitHubToken(token)
.modelName(GitHubModelsEmbeddingModelName.COHERE_EMBED_V3_MULTILINGUAL)
.build();
// Documents in different languages
List<TextSegment> docs = Arrays.asList(
TextSegment.from("Hello world"), // English
TextSegment.from("Bonjour le monde"), // French
TextSegment.from("Hola mundo"), // Spanish
TextSegment.from("Hallo Welt") // German
);
List<Embedding> docEmbeddings = model.embedAll(docs).content();
// Query in any language
String query = "salutations"; // French
Embedding queryEmb = model.embedAll(Arrays.asList(
TextSegment.from(query)
)).content().get(0);
// Find matches across languages
for (int i = 0; i < docEmbeddings.size(); i++) {
double similarity = cosineSimilarity(queryEmb, docEmbeddings.get(i));
System.out.println(docs.get(i).text() + ": " + similarity);
}

try {
Response<List<Embedding>> response = model.embedAll(segments);
List<Embedding> embeddings = response.content();
} catch (HttpResponseException e) {
System.err.println("HTTP error: " + e.getResponse().getStatusCode());
System.err.println("Message: " + e.getMessage());
if (e.getResponse().getStatusCode() == 429) {
// Rate limit - retry with backoff.
// Thread.sleep throws the checked InterruptedException, so it must be
// handled; restore the interrupt status rather than swallowing it.
try {
Thread.sleep(1000);
} catch (InterruptedException ie) {
Thread.currentThread().interrupt();
}
// Retry...
}
}

List<TextSegment> segments = new ArrayList<>(); // Empty
Response<List<Embedding>> response = model.embedAll(segments);
List<Embedding> embeddings = response.content();
System.out.println("Count: " + embeddings.size()); // 0

// Process in optimal chunks for your use case
int optimalChunkSize = 100; // Balance between API calls and memory
List<TextSegment> allDocs = loadDocuments();
for (int i = 0; i < allDocs.size(); i += optimalChunkSize) {
List<TextSegment> chunk = allDocs.subList(i,
Math.min(i + optimalChunkSize, allDocs.size()));
Response<List<Embedding>> response = model.embedAll(chunk);
processEmbeddings(response.content());
}

// Create once, reuse many times
GitHubModelsEmbeddingModel model = GitHubModelsEmbeddingModel.builder()
.gitHubToken(token)
.modelName(GitHubModelsEmbeddingModelName.TEXT_EMBEDDING_3_SMALL)
.build();
// Use for multiple embedding operations
Response<List<Embedding>> response1 = model.embedAll(documents1);
Response<List<Embedding>> response2 = model.embedAll(documents2);
Response<List<Embedding>> response3 = model.embedAll(query);

Install with Tessl CLI
npx tessl i tessl/maven-dev-langchain4j--langchain4j-github-models@1.11.0