LangChain4j integration library for Hugging Face inference capabilities, including chat, language, and embedding models.
A task-oriented cookbook of common use cases for the LangChain4j Hugging Face integration.
import dev.langchain4j.model.huggingface.HuggingFaceEmbeddingModel;
import dev.langchain4j.data.embedding.Embedding;
// Build the embedding model: API token from the environment, pinned to a
// specific sentence-transformers checkpoint (384-dimensional MiniLM).
HuggingFaceEmbeddingModel model = HuggingFaceEmbeddingModel.builder()
.accessToken(System.getenv("HF_API_KEY"))
.modelId("sentence-transformers/all-MiniLM-L6-v2")
.build();
// embed(...) returns a Response<Embedding>; content() unwraps the payload.
Embedding embedding = model.embed("Hello world").content();
// Raw vector values and their length.
float[] vector = embedding.vector();
int dimension = embedding.dimension();import dev.langchain4j.data.segment.TextSegment;
import java.util.List;
List<TextSegment> texts = List.of(
TextSegment.from("First document"),
TextSegment.from("Second document"),
TextSegment.from("Third document")
);
List<Embedding> embeddings = model.embedAll(texts).content();
for (int i = 0; i < embeddings.size(); i++) {
System.out.println("Doc " + i + ": " +
embeddings.get(i).dimension() + " dimensions");
}import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.data.embedding.Embedding;
import java.util.List;
// Setup
HuggingFaceEmbeddingModel model = HuggingFaceEmbeddingModel.builder()
.accessToken(System.getenv("HF_API_KEY"))
.modelId("sentence-transformers/all-MiniLM-L6-v2")
.build();
// Embed query
String query = "machine learning tutorial";
Embedding queryEmb = model.embed(query).content();
// Embed documents
List<String> documents = List.of(
"Machine learning is a subset of AI",
"Deep learning uses neural networks",
"Python is a programming language",
"AI models learn from data"
);
List<TextSegment> segments = documents.stream()
.map(TextSegment::from)
.toList();
List<Embedding> docEmbs = model.embedAll(segments).content();
// Calculate similarities
record ScoredDoc(int index, String text, double score) {}
List<ScoredDoc> scored = new ArrayList<>();
for (int i = 0; i < docEmbs.size(); i++) {
double similarity = cosineSimilarity(
queryEmb.vector(),
docEmbs.get(i).vector()
);
scored.add(new ScoredDoc(i, documents.get(i), similarity));
}
// Sort by similarity (descending)
scored.sort((a, b) -> Double.compare(b.score(), a.score()));
// Print results
System.out.println("Query: " + query);
for (ScoredDoc doc : scored) {
System.out.printf("%.3f: %s%n", doc.score(), doc.text());
}
// Helper: Cosine similarity
// Cosine of the angle between two equal-length vectors: 1.0 = same
// direction, 0.0 = orthogonal. NOTE(review): returns NaN if either vector
// has zero norm (0/0); callers in this file always pass model output, which
// is presumably never all-zero — confirm if reused elsewhere.
static double cosineSimilarity(float[] a, float[] b) {
// Single pass accumulating the dot product and both squared norms.
double dot = 0.0, normA = 0.0, normB = 0.0;
for (int i = 0; i < a.length; i++) {
dot += a[i] * b[i];
normA += a[i] * a[i];
normB += b[i] * b[i];
}
return dot / (Math.sqrt(normA) * Math.sqrt(normB));
}import dev.langchain4j.data.embedding.Embedding;
import java.util.*;
// Generate embeddings
List<String> documents = List.of(
"Machine learning trains models",
"AI learns from data",
"Java is object-oriented",
"Python is a language",
"Neural networks are powerful",
"Code is written by developers"
);
List<TextSegment> segments = documents.stream()
.map(TextSegment::from)
.toList();
List<Embedding> embeddings = model.embedAll(segments).content();
// Simple k-means clustering (k=2)
int k = 2;
Random rand = new Random(42);
// Initialize centroids randomly
List<float[]> centroids = new ArrayList<>();
for (int i = 0; i < k; i++) {
centroids.add(embeddings.get(rand.nextInt(embeddings.size())).vector());
}
// Iterate: standard Lloyd's k-means. `clusters` is declared OUTSIDE the
// iteration loop so the final assignment is still in scope for the printout
// below — the original declared it inside the loop body, which made the
// print block a compile error.
int maxIterations = 10;
Map<Integer, List<Integer>> clusters = new HashMap<>();
for (int iter = 0; iter < maxIterations; iter++) {
    // Assignment step: each embedding joins the cluster of its nearest centroid.
    clusters = new HashMap<>();
    for (int i = 0; i < embeddings.size(); i++) {
        int closest = 0;
        double minDist = Double.MAX_VALUE;
        for (int c = 0; c < k; c++) {
            double dist = euclideanDistance(
                embeddings.get(i).vector(),
                centroids.get(c)
            );
            if (dist < minDist) {
                minDist = dist;
                closest = c;
            }
        }
        // Named lambda parameter: `_` is not a legal parameter name before Java 21.
        clusters.computeIfAbsent(closest, key -> new ArrayList<>()).add(i);
    }
    // Update step: move each centroid to the mean of its members.
    // Empty clusters keep their previous centroid.
    for (int c = 0; c < k; c++) {
        List<Integer> members = clusters.get(c);
        if (members != null && !members.isEmpty()) {
            float[] newCentroid = new float[embeddings.get(0).dimension()];
            for (int idx : members) {
                float[] vec = embeddings.get(idx).vector();
                for (int d = 0; d < vec.length; d++) {
                    newCentroid[d] += vec[d];
                }
            }
            for (int d = 0; d < newCentroid.length; d++) {
                newCentroid[d] /= members.size();
            }
            centroids.set(c, newCentroid);
        }
    }
}
// Print final cluster assignments. getOrDefault guards against a cluster
// that ended up empty (original would NPE on clusters.get(c)).
for (int c = 0; c < k; c++) {
    System.out.println("Cluster " + c + ":");
    for (int idx : clusters.getOrDefault(c, List.of())) {
        System.out.println(" - " + documents.get(idx));
    }
}
// Helper: straight-line (L2) distance between two equal-length vectors.
// Used by the k-means assignment step above.
static double euclideanDistance(float[] a, float[] b) {
double sum = 0;
for (int i = 0; i < a.length; i++) {
double diff = a[i] - b[i];
sum += diff * diff;
}
return Math.sqrt(sum);
}import dev.langchain4j.data.embedding.Embedding;
import java.util.*;
// Document storage with embeddings
// In-memory vector store: embeds documents on insert and ranks them by
// cosine similarity at query time. Not thread-safe.
class DocumentStore {
    /** A stored document together with its precomputed embedding vector. */
    record Doc(String id, String text, float[] embedding) {}

    private final List<Doc> docs = new ArrayList<>();
    private final HuggingFaceEmbeddingModel model;

    DocumentStore(HuggingFaceEmbeddingModel model) {
        this.model = model;
    }

    /** Embeds a single document (one API call) and stores it. */
    void add(String id, String text) {
        float[] embedding = model.embed(text).content().vector();
        docs.add(new Doc(id, text, embedding));
    }

    /**
     * Embeds all documents in one batched API call and stores them.
     * values() and entrySet() of the same map instance iterate in the same
     * order, so each embedding lines up with its entry.
     */
    void addBatch(Map<String, String> documents) {
        List<TextSegment> segments = documents.values().stream()
            .map(TextSegment::from)
            .toList();
        List<Embedding> embeddings = model.embedAll(segments).content();
        int i = 0;
        for (Map.Entry<String, String> entry : documents.entrySet()) {
            docs.add(new Doc(
                entry.getKey(),
                entry.getValue(),
                embeddings.get(i++).vector()
            ));
        }
    }

    /**
     * Returns the topK stored documents most similar to the query,
     * best match first.
     * Fix: record components are private fields — they must be read through
     * the generated accessors score()/doc(); the original's direct field
     * accesses (b.score, sd.doc) do not compile.
     */
    List<Doc> search(String query, int topK) {
        float[] queryEmb = model.embed(query).content().vector();
        return docs.stream()
            .map(doc -> new ScoredDoc(
                doc,
                cosineSimilarity(queryEmb, doc.embedding())
            ))
            .sorted((a, b) -> Double.compare(b.score(), a.score()))
            .limit(topK)
            .map(ScoredDoc::doc)
            .toList();
    }

    /** A document paired with its similarity score for ranking. */
    record ScoredDoc(Doc doc, double score) {}

    // Cosine similarity of two equal-length vectors; NaN if either has zero norm.
    private static double cosineSimilarity(float[] a, float[] b) {
        double dot = 0.0, normA = 0.0, normB = 0.0;
        for (int i = 0; i < a.length; i++) {
            dot += a[i] * b[i];
            normA += a[i] * a[i];
            normB += b[i] * b[i];
        }
        return dot / (Math.sqrt(normA) * Math.sqrt(normB));
    }
}
// Usage
HuggingFaceEmbeddingModel model = HuggingFaceEmbeddingModel.builder()
.accessToken(System.getenv("HF_API_KEY"))
.modelId("sentence-transformers/all-MiniLM-L6-v2")
.build();
DocumentStore store = new DocumentStore(model);
// Add documents
Map<String, String> docs = Map.of(
"doc1", "Machine learning tutorial",
"doc2", "Java programming guide",
"doc3", "Deep learning with Python"
);
store.addBatch(docs);
// Search
List<DocumentStore.Doc> results = store.search("AI tutorial", 2);
for (DocumentStore.Doc doc : results) {
System.out.println(doc.id() + ": " + doc.text());
}// Use multilingual model
HuggingFaceEmbeddingModel model = HuggingFaceEmbeddingModel.builder()
.accessToken(System.getenv("HF_API_KEY"))
.modelId("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
.build();
// Embed texts in different languages
List<String> texts = List.of(
"Hello world", // English
"Bonjour le monde", // French
"Hola mundo", // Spanish
"你好世界" // Chinese
);
List<TextSegment> segments = texts.stream()
.map(TextSegment::from)
.toList();
List<Embedding> embeddings = model.embedAll(segments).content();
// Calculate cross-lingual similarity
float[] english = embeddings.get(0).vector();
float[] french = embeddings.get(1).vector();
double similarity = cosineSimilarity(english, french);
System.out.printf("English-French similarity: %.3f%n", similarity);Note: These tasks use deprecated classes. Migrate to OpenAiChatModel from langchain4j-open-ai module.
import dev.langchain4j.model.huggingface.HuggingFaceChatModel;
HuggingFaceChatModel model = HuggingFaceChatModel.builder()
.accessToken(System.getenv("HF_API_KEY"))
.modelId("tiiuae/falcon-7b-instruct")
.temperature(0.3) // Lower for factual responses
.build();
String answer = model.chat("What is the capital of France?");
System.out.println(answer);import dev.langchain4j.data.message.*;
import dev.langchain4j.model.chat.request.ChatRequest;
import dev.langchain4j.model.chat.response.ChatResponse;
import java.util.ArrayList;
import java.util.List;
List<ChatMessage> history = new ArrayList<>();
history.add(SystemMessage.from("You are a helpful assistant"));
// Turn 1
history.add(UserMessage.from("What is machine learning?"));
ChatRequest request1 = ChatRequest.builder()
.messages(history)
.build();
ChatResponse response1 = model.chat(request1);
history.add(AiMessage.from(response1.aiMessage().text()));
System.out.println("AI: " + response1.aiMessage().text());
// Turn 2
history.add(UserMessage.from("Can you give an example?"));
ChatRequest request2 = ChatRequest.builder()
.messages(history)
.build();
ChatResponse response2 = model.chat(request2);
history.add(AiMessage.from(response2.aiMessage().text()));
System.out.println("AI: " + response2.aiMessage().text());Note: These tasks use deprecated HuggingFaceLanguageModel. Migrate to OpenAiChatModel.
import dev.langchain4j.model.huggingface.HuggingFaceLanguageModel;
HuggingFaceLanguageModel model = HuggingFaceLanguageModel.builder()
.accessToken(System.getenv("HF_API_KEY"))
.modelId("microsoft/Phi-3-mini-4k-instruct")
.temperature(0.2) // Lower for deterministic code
.maxNewTokens(200)
.build();
String prompt = "Write a Java function to check if a number is prime:\n";
String code = model.generate(prompt).content();
System.out.println(code);HuggingFaceLanguageModel model = HuggingFaceLanguageModel.builder()
.accessToken(System.getenv("HF_API_KEY"))
.modelId("tiiuae/falcon-7b-instruct")
.temperature(0.7)
.maxNewTokens(100)
.build();
String prompt = "The three main benefits of functional programming are";
String completion = model.generate(prompt).content();
System.out.println(prompt + " " + completion);HuggingFaceLanguageModel model = HuggingFaceLanguageModel.builder()
.accessToken(System.getenv("HF_API_KEY"))
.modelId("tiiuae/falcon-7b-instruct")
.temperature(1.0) // Higher for creativity
.maxNewTokens(300)
.build();
String prompt = "Write a short story about a robot learning to paint:";
String story = model.generate(prompt).content();
System.out.println(story);Combine embeddings with chat model for RAG:
// Setup embedding model
HuggingFaceEmbeddingModel embedModel = HuggingFaceEmbeddingModel.builder()
.accessToken(System.getenv("HF_API_KEY"))
.modelId("sentence-transformers/all-MiniLM-L6-v2")
.build();
// Setup chat model (use OpenAI module in production)
// For this example, showing deprecated approach
HuggingFaceChatModel chatModel = HuggingFaceChatModel.builder()
.accessToken(System.getenv("HF_API_KEY"))
.modelId("tiiuae/falcon-7b-instruct")
.build();
// Knowledge base
List<String> documents = List.of(
"LangChain4j is a Java framework for LLM applications",
"Hugging Face provides thousands of pre-trained models",
"Vector embeddings represent text as numerical vectors"
);
// Embed documents
List<TextSegment> segments = documents.stream()
.map(TextSegment::from)
.toList();
List<Embedding> docEmbeddings = embedModel.embedAll(segments).content();
// User query
String query = "What is LangChain4j?";
Embedding queryEmb = embedModel.embed(query).content();
// Find the topK documents most relevant to the query, ranked by cosine
// similarity. (Fix: the original computed `sim` but never used it — it
// simply took the first topK documents in list order, so retrieval did
// nothing.)
int topK = 2;
record Ranked(int index, double score) {}
List<Ranked> ranked = new ArrayList<>();
for (int i = 0; i < docEmbeddings.size(); i++) {
    double sim = cosineSimilarity(
        queryEmb.vector(),
        docEmbeddings.get(i).vector()
    );
    ranked.add(new Ranked(i, sim));
}
// Sort best-first, then keep the top matches as context.
ranked.sort((a, b) -> Double.compare(b.score(), a.score()));
List<String> relevant = new ArrayList<>();
for (int i = 0; i < Math.min(topK, ranked.size()); i++) {
    relevant.add(documents.get(ranked.get(i).index()));
}
// Build prompt with context
String context = String.join("\n", relevant);
String prompt = String.format(
"Context:\n%s\n\nQuestion: %s\n\nAnswer:",
context,
query
);
// Generate answer
String answer = chatModel.chat(prompt);
System.out.println(answer);import java.util.concurrent.ConcurrentHashMap;
import java.util.Map;
// Memoizing wrapper around a Hugging Face embedding model: each distinct
// text is sent to the API at most once. Backed by a ConcurrentHashMap.
class EmbeddingCache {
    private final HuggingFaceEmbeddingModel model;
    private final Map<String, float[]> cache = new ConcurrentHashMap<>();

    EmbeddingCache(HuggingFaceEmbeddingModel model) {
        this.model = model;
    }

    // Returns the cached vector, embedding the text on first sight.
    float[] embed(String text) {
        return cache.computeIfAbsent(
            text,
            key -> model.embed(key).content().vector()
        );
    }

    // Embeds only the texts not yet cached (one batched API call), then
    // answers the whole request from the cache, preserving input order.
    List<float[]> embedBatch(List<String> texts) {
        List<String> missing = texts.stream()
            .filter(text -> !cache.containsKey(text))
            .toList();
        if (!missing.isEmpty()) {
            List<Embedding> fresh = model
                .embedAll(missing.stream().map(TextSegment::from).toList())
                .content();
            for (int i = 0; i < missing.size(); i++) {
                cache.put(missing.get(i), fresh.get(i).vector());
            }
        }
        return texts.stream().map(cache::get).toList();
    }

    // Drops every cached vector.
    void clear() {
        cache.clear();
    }

    // Number of distinct texts currently cached.
    int size() {
        return cache.size();
    }
}
// Usage
EmbeddingCache cache = new EmbeddingCache(embedModel);
float[] vec1 = cache.embed("text"); // API call
float[] vec2 = cache.embed("text"); // From cacheimport java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.List;
// Runs batch embedding calls on a dedicated fixed-size thread pool so the
// caller's thread is never blocked on network I/O.
class AsyncEmbedder {
    private final HuggingFaceEmbeddingModel model;
    private final ExecutorService executor;

    AsyncEmbedder(HuggingFaceEmbeddingModel model, int threads) {
        this.model = model;
        this.executor = Executors.newFixedThreadPool(threads);
    }

    // Embeds all texts on the pool; the future completes with the vectors,
    // or exceptionally if the underlying API call throws.
    CompletableFuture<List<Embedding>> embedAllAsync(List<String> texts) {
        return CompletableFuture.supplyAsync(
            () -> model
                .embedAll(texts.stream().map(TextSegment::from).toList())
                .content(),
            executor
        );
    }

    // Stops accepting new work; tasks already submitted are allowed to finish.
    void shutdown() {
        executor.shutdown();
    }
}
// Usage
AsyncEmbedder embedder = new AsyncEmbedder(embedModel, 4);
CompletableFuture<List<Embedding>> future =
embedder.embedAllAsync(List.of("text1", "text2"));
future.thenAccept(embeddings -> {
System.out.println("Got " + embeddings.size() + " embeddings");
}).exceptionally(ex -> {
System.err.println("Error: " + ex.getMessage());
return null;
});
embedder.shutdown();import java.time.Duration;
// Retries transient embedding failures, sleeping a fixed delay between
// attempts; the last failure is rethrown once the budget is exhausted.
class RetryableEmbedder {
    private final HuggingFaceEmbeddingModel model;
    private final int maxRetries;
    private final Duration retryDelay;

    RetryableEmbedder(HuggingFaceEmbeddingModel model,
                      int maxRetries,
                      Duration retryDelay) {
        this.model = model;
        this.maxRetries = maxRetries;
        this.retryDelay = retryDelay;
    }

    // Attempts the call up to maxRetries times. Propagates the final
    // RuntimeException; throws InterruptedException if interrupted while
    // sleeping between attempts. With maxRetries <= 0 no call is made.
    Embedding embedWithRetry(String text) throws InterruptedException {
        int attempt = 0;
        while (attempt < maxRetries) {
            try {
                return model.embed(text).content();
            } catch (RuntimeException failure) {
                attempt++;
                if (attempt >= maxRetries) {
                    throw failure;
                }
                System.err.println("Attempt " + attempt + " failed, retrying...");
                Thread.sleep(retryDelay.toMillis());
            }
        }
        // Only reachable when maxRetries <= 0 (loop never entered).
        throw new RuntimeException("Max retries exceeded");
    }
}
// Usage
RetryableEmbedder embedder = new RetryableEmbedder(
embedModel,
3,
Duration.ofSeconds(2)
);
try {
Embedding emb = embedder.embedWithRetry("text");
} catch (RuntimeException | InterruptedException e) {
System.err.println("Failed after retries: " + e.getMessage());
}class FallbackEmbedder {
private final HuggingFaceEmbeddingModel primary;
private final HuggingFaceEmbeddingModel fallback;
FallbackEmbedder(HuggingFaceEmbeddingModel primary,
HuggingFaceEmbeddingModel fallback) {
this.primary = primary;
this.fallback = fallback;
}
Embedding embed(String text) {
try {
return primary.embed(text).content();
} catch (RuntimeException e) {
System.err.println("Primary failed, using fallback");
return fallback.embed(text).content();
}
}
}
// Usage
HuggingFaceEmbeddingModel primary = HuggingFaceEmbeddingModel.builder()
.accessToken(System.getenv("HF_API_KEY"))
.modelId("sentence-transformers/all-mpnet-base-v2")
.build();
HuggingFaceEmbeddingModel fallback = HuggingFaceEmbeddingModel.builder()
.accessToken(System.getenv("HF_API_KEY"))
.modelId("sentence-transformers/all-MiniLM-L6-v2")
.build();
FallbackEmbedder embedder = new FallbackEmbedder(primary, fallback);Install with Tessl CLI
npx tessl i tessl/maven-dev-langchain4j--langchain4j-hugging-face