LangChain4j integration library for Hugging Face inference capabilities including chat, language, and embedding models
Fastest paths to working code with LangChain4j Hugging Face integration.
Maven:
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-hugging-face</artifactId>
<version>1.11.0</version>
</dependency>

Gradle:
implementation 'dev.langchain4j:langchain4j-hugging-face:1.11.0'

Get API Token: https://huggingface.co/settings/tokens
Set Environment Variable:
export HF_API_KEY="your_token_here"

import dev.langchain4j.model.huggingface.HuggingFaceEmbeddingModel;
// One-liner with defaults
HuggingFaceEmbeddingModel model = HuggingFaceEmbeddingModel
.withAccessToken(System.getenv("HF_API_KEY"));
// Generate embedding
float[] vector = model.embed("Hello world").content().vector();
System.out.println("Embedding dimension: " + vector.length);

import dev.langchain4j.model.huggingface.HuggingFaceEmbeddingModel;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.data.embedding.Embedding;
import dev.langchain4j.model.output.Response;
import java.util.List;
HuggingFaceEmbeddingModel model = HuggingFaceEmbeddingModel.builder()
.accessToken(System.getenv("HF_API_KEY"))
.modelId("sentence-transformers/all-MiniLM-L6-v2")
.build();
// Single text
Response<Embedding> single = model.embed("Hello world");
float[] vector = single.content().vector();
// Multiple texts (more efficient)
List<TextSegment> segments = List.of(
TextSegment.from("First text"),
TextSegment.from("Second text")
);
Response<List<Embedding>> batch = model.embedAll(segments);
List<Embedding> embeddings = batch.content();// Fast and efficient (384 dimensions)
HuggingFaceEmbeddingModel fast = HuggingFaceEmbeddingModel.builder()
.accessToken(System.getenv("HF_API_KEY"))
.modelId("sentence-transformers/all-MiniLM-L6-v2")
.build();
// High quality (768 dimensions)
HuggingFaceEmbeddingModel quality = HuggingFaceEmbeddingModel.builder()
.accessToken(System.getenv("HF_API_KEY"))
.modelId("sentence-transformers/all-mpnet-base-v2")
.build();
// Multilingual
HuggingFaceEmbeddingModel multilingual = HuggingFaceEmbeddingModel.builder()
.accessToken(System.getenv("HF_API_KEY"))
.modelId("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
.build();

Note: HuggingFaceChatModel is deprecated. Use OpenAiChatModel from langchain4j-open-ai module instead.
import dev.langchain4j.model.huggingface.HuggingFaceChatModel;
HuggingFaceChatModel model = HuggingFaceChatModel
.withAccessToken(System.getenv("HF_API_KEY"));
String response = model.chat("What is Java?");

import dev.langchain4j.model.huggingface.HuggingFaceChatModel;
import dev.langchain4j.model.chat.response.ChatResponse;
import dev.langchain4j.data.message.UserMessage;
import java.util.List;
HuggingFaceChatModel model = HuggingFaceChatModel.builder()
.accessToken(System.getenv("HF_API_KEY"))
.modelId("tiiuae/falcon-7b-instruct")
.temperature(0.7)
.maxNewTokens(200)
.build();
// Simple string chat
String simpleResponse = model.chat("Explain machine learning");
// With message objects
ChatResponse response = model.chat(UserMessage.from("What is Java?"));
String aiMessage = response.aiMessage().text();

// Add dependency: langchain4j-open-ai
import dev.langchain4j.model.openai.OpenAiChatModel;
OpenAiChatModel model = OpenAiChatModel.builder()
.apiKey(System.getenv("HF_API_KEY"))
.baseUrl("https://router.huggingface.co/v1")
.modelName("tiiuae/falcon-7b-instruct:hf-inference")
.temperature(0.7)
.build();
String response = model.chat("What is Java?");

Note: HuggingFaceLanguageModel is deprecated. Use OpenAiChatModel from langchain4j-open-ai module instead.
import dev.langchain4j.model.huggingface.HuggingFaceLanguageModel;
HuggingFaceLanguageModel model = HuggingFaceLanguageModel
.withAccessToken(System.getenv("HF_API_KEY"));
String text = model.generate("Write a haiku:").content();import dev.langchain4j.model.huggingface.HuggingFaceLanguageModel;
import dev.langchain4j.model.output.Response;
HuggingFaceLanguageModel model = HuggingFaceLanguageModel.builder()
.accessToken(System.getenv("HF_API_KEY"))
.modelId("microsoft/Phi-3-mini-4k-instruct")
.temperature(0.8)
.maxNewTokens(150)
.build();
Response<String> response = model.generate("Explain quantum computing:");
String generated = response.content();import dev.langchain4j.model.huggingface.HuggingFaceEmbeddingModel;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.data.embedding.Embedding;
import java.util.List;
HuggingFaceEmbeddingModel model = HuggingFaceEmbeddingModel.builder()
.accessToken(System.getenv("HF_API_KEY"))
.modelId("sentence-transformers/all-MiniLM-L6-v2")
.build();
// Embed query
Embedding query = model.embed("machine learning tutorial")
.content();
// Embed documents
List<TextSegment> docs = List.of(
TextSegment.from("Machine learning is a subset of AI"),
TextSegment.from("Deep learning uses neural networks"),
TextSegment.from("Python is a programming language")
);
List<Embedding> docEmbeddings = model.embedAll(docs).content();
// Find most similar (cosine similarity)
double maxSimilarity = -1;
int bestMatch = -1;
for (int i = 0; i < docEmbeddings.size(); i++) {
double sim = cosineSimilarity(query.vector(),
docEmbeddings.get(i).vector());
if (sim > maxSimilarity) {
maxSimilarity = sim;
bestMatch = i;
}
}
System.out.println("Best match: " + docs.get(bestMatch).text());
// Helper method
static double cosineSimilarity(float[] a, float[] b) {
double dot = 0.0, normA = 0.0, normB = 0.0;
for (int i = 0; i < a.length; i++) {
dot += a[i] * b[i];
normA += a[i] * a[i];
normB += b[i] * b[i];
}
return dot / (Math.sqrt(normA) * Math.sqrt(normB));
}import dev.langchain4j.data.segment.TextSegment;
import java.util.List;
import java.util.stream.Collectors;
HuggingFaceEmbeddingModel model = HuggingFaceEmbeddingModel.builder()
.accessToken(System.getenv("HF_API_KEY"))
.modelId("sentence-transformers/all-MiniLM-L6-v2")
.build();
List<String> texts = List.of(
"Document 1 content",
"Document 2 content",
"Document 3 content"
);
// Convert to TextSegments
List<TextSegment> segments = texts.stream()
.map(TextSegment::from)
.collect(Collectors.toList());
// Batch embed (single API call)
List<float[]> vectors = model.embedAll(segments)
.content()
.stream()
.map(emb -> emb.vector())
.collect(Collectors.toList());import java.time.Duration;
HuggingFaceEmbeddingModel model = HuggingFaceEmbeddingModel.builder()
.accessToken(System.getenv("HF_API_KEY"))
.modelId("sentence-transformers/all-MiniLM-L6-v2")
.baseUrl("https://custom-endpoint.example.com/") // Custom endpoint
.timeout(Duration.ofSeconds(30)) // Longer timeout
.waitForModel(true) // Wait if loading
.build();import dev.langchain4j.model.output.Response;
import dev.langchain4j.data.embedding.Embedding;
HuggingFaceEmbeddingModel model = HuggingFaceEmbeddingModel.builder()
.accessToken(System.getenv("HF_API_KEY"))
.modelId("sentence-transformers/all-MiniLM-L6-v2")
.build();
try {
Response<Embedding> response = model.embed("text");
float[] vector = response.content().vector();
System.out.println("Success: " + vector.length + " dimensions");
} catch (RuntimeException e) {
// Error format: "status code: <code>; body: <body>"
if (e.getMessage().contains("401")) {
System.err.println("Invalid API token");
} else if (e.getMessage().contains("404")) {
System.err.println("Model not found");
} else if (e.getMessage().contains("429")) {
System.err.println("Rate limited");
} else {
System.err.println("Error: " + e.getMessage());
}
}// For testing without API calls, use SPI to provide mock client
// See SPI Extensions guide for details
import dev.langchain4j.model.huggingface.HuggingFaceEmbeddingModel;
import dev.langchain4j.data.embedding.Embedding;
// In production code
HuggingFaceEmbeddingModel model = HuggingFaceEmbeddingModel.builder()
.accessToken("test-key") // Will use mock in tests
.build();
// Mock returns predefined embeddings
Embedding result = model.embed("test").content();public class HuggingFaceConfig {
private static final String API_KEY = System.getenv("HF_API_KEY");
private static final String BASE_URL = System.getenv()
.getOrDefault("HF_BASE_URL",
"https://router.huggingface.co/hf-inference/");
public static HuggingFaceEmbeddingModel createEmbeddingModel() {
return HuggingFaceEmbeddingModel.builder()
.accessToken(API_KEY)
.modelId("sentence-transformers/all-MiniLM-L6-v2")
.baseUrl(BASE_URL)
.timeout(Duration.ofSeconds(
Integer.parseInt(
System.getenv().getOrDefault("HF_TIMEOUT", "15")
)
))
.build();
}
}Install with Tessl CLI
npx tessl i tessl/maven-dev-langchain4j--langchain4j-hugging-face