CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/maven-dev-langchain4j--langchain4j-hugging-face

LangChain4j integration library for Hugging Face inference capabilities including chat, language, and embedding models

Overview
Eval results
Files

docs/quick-start.md

Quick Start Guide

Fastest paths to working code with LangChain4j Hugging Face integration.

Prerequisites

  1. Add Dependency:

Maven:

<dependency>
    <groupId>dev.langchain4j</groupId>
    <artifactId>langchain4j-hugging-face</artifactId>
    <version>1.11.0</version>
</dependency>

Gradle:

implementation 'dev.langchain4j:langchain4j-hugging-face:1.11.0'
  2. Get API Token: https://huggingface.co/settings/tokens

  3. Set Environment Variable:

export HF_API_KEY="your_token_here"

Embedding Models (Recommended)

Minimal Embedding Example

import dev.langchain4j.model.huggingface.HuggingFaceEmbeddingModel;

// One-liner with defaults
HuggingFaceEmbeddingModel model = HuggingFaceEmbeddingModel
    .withAccessToken(System.getenv("HF_API_KEY"));

// Generate embedding
float[] vector = model.embed("Hello world").content().vector();
System.out.println("Embedding dimension: " + vector.length);

Standard Embedding Setup

import dev.langchain4j.model.huggingface.HuggingFaceEmbeddingModel;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.data.embedding.Embedding;
import dev.langchain4j.model.output.Response;
import java.util.List;

HuggingFaceEmbeddingModel model = HuggingFaceEmbeddingModel.builder()
    .accessToken(System.getenv("HF_API_KEY"))
    .modelId("sentence-transformers/all-MiniLM-L6-v2")
    .build();

// Single text
Response<Embedding> single = model.embed("Hello world");
float[] vector = single.content().vector();

// Multiple texts (more efficient)
List<TextSegment> segments = List.of(
    TextSegment.from("First text"),
    TextSegment.from("Second text")
);
Response<List<Embedding>> batch = model.embedAll(segments);
List<Embedding> embeddings = batch.content();

Common Embedding Models

// Fast and efficient (384 dimensions)
HuggingFaceEmbeddingModel fast = HuggingFaceEmbeddingModel.builder()
    .accessToken(System.getenv("HF_API_KEY"))
    .modelId("sentence-transformers/all-MiniLM-L6-v2")
    .build();

// High quality (768 dimensions)
HuggingFaceEmbeddingModel quality = HuggingFaceEmbeddingModel.builder()
    .accessToken(System.getenv("HF_API_KEY"))
    .modelId("sentence-transformers/all-mpnet-base-v2")
    .build();

// Multilingual
HuggingFaceEmbeddingModel multilingual = HuggingFaceEmbeddingModel.builder()
    .accessToken(System.getenv("HF_API_KEY"))
    .modelId("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
    .build();

Chat Models (Deprecated)

Note: HuggingFaceChatModel is deprecated. Use OpenAiChatModel from langchain4j-open-ai module instead.

Minimal Chat Example (Deprecated)

import dev.langchain4j.model.huggingface.HuggingFaceChatModel;

HuggingFaceChatModel model = HuggingFaceChatModel
    .withAccessToken(System.getenv("HF_API_KEY"));

String response = model.chat("What is Java?");

Standard Chat Setup (Deprecated)

import dev.langchain4j.model.huggingface.HuggingFaceChatModel;
import dev.langchain4j.model.chat.response.ChatResponse;
import dev.langchain4j.data.message.UserMessage;
import java.util.List;

HuggingFaceChatModel model = HuggingFaceChatModel.builder()
    .accessToken(System.getenv("HF_API_KEY"))
    .modelId("tiiuae/falcon-7b-instruct")
    .temperature(0.7)
    .maxNewTokens(200)
    .build();

// Simple string chat
String simpleResponse = model.chat("Explain machine learning");

// With message objects
ChatResponse response = model.chat(UserMessage.from("What is Java?"));
String aiMessage = response.aiMessage().text();

Migration to OpenAI Module (Recommended)

// Add dependency: langchain4j-open-ai

import dev.langchain4j.model.openai.OpenAiChatModel;

OpenAiChatModel model = OpenAiChatModel.builder()
    .apiKey(System.getenv("HF_API_KEY"))
    .baseUrl("https://router.huggingface.co/v1")
    .modelName("tiiuae/falcon-7b-instruct:hf-inference")
    .temperature(0.7)
    .build();

String response = model.chat("What is Java?");

Language Models (Deprecated)

Note: HuggingFaceLanguageModel is deprecated. Use OpenAiChatModel from langchain4j-open-ai module instead.

Minimal Language Model Example (Deprecated)

import dev.langchain4j.model.huggingface.HuggingFaceLanguageModel;

HuggingFaceLanguageModel model = HuggingFaceLanguageModel
    .withAccessToken(System.getenv("HF_API_KEY"));

String text = model.generate("Write a haiku:").content();

Standard Language Model Setup (Deprecated)

import dev.langchain4j.model.huggingface.HuggingFaceLanguageModel;
import dev.langchain4j.model.output.Response;

HuggingFaceLanguageModel model = HuggingFaceLanguageModel.builder()
    .accessToken(System.getenv("HF_API_KEY"))
    .modelId("microsoft/Phi-3-mini-4k-instruct")
    .temperature(0.8)
    .maxNewTokens(150)
    .build();

Response<String> response = model.generate("Explain quantum computing:");
String generated = response.content();

Common Patterns

Pattern: Semantic Search

import dev.langchain4j.model.huggingface.HuggingFaceEmbeddingModel;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.data.embedding.Embedding;
import java.util.List;

HuggingFaceEmbeddingModel model = HuggingFaceEmbeddingModel.builder()
    .accessToken(System.getenv("HF_API_KEY"))
    .modelId("sentence-transformers/all-MiniLM-L6-v2")
    .build();

// Embed query
Embedding query = model.embed("machine learning tutorial")
    .content();

// Embed documents
List<TextSegment> docs = List.of(
    TextSegment.from("Machine learning is a subset of AI"),
    TextSegment.from("Deep learning uses neural networks"),
    TextSegment.from("Python is a programming language")
);
List<Embedding> docEmbeddings = model.embedAll(docs).content();

// Find most similar (cosine similarity)
double maxSimilarity = -1;
int bestMatch = -1;
for (int i = 0; i < docEmbeddings.size(); i++) {
    double sim = cosineSimilarity(query.vector(),
                                   docEmbeddings.get(i).vector());
    if (sim > maxSimilarity) {
        maxSimilarity = sim;
        bestMatch = i;
    }
}
System.out.println("Best match: " + docs.get(bestMatch).text());

// Helper method
static double cosineSimilarity(float[] a, float[] b) {
    double dot = 0.0, normA = 0.0, normB = 0.0;
    for (int i = 0; i < a.length; i++) {
        dot += a[i] * b[i];
        normA += a[i] * a[i];
        normB += b[i] * b[i];
    }
    return dot / (Math.sqrt(normA) * Math.sqrt(normB));
}

Pattern: Batch Processing

import dev.langchain4j.data.segment.TextSegment;
import java.util.List;
import java.util.stream.Collectors;

HuggingFaceEmbeddingModel model = HuggingFaceEmbeddingModel.builder()
    .accessToken(System.getenv("HF_API_KEY"))
    .modelId("sentence-transformers/all-MiniLM-L6-v2")
    .build();

List<String> texts = List.of(
    "Document 1 content",
    "Document 2 content",
    "Document 3 content"
);

// Convert to TextSegments
List<TextSegment> segments = texts.stream()
    .map(TextSegment::from)
    .collect(Collectors.toList());

// Batch embed (single API call)
List<float[]> vectors = model.embedAll(segments)
    .content()
    .stream()
    .map(emb -> emb.vector())
    .collect(Collectors.toList());

Pattern: Custom Configuration

import java.time.Duration;

HuggingFaceEmbeddingModel model = HuggingFaceEmbeddingModel.builder()
    .accessToken(System.getenv("HF_API_KEY"))
    .modelId("sentence-transformers/all-MiniLM-L6-v2")
    .baseUrl("https://custom-endpoint.example.com/")  // Custom endpoint
    .timeout(Duration.ofSeconds(30))                   // Longer timeout
    .waitForModel(true)                                // Wait if loading
    .build();

Pattern: Error Handling

import dev.langchain4j.model.output.Response;
import dev.langchain4j.data.embedding.Embedding;

HuggingFaceEmbeddingModel model = HuggingFaceEmbeddingModel.builder()
    .accessToken(System.getenv("HF_API_KEY"))
    .modelId("sentence-transformers/all-MiniLM-L6-v2")
    .build();

try {
    Response<Embedding> response = model.embed("text");
    float[] vector = response.content().vector();
    System.out.println("Success: " + vector.length + " dimensions");
} catch (RuntimeException e) {
    // Error format: "status code: <code>; body: <body>"
    if (e.getMessage().contains("401")) {
        System.err.println("Invalid API token");
    } else if (e.getMessage().contains("404")) {
        System.err.println("Model not found");
    } else if (e.getMessage().contains("429")) {
        System.err.println("Rate limited");
    } else {
        System.err.println("Error: " + e.getMessage());
    }
}

Testing Setup

Mock for Unit Tests

// For testing without API calls, use SPI to provide mock client
// See SPI Extensions guide for details

import dev.langchain4j.model.huggingface.HuggingFaceEmbeddingModel;
import dev.langchain4j.data.embedding.Embedding;

// In production code
HuggingFaceEmbeddingModel model = HuggingFaceEmbeddingModel.builder()
    .accessToken("test-key")  // Will use mock in tests
    .build();

// Mock returns predefined embeddings
Embedding result = model.embed("test").content();

Environment-Specific Configuration

public class HuggingFaceConfig {
    private static final String API_KEY = System.getenv("HF_API_KEY");
    private static final String BASE_URL = System.getenv()
        .getOrDefault("HF_BASE_URL",
                      "https://router.huggingface.co/hf-inference/");

    public static HuggingFaceEmbeddingModel createEmbeddingModel() {
        return HuggingFaceEmbeddingModel.builder()
            .accessToken(API_KEY)
            .modelId("sentence-transformers/all-MiniLM-L6-v2")
            .baseUrl(BASE_URL)
            .timeout(Duration.ofSeconds(
                Integer.parseInt(
                    System.getenv().getOrDefault("HF_TIMEOUT", "15")
                )
            ))
            .build();
    }
}

Next Steps

  • More Examples: See Common Tasks
  • All Configuration: See Configuration Guide
  • API Details: See Embedding Model API
  • Troubleshooting: See Error Handling
  • Advanced: See SPI Extensions

Install with Tessl CLI

npx tessl i tessl/maven-dev-langchain4j--langchain4j-hugging-face@1.11.0

docs

chat-model.md

client-api.md

common-tasks.md

configuration.md

embedding-model.md

error-handling.md

index.md

language-model.md

migration-guide.md

model-names.md

quick-start.md

spi-extensions.md

tile.json