CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/maven-org-springframework-ai--spring-ai-model

Core model interfaces and abstractions for Spring AI framework providing portable API for chat, embeddings, images, audio, and tool calling across multiple AI providers

Overview
Eval results
Files

docs/reference/document-enrichment.md

Document Enrichment and Transformation

AI-powered document enrichment utilities for extracting keywords and generating summaries that are added as metadata to documents. These transformers enhance documents with AI-generated metadata for improved retrieval and understanding.

Capabilities

Keyword Metadata Enricher

Extracts keywords from documents using AI and adds them as metadata.

/**
 * Document transformer that uses a chat model to extract keywords from each
 * document and stores them in the document metadata under
 * {@link #EXCERPT_KEYWORDS_METADATA_KEY}.
 */
public class KeywordMetadataEnricher implements DocumentTransformer {
    /**
     * Metadata key under which the extracted keywords are stored.
     */
    public static final String EXCERPT_KEYWORDS_METADATA_KEY = "excerpt_keywords";

    /**
     * Name of the context placeholder used in keyword templates; it appears in a
     * template as {@code {context_str}}.
     */
    public static final String CONTEXT_STR_PLACEHOLDER = "context_str";

    /**
     * Default template for keyword extraction. {@code {context_str}} marks where the
     * context is inserted and {@code %s} marks the number of keywords to request.
     */
    public static final String KEYWORDS_TEMPLATE = "{context_str}. Give %s unique keywords for this document. Format as comma separated. Keywords: ";

    /**
     * Construct a KeywordMetadataEnricher that requests a fixed number of keywords.
     *
     * @param chatModel the chat model to use for keyword extraction
     * @param keywordCount the number of keywords to extract
     */
    public KeywordMetadataEnricher(ChatModel chatModel, int keywordCount);

    /**
     * Construct a KeywordMetadataEnricher that uses a custom prompt template.
     *
     * @param chatModel the chat model to use for keyword extraction
     * @param keywordsTemplate the custom prompt template for extraction
     */
    public KeywordMetadataEnricher(ChatModel chatModel, PromptTemplate keywordsTemplate);

    /**
     * Apply keyword extraction to documents.
     *
     * <p>Declared {@code public} because it overrides the method inherited through
     * {@code DocumentTransformer}; an override may not narrow visibility.
     *
     * @param documents the documents to enrich
     * @return the enriched documents with keywords in metadata
     */
    @Override
    public List<Document> apply(List<Document> documents);

    /**
     * Create a builder for KeywordMetadataEnricher.
     *
     * @param chatModel the chat model to use
     * @return a new builder
     */
    public static Builder builder(ChatModel chatModel);
}

KeywordMetadataEnricher.Builder

/**
 * Builder for {@code KeywordMetadataEnricher}, referred to as
 * {@code KeywordMetadataEnricher.Builder}. Obtain one via
 * {@code KeywordMetadataEnricher.builder(chatModel)} or the public constructor,
 * chain the setters, then call {@link #build()}.
 */
public static final class Builder {
    /**
     * Construct a builder bound to the given chat model.
     *
     * @param chatModel the chat model the built enricher will use
     */
    public Builder(ChatModel chatModel);

    /**
     * Set the number of keywords to extract.
     *
     * @param keywordCount the number of keywords (must be >= 1)
     * @return this builder, for call chaining
     */
    public Builder keywordCount(int keywordCount);

    /**
     * Set a custom prompt template for keyword extraction.
     *
     * @param keywordsTemplate the custom template
     * @return this builder, for call chaining
     */
    public Builder keywordsTemplate(PromptTemplate keywordsTemplate);

    /**
     * Build the KeywordMetadataEnricher from the values set on this builder.
     *
     * @return the configured enricher
     */
    public KeywordMetadataEnricher build();
}

Summary Metadata Enricher

Generates summaries for documents and adds them as metadata, optionally including summaries of adjacent documents.

/**
 * Document transformer that uses a chat model to summarize documents and stores
 * the summaries in document metadata. Depending on the requested
 * {@code SummaryType}s, the keys written are {@code "section_summary"},
 * {@code "prev_section_summary"} and {@code "next_section_summary"}.
 */
public class SummaryMetadataEnricher implements DocumentTransformer {
    /**
     * Default template for summary extraction; {@code {context_str}} marks where
     * the section content is inserted.
     */
    public static final String DEFAULT_SUMMARY_EXTRACT_TEMPLATE = """
        Here is the content of the section:
        {context_str}

        Summarize the key topics and entities of the section.

        Summary:""";

    /**
     * Construct a SummaryMetadataEnricher for the given summary types.
     *
     * @param chatModel the chat model to use for summarization
     * @param summaryTypes the types of summaries to generate (CURRENT, PREVIOUS, NEXT)
     */
    public SummaryMetadataEnricher(ChatModel chatModel, List<SummaryType> summaryTypes);

    /**
     * Construct a SummaryMetadataEnricher with full configuration.
     *
     * @param chatModel the chat model to use for summarization
     * @param summaryTypes the types of summaries to generate
     * @param summaryTemplate the template for summary generation
     * @param metadataMode the metadata mode for document formatting
     */
    public SummaryMetadataEnricher(
        ChatModel chatModel,
        List<SummaryType> summaryTypes,
        String summaryTemplate,
        MetadataMode metadataMode
    );

    /**
     * Apply summary generation to documents.
     *
     * <p>Declared {@code public} because it overrides the method inherited through
     * {@code DocumentTransformer}; an override may not narrow visibility.
     *
     * @param documents the documents to enrich
     * @return the enriched documents with summaries in metadata
     */
    @Override
    public List<Document> apply(List<Document> documents);
}

SummaryType Enum

/**
 * Selects which summaries {@code SummaryMetadataEnricher} adds to each document.
 * Referenced in the examples as {@code SummaryMetadataEnricher.SummaryType}.
 */
public enum SummaryType {
    /**
     * Include summary of the previous document.
     * Stored in metadata under the key "prev_section_summary".
     */
    PREVIOUS,

    /**
     * Include summary of the current document.
     * Stored in metadata under the key "section_summary".
     */
    CURRENT,

    /**
     * Include summary of the next document.
     * Stored in metadata under the key "next_section_summary".
     */
    NEXT
}

MetadataMode Enum

Controls how metadata is included when formatting document content.

/**
 * Controls which metadata fields are included when document content is formatted.
 * Imported in the examples from {@code org.springframework.ai.document}.
 */
public enum MetadataMode {
    /**
     * Include all metadata fields when formatting document content.
     */
    ALL,

    /**
     * Include only metadata relevant for embedding operations.
     */
    EMBED,

    /**
     * Include only metadata relevant for inference operations.
     */
    INFERENCE,

    /**
     * Do not include any metadata in formatted content; only the content itself is used.
     */
    NONE
}

Usage Examples

Basic Keyword Extraction

import org.springframework.ai.model.transformer.KeywordMetadataEnricher;
import org.springframework.ai.chat.model.ChatModel;
import org.springframework.ai.document.Document;

/**
 * Example service that enriches documents with AI-extracted keywords.
 */
@Service
public class DocumentEnrichmentService {
    private final ChatModel chatModel;

    /**
     * Creates the service. The {@code final} field must be assigned in a
     * constructor; as the only constructor, it is also the one Spring uses to
     * inject the {@code ChatModel} bean.
     *
     * @param chatModel the chat model to use for keyword extraction
     */
    public DocumentEnrichmentService(ChatModel chatModel) {
        this.chatModel = chatModel;
    }

    /**
     * Extracts 5 keywords per document, prints them, and returns the enriched documents.
     *
     * @param documents the documents to enrich
     * @return the documents returned by the enricher, with keywords in metadata
     */
    public List<Document> enrichWithKeywords(List<Document> documents) {
        // Extract 5 keywords from each document
        KeywordMetadataEnricher enricher = new KeywordMetadataEnricher(chatModel, 5);

        // Apply enrichment
        List<Document> enriched = enricher.apply(documents);

        // Access keywords through the enricher's metadata key constant
        for (Document doc : enriched) {
            String keywords = (String) doc.getMetadata()
                .get(KeywordMetadataEnricher.EXCERPT_KEYWORDS_METADATA_KEY);
            System.out.println("Keywords: " + keywords);
        }

        return enriched;
    }
}

Keyword Extraction with Builder

import org.springframework.ai.model.transformer.KeywordMetadataEnricher;

// Obtain a builder, request 10 keywords per document, then build the enricher
KeywordMetadataEnricher.Builder enricherBuilder = KeywordMetadataEnricher.builder(chatModel);
KeywordMetadataEnricher enricher = enricherBuilder.keywordCount(10).build();

// Run the enricher over the documents
List<Document> enriched = enricher.apply(documents);

Custom Keyword Template

import org.springframework.ai.chat.prompt.PromptTemplate;

// Prompt text for the custom template; {context_str} is the context placeholder
String keywordPrompt = """
    Text: {context_str}

    Extract the most important technical keywords and concepts.
    Return as a comma-separated list.
    Keywords:
    """;
PromptTemplate customTemplate = new PromptTemplate(keywordPrompt);

// Configure the builder with the template instead of a keyword count
KeywordMetadataEnricher.Builder enricherBuilder = KeywordMetadataEnricher.builder(chatModel);
KeywordMetadataEnricher enricher = enricherBuilder.keywordsTemplate(customTemplate).build();

List<Document> enriched = enricher.apply(documents);

Basic Summary Generation

import org.springframework.ai.model.transformer.SummaryMetadataEnricher;
import org.springframework.ai.model.transformer.SummaryMetadataEnricher.SummaryType;

// Request a summary of each document itself only
List<SummaryType> summaryTypes = List.of(SummaryType.CURRENT);
SummaryMetadataEnricher enricher = new SummaryMetadataEnricher(chatModel, summaryTypes);

List<Document> enriched = enricher.apply(documents);

// Print the summary stored under "section_summary" for every document
enriched.forEach(doc -> {
    String summary = (String) doc.getMetadata().get("section_summary");
    System.out.println("Summary: " + summary);
});

Summary with Adjacent Context

import org.springframework.ai.document.MetadataMode;

// Request summaries of the previous, current, and next documents
List<SummaryType> summaryTypes =
    List.of(SummaryType.PREVIOUS, SummaryType.CURRENT, SummaryType.NEXT);
SummaryMetadataEnricher enricher = new SummaryMetadataEnricher(chatModel, summaryTypes);

List<Document> enriched = enricher.apply(documents);

// Read all three summary entries from each document's metadata, then print them
enriched.stream()
    .map(Document::getMetadata)
    .forEach(metadata -> {
        String prevSummary = (String) metadata.get("prev_section_summary");
        String currentSummary = (String) metadata.get("section_summary");
        String nextSummary = (String) metadata.get("next_section_summary");

        System.out.println("Previous: " + prevSummary);
        System.out.println("Current: " + currentSummary);
        System.out.println("Next: " + nextSummary);
    });

Custom Summary Template

// Summary prompt; {context_str} marks where the document content is inserted
String customTemplate = """
    Document content:
    {context_str}

    Provide a concise one-sentence summary highlighting the main topic.

    Summary:
    """;

// Summarize only the current document, formatting it with all metadata included
List<SummaryType> summaryTypes = List.of(SummaryType.CURRENT);
SummaryMetadataEnricher enricher =
    new SummaryMetadataEnricher(chatModel, summaryTypes, customTemplate, MetadataMode.ALL);

Combined Enrichment Pipeline

/**
 * Example pipeline that adds keywords and then summaries to a list of documents.
 */
@Service
public class DocumentProcessingPipeline {
    private final ChatModel chatModel;

    /**
     * Creates the pipeline. The {@code final} field must be assigned in a
     * constructor; as the only constructor, it is also the one Spring uses to
     * inject the {@code ChatModel} bean.
     *
     * @param chatModel the chat model shared by both enrichers
     */
    public DocumentProcessingPipeline(ChatModel chatModel) {
        this.chatModel = chatModel;
    }

    /**
     * Runs keyword extraction followed by summary generation.
     *
     * @param documents the documents to enrich
     * @return the documents returned by the summary enricher, which was fed the
     *         keyword-enriched documents
     */
    public List<Document> processDocuments(List<Document> documents) {
        // Step 1: Extract keywords
        KeywordMetadataEnricher keywordEnricher =
            KeywordMetadataEnricher.builder(chatModel)
                .keywordCount(5)
                .build();

        List<Document> withKeywords = keywordEnricher.apply(documents);

        // Step 2: Generate summaries
        SummaryMetadataEnricher summaryEnricher = new SummaryMetadataEnricher(
            chatModel,
            List.of(SummaryType.CURRENT)
        );

        List<Document> fullyEnriched = summaryEnricher.apply(withKeywords);

        // Now documents have both keywords and summaries
        return fullyEnriched;
    }
}

Using Enriched Metadata for Search

/**
 * Example of using enriched metadata to filter documents.
 */
public class EnrichedDocumentSearch {
    /**
     * Prints the summaries of the documents whose extracted keywords contain the query.
     *
     * <p>Matching is a case-insensitive substring test against the whole keyword
     * string, so a query can match part of a keyword. Documents whose keyword
     * entry is missing or not a {@code String} are skipped.
     *
     * @param enrichedDocs documents that already carry keyword and summary metadata
     * @param query the text to look for in the keywords; must not be null
     */
    public void demonstrateSearch(List<Document> enrichedDocs, String query) {
        // Lower-case the query once rather than once per document. Locale.ROOT keeps
        // the comparison independent of the JVM's default locale.
        String needle = query.toLowerCase(java.util.Locale.ROOT);

        // Filter documents by keywords; the type pattern also rejects null values
        List<Document> relevantDocs = enrichedDocs.stream()
            .filter(doc -> doc.getMetadata()
                    .get(KeywordMetadataEnricher.EXCERPT_KEYWORDS_METADATA_KEY) instanceof String keywords
                && keywords.toLowerCase(java.util.Locale.ROOT).contains(needle))
            .toList();

        // Display summaries of relevant documents
        for (Document doc : relevantDocs) {
            String summary = (String) doc.getMetadata().get("section_summary");
            System.out.println("Relevant doc summary: " + summary);
        }
    }
}

Metadata Mode Configuration

import org.springframework.ai.document.MetadataMode;

// Both enrichers summarize only the current document with the default template;
// they differ solely in the metadata mode passed as the last argument.
List<SummaryType> currentOnly = List.of(SummaryType.CURRENT);
String template = SummaryMetadataEnricher.DEFAULT_SUMMARY_EXTRACT_TEMPLATE;

// MetadataMode.ALL: include existing metadata when summarizing
SummaryMetadataEnricher allMetadata =
    new SummaryMetadataEnricher(chatModel, currentOnly, template, MetadataMode.ALL);

// MetadataMode.NONE: only use document content
SummaryMetadataEnricher noMetadata =
    new SummaryMetadataEnricher(chatModel, currentOnly, template, MetadataMode.NONE);

Spring Configuration

@Configuration
public class EnrichmentConfig {

    @Bean
    public KeywordMetadataEnricher keywordEnricher(ChatModel chatModel) {
        return KeywordMetadataEnricher.builder(chatModel)
            .keywordCount(7)
            .build();
    }

    @Bean
    public SummaryMetadataEnricher summaryEnricher(ChatModel chatModel) {
        return new SummaryMetadataEnricher(
            chatModel,
            List.of(SummaryType.PREVIOUS, SummaryType.CURRENT, SummaryType.NEXT)
        );
    }
}

Install with Tessl CLI

npx tessl i tessl/maven-org-springframework-ai--spring-ai-model@1.1.1

docs

index.md

tile.json