CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/maven-org-springframework-ai--spring-ai-commons

Common classes used across Spring AI providing document processing, text transformation, embedding utilities, observability support, and tokenization capabilities for AI application development

Overview
Eval results
Files

docs/guides/quick-start.md

Quick Start Guide

This guide will help you get started with Spring AI Commons in minutes.

Installation

Add the dependency to your Maven pom.xml:

<dependency>
  <groupId>org.springframework.ai</groupId>
  <artifactId>spring-ai-commons</artifactId>
  <version>1.1.2</version>
</dependency>

Requirements: Java 17 or higher

Step 1: Read Documents

import org.springframework.ai.document.Document;
import org.springframework.ai.reader.TextReader;
import org.springframework.core.io.ClassPathResource;
import java.util.List;

// Read from a text file
TextReader reader = new TextReader(new ClassPathResource("knowledge-base.txt"));
List<Document> documents = reader.get();

System.out.println("Loaded " + documents.size() + " documents");

Step 2: Create Documents

import org.springframework.ai.document.Document;

// Create a document manually
Document doc = Document.builder()
    .text("Spring AI Commons provides foundational abstractions for AI development")
    .metadata("source", "documentation")
    .metadata("category", "overview")
    .build();

System.out.println("Document ID: " + doc.getId());

Step 3: Split into Chunks

import org.springframework.ai.transformer.splitter.TokenTextSplitter;

// Create a splitter for embedding-sized chunks
TokenTextSplitter splitter = TokenTextSplitter.builder()
    .withChunkSize(800)  // 800 tokens per chunk
    .withMinChunkSizeChars(100)
    .build();

// Split documents
List<Document> chunks = splitter.apply(documents);

System.out.println("Created " + chunks.size() + " chunks");

Step 4: Count Tokens

import org.springframework.ai.tokenizer.JTokkitTokenCountEstimator;
import com.knuddels.jtokkit.api.EncodingType;

// Create token estimator
JTokkitTokenCountEstimator estimator = new JTokkitTokenCountEstimator(
    EncodingType.CL100K_BASE  // For GPT-3.5/GPT-4
);

// Count tokens in a document
int tokenCount = estimator.estimate(doc.getText());
System.out.println("Token count: " + tokenCount);

Step 5: Batch for Embedding

import org.springframework.ai.embedding.TokenCountBatchingStrategy;

// Create batching strategy
TokenCountBatchingStrategy batchingStrategy = new TokenCountBatchingStrategy(
    EncodingType.CL100K_BASE,
    8191,  // OpenAI embedding limit
    0.1    // 10% reserve
);

// Batch chunks for efficient embedding
List<List<Document>> batches = batchingStrategy.batch(chunks);

System.out.println("Created " + batches.size() + " batches for embedding");

Step 6: Format for AI

import org.springframework.ai.document.MetadataMode;

// Format document for embedding (excludes certain metadata)
String embedContent = doc.getFormattedContent(MetadataMode.EMBED);

// Format document for LLM inference (excludes different metadata)
String inferenceContent = doc.getFormattedContent(MetadataMode.INFERENCE);

// Get just the text (no metadata)
String textOnly = doc.getFormattedContent(MetadataMode.NONE);

Complete Example: RAG Pipeline

import org.springframework.ai.document.Document;
import org.springframework.ai.document.MetadataMode;
import org.springframework.ai.reader.TextReader;
import org.springframework.ai.transformer.splitter.TokenTextSplitter;
import org.springframework.ai.embedding.TokenCountBatchingStrategy;
import org.springframework.ai.tokenizer.JTokkitTokenCountEstimator;
import org.springframework.core.io.ClassPathResource;
import com.knuddels.jtokkit.api.EncodingType;
import java.util.List;

public class RAGPipeline {

    /**
     * Runs the full ingest pipeline: read the knowledge base, split it into
     * embedding-sized chunks, batch the chunks under the model's token limit,
     * and format each chunk for embedding.
     */
    public void processDocuments() {
        // Load the source documents from the classpath.
        TextReader sourceReader = new TextReader(new ClassPathResource("knowledge-base.txt"));
        List<Document> sourceDocuments = sourceReader.get();

        // Break the documents into 500-token chunks suitable for embedding.
        TokenTextSplitter chunker = TokenTextSplitter.builder()
            .withChunkSize(500)
            .build();
        List<Document> chunkedDocuments = chunker.apply(sourceDocuments);

        // Group chunks into batches that stay under the embedding model's
        // 8191-token input limit, keeping a 10% reserve.
        TokenCountBatchingStrategy batcher = new TokenCountBatchingStrategy(
            EncodingType.CL100K_BASE,
            8191,
            0.1
        );
        List<List<Document>> documentBatches = batcher.batch(chunkedDocuments);

        // Render each chunk in its embedding-ready form.
        for (List<Document> documentBatch : documentBatches) {
            for (Document chunk : documentBatch) {
                String embeddingText = chunk.getFormattedContent(MetadataMode.EMBED);
                // Send to embedding API
                // Store in vector database
            }
        }

        System.out.println("Processed " + chunkedDocuments.size() + " chunks in " + documentBatches.size() + " batches");
    }
}

Next Steps

  • Real-World Scenarios - See more complete examples
  • API Reference - Explore the full API
  • Edge Cases - Learn about error handling

Common Patterns

Reading JSON Documents

import org.springframework.ai.reader.JsonReader;

JsonReader jsonReader = new JsonReader(
    new ClassPathResource("data.json"),
    "title", "content"  // Keys to use for document text
);
List<Document> docs = jsonReader.get();

Custom Metadata

Document doc = Document.builder()
    .text("Content")
    .metadata("author", "John Doe")
    .metadata("timestamp", System.currentTimeMillis())
    .metadata("version", "1.0")
    .build();

Content-Based IDs (for deduplication)

import org.springframework.ai.document.id.JdkSha256HexIdGenerator;

Document doc = Document.builder()
    .idGenerator(new JdkSha256HexIdGenerator())
    .text("Same content always gets same ID")
    .build();

Troubleshooting

Empty Document Error

// ❌ This throws IllegalArgumentException
Document doc = new Document("");

// ✅ Validate before creating
if (text != null && !text.isEmpty()) {
    Document doc = new Document(text);
}

File Not Found

try {
    TextReader reader = new TextReader(new ClassPathResource("missing.txt"));
    List<Document> docs = reader.get();
} catch (RuntimeException e) {
    System.err.println("Failed to read file: " + e.getMessage());
}

Token Limit Exceeded

// Check token count before sending to API
JTokkitTokenCountEstimator estimator = new JTokkitTokenCountEstimator();
int tokens = estimator.estimate(text);

if (tokens > 8191) {
    // Split the text first
    TokenTextSplitter splitter = TokenTextSplitter.builder()
        .withChunkSize(8000)
        .build();
    List<Document> chunks = splitter.split(new Document(text));
}

Install with Tessl CLI

npx tessl i tessl/maven-org-springframework-ai--spring-ai-commons

docs

index.md

README.md

tile.json