CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/maven-org-springframework-ai--spring-ai-commons

Common classes used across Spring AI providing document processing, text transformation, embedding utilities, observability support, and tokenization capabilities for AI application development

Overview
Eval results
Files

docs/guides/quick-start.md

Quick Start Guide

This guide will help you get started with Spring AI Commons in minutes.

Installation

Add the dependency to your Maven pom.xml:

<dependency>
  <groupId>org.springframework.ai</groupId>
  <artifactId>spring-ai-commons</artifactId>
  <version>1.1.2</version>
</dependency>

Requirements: Java 17 or higher

Step 1: Read Documents

import org.springframework.ai.document.Document;
import org.springframework.ai.reader.TextReader;
import org.springframework.core.io.ClassPathResource;
import java.util.List;

// Read from a text file
TextReader reader = new TextReader(new ClassPathResource("knowledge-base.txt"));
List<Document> documents = reader.get();

System.out.println("Loaded " + documents.size() + " documents");

Step 2: Create Documents

import org.springframework.ai.document.Document;

// Create a document manually
Document doc = Document.builder()
    .text("Spring AI Commons provides foundational abstractions for AI development")
    .metadata("source", "documentation")
    .metadata("category", "overview")
    .build();

System.out.println("Document ID: " + doc.getId());

Step 3: Split into Chunks

import org.springframework.ai.transformer.splitter.TokenTextSplitter;

// Create a splitter for embedding-sized chunks
TokenTextSplitter splitter = TokenTextSplitter.builder()
    .withChunkSize(800)  // 800 tokens per chunk
    .withMinChunkSizeChars(100)
    .build();

// Split documents
List<Document> chunks = splitter.apply(documents);

System.out.println("Created " + chunks.size() + " chunks");

Step 4: Count Tokens

import org.springframework.ai.tokenizer.JTokkitTokenCountEstimator;
import com.knuddels.jtokkit.api.EncodingType;

// Create token estimator
JTokkitTokenCountEstimator estimator = new JTokkitTokenCountEstimator(
    EncodingType.CL100K_BASE  // For GPT-3.5/GPT-4
);

// Count tokens in a document
int tokenCount = estimator.estimate(doc.getText());
System.out.println("Token count: " + tokenCount);

Step 5: Batch for Embedding

import org.springframework.ai.embedding.TokenCountBatchingStrategy;

// Create batching strategy
TokenCountBatchingStrategy batchingStrategy = new TokenCountBatchingStrategy(
    EncodingType.CL100K_BASE,
    8191,  // OpenAI embedding limit
    0.1    // 10% reserve
);

// Batch chunks for efficient embedding
List<List<Document>> batches = batchingStrategy.batch(chunks);

System.out.println("Created " + batches.size() + " batches for embedding");

Step 6: Format for AI

import org.springframework.ai.document.MetadataMode;

// Format document for embedding (excludes certain metadata)
String embedContent = doc.getFormattedContent(MetadataMode.EMBED);

// Format document for LLM inference (excludes different metadata)
String inferenceContent = doc.getFormattedContent(MetadataMode.INFERENCE);

// Get just the text (no metadata)
String textOnly = doc.getFormattedContent(MetadataMode.NONE);

Complete Example: RAG Pipeline

import org.springframework.ai.document.Document;
import org.springframework.ai.document.MetadataMode;
import org.springframework.ai.reader.TextReader;
import org.springframework.ai.transformer.splitter.TokenTextSplitter;
import org.springframework.ai.embedding.TokenCountBatchingStrategy;
import org.springframework.ai.tokenizer.JTokkitTokenCountEstimator;
import org.springframework.core.io.ClassPathResource;
import com.knuddels.jtokkit.api.EncodingType;
import java.util.List;

public class RAGPipeline {

    /**
     * Runs the full ingest pipeline: read the knowledge base, split it into
     * embedding-sized chunks, batch the chunks under the model's token limit,
     * and format each chunk for embedding.
     */
    public void processDocuments() {
        // Load the source documents from the classpath.
        TextReader sourceReader = new TextReader(new ClassPathResource("knowledge-base.txt"));
        List<Document> sourceDocuments = sourceReader.get();

        // Break the documents into 500-token chunks suitable for embedding.
        TokenTextSplitter chunker = TokenTextSplitter.builder()
            .withChunkSize(500)
            .build();
        List<Document> chunkedDocuments = chunker.apply(sourceDocuments);

        // Group chunks into batches that stay under the embedding model's
        // 8191-token input limit, keeping a 10% reserve.
        TokenCountBatchingStrategy batcher = new TokenCountBatchingStrategy(
            EncodingType.CL100K_BASE,
            8191,
            0.1
        );
        List<List<Document>> documentBatches = batcher.batch(chunkedDocuments);

        // Render each chunk in its embedding-ready form.
        for (List<Document> documentBatch : documentBatches) {
            for (Document chunk : documentBatch) {
                String embeddingText = chunk.getFormattedContent(MetadataMode.EMBED);
                // Send to embedding API
                // Store in vector database
            }
        }

        System.out.println("Processed " + chunkedDocuments.size() + " chunks in " + documentBatches.size() + " batches");
    }
}

Next Steps

  • Real-World Scenarios - See more complete examples
  • API Reference - Explore the full API
  • Edge Cases - Learn about error handling

Common Patterns

Reading JSON Documents

import org.springframework.ai.reader.JsonReader;

JsonReader jsonReader = new JsonReader(
    new ClassPathResource("data.json"),
    "title", "content"  // Keys to use for document text
);
List<Document> docs = jsonReader.get();

Custom Metadata

Document doc = Document.builder()
    .text("Content")
    .metadata("author", "John Doe")
    .metadata("timestamp", System.currentTimeMillis())
    .metadata("version", "1.0")
    .build();

Content-Based IDs (for deduplication)

import org.springframework.ai.document.id.JdkSha256HexIdGenerator;

Document doc = Document.builder()
    .idGenerator(new JdkSha256HexIdGenerator())
    .text("Same content always gets same ID")
    .build();

Troubleshooting

Empty Document Error

// ❌ This throws IllegalArgumentException
Document doc = new Document("");

// ✅ Validate before creating
if (text != null && !text.isEmpty()) {
    Document doc = new Document(text);
}

File Not Found

try {
    TextReader reader = new TextReader(new ClassPathResource("missing.txt"));
    List<Document> docs = reader.get();
} catch (RuntimeException e) {
    System.err.println("Failed to read file: " + e.getMessage());
}

Token Limit Exceeded

// Check token count before sending to API
JTokkitTokenCountEstimator estimator = new JTokkitTokenCountEstimator();
int tokens = estimator.estimate(text);

if (tokens > 8191) {
    // Split the text first
    TokenTextSplitter splitter = TokenTextSplitter.builder()
        .withChunkSize(8000)
        .build();
    List<Document> chunks = splitter.split(new Document(text));
}

Install with Tessl CLI

npx tessl i tessl/maven-org-springframework-ai--spring-ai-commons

docs

index.md

README.md

tile.json