CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/maven-org-springframework-ai--spring-ai-commons

Common classes used across Spring AI providing document processing, text transformation, embedding utilities, observability support, and tokenization capabilities for AI application development

Overview
Eval results
Files

docs/examples/edge-cases.md

Edge Cases and Error Handling

This document covers edge cases, error scenarios, and best practices for robust error handling.

Table of Contents

  • Document Creation Errors
  • File I/O Errors
  • Token Limit Handling
  • Null and Empty Handling
  • Concurrency Issues
  • Resource Cleanup

Document Creation Errors

Empty or Null Text

import org.springframework.ai.document.Document;

// ❌ Empty text throws IllegalArgumentException
try {
    Document doc = new Document("");
} catch (IllegalArgumentException e) {
    System.err.println("Cannot create document with empty text: " + e.getMessage());
}

// ❌ Null text throws IllegalArgumentException
try {
    Document doc = new Document(null);
} catch (IllegalArgumentException e) {
    System.err.println("Cannot create document with null text: " + e.getMessage());
}

// ✅ Validate before creating
String text = getUserInput();
if (text != null && !text.trim().isEmpty()) {
    Document doc = new Document(text);
} else {
    throw new IllegalArgumentException("Document text cannot be null or empty");
}

Missing Content

// ❌ Must have either text or media
try {
    Document doc = Document.builder().build();
} catch (IllegalArgumentException e) {
    System.err.println("Document must have text or media: " + e.getMessage());
}

// ✅ Always provide content
Document doc = Document.builder()
    .text("Content")
    .build();

Both Text and Media

import org.springframework.ai.content.Media;

// ❌ Cannot have both text and media
try {
    Document doc = Document.builder()
        .text("Text content")
        .media(someMedia)
        .build();
} catch (IllegalArgumentException e) {
    System.err.println("Document cannot have both text and media: " + e.getMessage());
}

// ✅ Choose one
Document textDoc = Document.builder()
    .text("Text content")
    .build();

Document mediaDoc = Document.builder()
    .media(someMedia)
    .build();

File I/O Errors

Missing Files

import org.springframework.ai.reader.TextReader;
import org.springframework.core.io.ClassPathResource;
import org.springframework.core.io.FileSystemResource;
import java.io.IOException;

// Handle missing classpath resources
try {
    TextReader reader = new TextReader(new ClassPathResource("missing.txt"));
    List<Document> docs = reader.get();
} catch (RuntimeException e) {
    if (e.getCause() instanceof IOException) {
        System.err.println("File not found: " + e.getMessage());
        // Fallback to default content or skip
    } else {
        throw e;
    }
}

// Check file existence before reading
File file = new File("/path/to/file.txt");
if (file.exists() && file.canRead()) {
    TextReader reader = new TextReader(new FileSystemResource(file));
    List<Document> docs = reader.get();
} else {
    System.err.println("File does not exist or is not readable: " + file.getPath());
}

Invalid JSON

import org.springframework.ai.reader.JsonReader;
import com.fasterxml.jackson.core.JsonProcessingException;

// Handle invalid JSON
try {
    JsonReader reader = new JsonReader(new ClassPathResource("invalid.json"));
    List<Document> docs = reader.get();
} catch (RuntimeException e) {
    if (e.getCause() instanceof JsonProcessingException) {
        System.err.println("Invalid JSON format: " + e.getMessage());
        // Log error and skip file
    } else {
        throw e;
    }
}

Charset Issues

import java.nio.charset.StandardCharsets;
import java.nio.charset.Charset;

// Detect and handle charset issues
TextReader reader = new TextReader(new ClassPathResource("data.txt"));

// Try UTF-8 first
reader.setCharset(StandardCharsets.UTF_8);
try {
    List<Document> docs = reader.get();
} catch (RuntimeException e) {
    // If UTF-8 fails, try other charsets
    System.err.println("UTF-8 decoding failed, trying ISO-8859-1");
    reader.setCharset(StandardCharsets.ISO_8859_1);
    List<Document> docs = reader.get();
}

Token Limit Handling

Exceeding Model Limits

import org.springframework.ai.tokenizer.JTokkitTokenCountEstimator;
import org.springframework.ai.transformer.splitter.TokenTextSplitter;
import com.knuddels.jtokkit.api.EncodingType;

public class TokenLimitHandler {

    /** Tokens reserved below the limit when splitting, so chunks never sit exactly on the cap. */
    private static final int SPLIT_BUFFER_TOKENS = 50;

    private final JTokkitTokenCountEstimator estimator;
    private final int maxTokens;

    /**
     * Creates a handler that splits documents exceeding a token budget.
     *
     * @param maxTokens maximum tokens allowed per document; must be greater than
     *                  {@value #SPLIT_BUFFER_TOKENS}, otherwise the split chunk size
     *                  ({@code maxTokens - SPLIT_BUFFER_TOKENS}) would be non-positive
     * @throws IllegalArgumentException if {@code maxTokens} is too small to split against
     */
    public TokenLimitHandler(int maxTokens) {
        if (maxTokens <= SPLIT_BUFFER_TOKENS) {
            throw new IllegalArgumentException(
                "maxTokens must be greater than " + SPLIT_BUFFER_TOKENS + ", got " + maxTokens);
        }
        this.estimator = new JTokkitTokenCountEstimator(EncodingType.CL100K_BASE);
        this.maxTokens = maxTokens;
    }

    /**
     * Returns the document unchanged when it fits within the limit, otherwise splits it
     * into chunks sized below the limit.
     *
     * @param doc the document to size-check; assumed to carry text content — TODO confirm
     *            callers never pass media-only documents (getText() would be null)
     * @return a single-element list when the document fits, or the split chunks otherwise
     */
    public List<Document> ensureWithinLimit(Document doc) {
        int tokenCount = estimator.estimate(doc.getText());

        if (tokenCount <= maxTokens) {
            return List.of(doc);
        }

        // Split if exceeds limit
        System.out.println("Document exceeds token limit (" + tokenCount + " > " + maxTokens + "), splitting...");

        TokenTextSplitter splitter = TokenTextSplitter.builder()
            .withChunkSize(maxTokens - SPLIT_BUFFER_TOKENS)  // leave headroom below the hard cap
            .build();

        return splitter.split(doc);
    }
}

// Usage
TokenLimitHandler handler = new TokenLimitHandler(8191);
Document largeDoc = new Document(veryLongText);
List<Document> chunks = handler.ensureWithinLimit(largeDoc);

Batch Size Validation

import org.springframework.ai.embedding.TokenCountBatchingStrategy;

// Handle documents that exceed batch limits
TokenCountBatchingStrategy strategy = new TokenCountBatchingStrategy(
    EncodingType.CL100K_BASE,
    8191,
    0.1
);

List<Document> documents = // ... your documents

// Check for oversized documents
JTokkitTokenCountEstimator estimator = new JTokkitTokenCountEstimator();
for (Document doc : documents) {
    int tokens = estimator.estimate(doc.getText());
    if (tokens > 8191) {
        System.err.println("Warning: Document " + doc.getId() + 
                         " exceeds batch limit (" + tokens + " tokens)");
        // Split or handle separately
    }
}

List<List<Document>> batches = strategy.batch(documents);

Null and Empty Handling

Null Metadata Values

// Null metadata values are stored as null (not converted to the string "null")
Document doc = Document.builder()
    .text("Content")
    .metadata("key", null)
    .build();

String value = (String) doc.getMetadata().get("key");
// value is null (not the string "null")

// ✅ Check for null before using
Object metadataValue = doc.getMetadata().get("key");
if (metadataValue != null) {
    // Use value
}

Empty Document Lists

import org.springframework.ai.document.DocumentTransformer;

// Transformers handle empty lists gracefully
List<Document> empty = List.of();
TokenTextSplitter splitter = new TokenTextSplitter();
List<Document> result = splitter.apply(empty);
// result is also empty list (not null)

// ✅ Always check list size
if (!documents.isEmpty()) {
    List<Document> chunks = splitter.apply(documents);
}

Missing JSON Pointer Paths

import org.springframework.ai.reader.JsonReader;

// Non-existent JSON pointer returns empty list
JsonReader reader = new JsonReader(new ClassPathResource("data.json"));
List<Document> docs = reader.get("/nonexistent/path");
// docs is empty list (not null, no exception)

// ✅ Check result
if (docs.isEmpty()) {
    System.err.println("No documents found at JSON pointer path");
}

Concurrency Issues

Thread-Safe Usage

// ✅ Thread-safe components (can be shared)
TokenTextSplitter splitter = TokenTextSplitter.builder().build();
JTokkitTokenCountEstimator estimator = new JTokkitTokenCountEstimator();

// Use from multiple threads safely
ExecutorService executor = Executors.newFixedThreadPool(4);
for (Document doc : documents) {
    executor.submit(() -> {
        List<Document> chunks = splitter.split(doc);  // Thread-safe
        int tokens = estimator.estimate(doc.getText());  // Thread-safe
    });
}

Concurrent File Writes

import org.springframework.ai.writer.FileDocumentWriter;

// ❌ FileDocumentWriter is NOT thread-safe
FileDocumentWriter writer = new FileDocumentWriter("output.txt");

// DON'T do this (concurrent writes to same file)
documents.parallelStream().forEach(doc -> {
    writer.write(List.of(doc));  // NOT SAFE
});

// ✅ Synchronize writes
Object lock = new Object();
documents.parallelStream().forEach(doc -> {
    synchronized (lock) {
        writer.write(List.of(doc));  // Safe
    }
});

// ✅ Or use separate writers per thread
documents.parallelStream().forEach(doc -> {
    FileDocumentWriter threadWriter = new FileDocumentWriter("output-" + Thread.currentThread().getId() + ".txt");
    threadWriter.write(List.of(doc));
});

Metadata Mutability

// ⚠️ Metadata maps may be shared when using mutate()
Document original = Document.builder()
    .text("Original")
    .metadata("key", "value")
    .build();

Document modified = original.mutate()
    .text("Modified")
    .build();

// Metadata might be shared - changes affect both
modified.getMetadata().put("new_key", "new_value");
// original.getMetadata() might also have "new_key"

// ✅ For complete isolation, create new document
Document isolated = Document.builder()
    .text("Modified")
    .metadata(new HashMap<>(original.getMetadata()))
    .build();

Resource Cleanup

Large Document Memory

// ⚠️ Documents hold full text in memory
Document largeDoc = new Document(tenMegabyteString);
// This uses ~10MB of memory

// ✅ Process in chunks and discard
TextReader reader = new TextReader(new ClassPathResource("large-file.txt"));
List<Document> docs = reader.get();

TokenTextSplitter splitter = TokenTextSplitter.builder()
    .withChunkSize(500)
    .build();

for (Document doc : docs) {
    List<Document> chunks = splitter.split(doc);
    
    // Process chunks immediately
    processChunks(chunks);
    
    // Chunks can be garbage collected after processing
}

// Original docs can now be garbage collected
docs = null;

Streaming Large Files

// For very large files (>100MB), consider streaming
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;

public void processLargeFile(Resource resource) throws IOException {
    // Stream the file so at most ~1000 lines are buffered in memory at a time,
    // emitting one Document per batch instead of loading the whole file at once.
    try (BufferedReader reader = new BufferedReader(
            new InputStreamReader(resource.getInputStream(), StandardCharsets.UTF_8))) {

        StringBuilder buffer = new StringBuilder();
        int bufferedLines = 0;

        for (String currentLine = reader.readLine(); currentLine != null; currentLine = reader.readLine()) {
            buffer.append(currentLine).append("\n");
            bufferedLines++;

            // Flush a full batch of 1000 lines as a single document
            if (bufferedLines >= 1000) {
                processDocument(new Document(buffer.toString()));
                buffer = new StringBuilder();
                bufferedLines = 0;
            }
        }

        // Flush any trailing partial batch
        if (buffer.length() > 0) {
            processDocument(new Document(buffer.toString()));
        }
    }
}

Best Practices Summary

Input Validation

public Document createSafeDocument(String text, Map<String, Object> metadata) {
    // Reject text the Document constructor would refuse anyway, with a clear message.
    if (text == null || text.trim().isEmpty()) {
        throw new IllegalArgumentException("Document text cannot be null or empty");
    }

    // Treat a missing metadata map the same as an empty one.
    Map<String, Object> source = (metadata == null) ? new HashMap<>() : metadata;

    // Copy only entries with non-null values before handing the map to the Document.
    Map<String, Object> cleaned = new HashMap<>();
    for (Map.Entry<String, Object> entry : source.entrySet()) {
        if (entry.getValue() != null) {
            cleaned.put(entry.getKey(), entry.getValue());
        }
    }

    return new Document(text, cleaned);
}

Error Recovery

public List<Document> loadDocumentsWithFallback(List<Resource> resources) {
    // Best-effort loader: collect every document that reads successfully and
    // record (rather than propagate) per-file failures.
    List<Document> loaded = new ArrayList<>();
    List<String> errors = new ArrayList<>();

    for (Resource res : resources) {
        try {
            loaded.addAll(new TextReader(res).get());
        } catch (Exception e) {
            // Remember the failure and keep going with the remaining files
            errors.add(res.getFilename() + ": " + e.getMessage());
        }
    }

    if (!errors.isEmpty()) {
        System.err.println("Failed to load " + errors.size() + " files:");
        errors.forEach(System.err::println);
    }

    return loaded;
}

Defensive Programming

public void processDocumentsSafely(List<Document> documents) {
    // A null list is a caller bug — fail fast.
    if (documents == null) {
        throw new IllegalArgumentException("Documents list cannot be null");
    }

    // An empty list is a valid no-op.
    if (documents.isEmpty()) {
        System.out.println("No documents to process");
        return;
    }

    // Keep only documents that are non-null and carry non-empty text.
    List<Document> validDocs = new ArrayList<>();
    for (Document candidate : documents) {
        if (candidate != null && candidate.getText() != null && !candidate.getText().isEmpty()) {
            validDocs.add(candidate);
        }
    }

    if (validDocs.size() < documents.size()) {
        System.err.println("Filtered out " + (documents.size() - validDocs.size()) +
                         " invalid documents");
    }

    // Process each valid document; a failure on one must not stop the rest.
    for (Document doc : validDocs) {
        try {
            processDocument(doc);
        } catch (Exception e) {
            System.err.println("Failed to process document " + doc.getId() + ": " +
                             e.getMessage());
            // Continue with other documents
        }
    }
}

See Also

  • Real-World Scenarios - Complete examples
  • Quick Start Guide - Getting started
  • API Reference - Complete API documentation

Install with Tessl CLI

npx tessl i tessl/maven-org-springframework-ai--spring-ai-commons@1.1.0

docs

examples

edge-cases.md

real-world-scenarios.md

index.md

README.md

tile.json