Common classes used across Spring AI, providing document processing, text transformation, embedding utilities, observability support, and tokenization capabilities for AI application development.
This document covers edge cases, error scenarios, and best practices for robust error handling.
import org.springframework.ai.document.Document;
// ❌ Empty text throws IllegalArgumentException
try {
Document doc = new Document("");
} catch (IllegalArgumentException e) {
System.err.println("Cannot create document with empty text: " + e.getMessage());
}
// ❌ Null text throws IllegalArgumentException
try {
Document doc = new Document(null);
} catch (IllegalArgumentException e) {
System.err.println("Cannot create document with null text: " + e.getMessage());
}
// ✅ Validate before creating
String text = getUserInput();
if (text != null && !text.trim().isEmpty()) {
Document doc = new Document(text);
} else {
throw new IllegalArgumentException("Document text cannot be null or empty");
}// ❌ Must have either text or media
try {
Document doc = Document.builder().build();
} catch (IllegalArgumentException e) {
System.err.println("Document must have text or media: " + e.getMessage());
}
// ✅ Always provide content
Document doc = Document.builder()
.text("Content")
.build();import org.springframework.ai.content.Media;
// ❌ Cannot have both text and media
try {
Document doc = Document.builder()
.text("Text content")
.media(someMedia)
.build();
} catch (IllegalArgumentException e) {
System.err.println("Document cannot have both text and media: " + e.getMessage());
}
// ✅ Choose one
Document textDoc = Document.builder()
.text("Text content")
.build();
Document mediaDoc = Document.builder()
.media(someMedia)
.build();import org.springframework.ai.reader.TextReader;
import org.springframework.core.io.ClassPathResource;
import org.springframework.core.io.FileSystemResource;
import java.io.IOException;
// Handle missing classpath resources
try {
TextReader reader = new TextReader(new ClassPathResource("missing.txt"));
List<Document> docs = reader.get();
} catch (RuntimeException e) {
if (e.getCause() instanceof IOException) {
System.err.println("File not found: " + e.getMessage());
// Fallback to default content or skip
} else {
throw e;
}
}
// Check file existence before reading
File file = new File("/path/to/file.txt");
if (file.exists() && file.canRead()) {
TextReader reader = new TextReader(new FileSystemResource(file));
List<Document> docs = reader.get();
} else {
System.err.println("File does not exist or is not readable: " + file.getPath());
}import org.springframework.ai.reader.JsonReader;
import com.fasterxml.jackson.core.JsonProcessingException;
// Handle invalid JSON
try {
JsonReader reader = new JsonReader(new ClassPathResource("invalid.json"));
List<Document> docs = reader.get();
} catch (RuntimeException e) {
if (e.getCause() instanceof JsonProcessingException) {
System.err.println("Invalid JSON format: " + e.getMessage());
// Log error and skip file
} else {
throw e;
}
}import java.nio.charset.StandardCharsets;
import java.nio.charset.Charset;
// Detect and handle charset issues
TextReader reader = new TextReader(new ClassPathResource("data.txt"));
// Try UTF-8 first
reader.setCharset(StandardCharsets.UTF_8);
try {
List<Document> docs = reader.get();
} catch (RuntimeException e) {
// If UTF-8 decoding fails, fall back to ISO-8859-1 (it maps every byte value, so decoding with it cannot fail)
System.err.println("UTF-8 decoding failed, trying ISO-8859-1");
reader.setCharset(StandardCharsets.ISO_8859_1);
List<Document> docs = reader.get();
}import org.springframework.ai.tokenizer.JTokkitTokenCountEstimator;
import org.springframework.ai.transformer.splitter.TokenTextSplitter;
import com.knuddels.jtokkit.api.EncodingType;
/**
 * Ensures a document fits within a token budget, splitting it into
 * token-bounded chunks when it exceeds the configured limit.
 * Documents already within the limit are returned unchanged as a singleton list.
 */
public class TokenLimitHandler {
private final JTokkitTokenCountEstimator estimator;
private final int maxTokens;
/**
 * @param maxTokens maximum tokens allowed per document; must be positive
 * @throws IllegalArgumentException if {@code maxTokens} is not positive
 */
public TokenLimitHandler(int maxTokens) {
// Fail fast: a non-positive limit would make the splitter chunk size invalid
if (maxTokens <= 0) {
throw new IllegalArgumentException("maxTokens must be positive, got: " + maxTokens);
}
this.estimator = new JTokkitTokenCountEstimator(EncodingType.CL100K_BASE);
this.maxTokens = maxTokens;
}
/**
 * Returns the document as-is when it fits within the limit; otherwise splits
 * it into chunks sized just below the limit.
 * NOTE(review): assumes {@code doc.getText()} is non-null — a media-only
 * document would fail the token estimate here; confirm callers only pass text documents.
 *
 * @param doc the document to check and, if necessary, split
 * @return a singleton list with the original document, or the split chunks
 */
public List<Document> ensureWithinLimit(Document doc) {
int tokenCount = estimator.estimate(doc.getText());
if (tokenCount <= maxTokens) {
return List.of(doc);
}
// Split if exceeds limit
System.out.println("Document exceeds token limit (" + tokenCount + " > " + maxTokens + "), splitting...");
// Leave a small buffer below the limit; Math.max guards tiny limits so the
// chunk size is always at least 1 (the original maxTokens - 50 could go non-positive)
TokenTextSplitter splitter = TokenTextSplitter.builder()
.withChunkSize(Math.max(1, maxTokens - 50))
.build();
return splitter.split(doc);
}
}
// Usage
TokenLimitHandler handler = new TokenLimitHandler(8191);
Document largeDoc = new Document(veryLongText);
List<Document> chunks = handler.ensureWithinLimit(largeDoc);import org.springframework.ai.embedding.TokenCountBatchingStrategy;
// Handle documents that exceed batch limits
TokenCountBatchingStrategy strategy = new TokenCountBatchingStrategy(
EncodingType.CL100K_BASE,
8191,
0.1
);
List<Document> documents = // ... your documents
// Check for oversized documents
JTokkitTokenCountEstimator estimator = new JTokkitTokenCountEstimator();
for (Document doc : documents) {
int tokens = estimator.estimate(doc.getText());
if (tokens > 8191) {
System.err.println("Warning: Document " + doc.getId() +
" exceeds batch limit (" + tokens + " tokens)");
// Split or handle separately
}
}
List<List<Document>> batches = strategy.batch(documents);// Null metadata values passed to the builder are preserved as null (not converted to the string "null")
Document doc = Document.builder()
.text("Content")
.metadata("key", null)
.build();
String value = (String) doc.getMetadata().get("key");
// value is null (not the string "null")
// ✅ Check for null before using
Object metadataValue = doc.getMetadata().get("key");
if (metadataValue != null) {
// Use value
}import org.springframework.ai.document.DocumentTransformer;
// Transformers handle empty lists gracefully
List<Document> empty = List.of();
TokenTextSplitter splitter = new TokenTextSplitter();
List<Document> result = splitter.apply(empty);
// result is also empty list (not null)
// ✅ Always check list size
if (!documents.isEmpty()) {
List<Document> chunks = splitter.apply(documents);
}import org.springframework.ai.reader.JsonReader;
// Non-existent JSON pointer returns empty list
JsonReader reader = new JsonReader(new ClassPathResource("data.json"));
List<Document> docs = reader.get("/nonexistent/path");
// docs is empty list (not null, no exception)
// ✅ Check result
if (docs.isEmpty()) {
System.err.println("No documents found at JSON pointer path");
}// ✅ Thread-safe components (can be shared)
TokenTextSplitter splitter = TokenTextSplitter.builder().build();
JTokkitTokenCountEstimator estimator = new JTokkitTokenCountEstimator();
// Use from multiple threads safely
ExecutorService executor = Executors.newFixedThreadPool(4);
for (Document doc : documents) {
executor.submit(() -> {
List<Document> chunks = splitter.split(doc); // Thread-safe
int tokens = estimator.estimate(doc.getText()); // Thread-safe
});
}import org.springframework.ai.writer.FileDocumentWriter;
// ❌ FileDocumentWriter is NOT thread-safe
FileDocumentWriter writer = new FileDocumentWriter("output.txt");
// DON'T do this (concurrent writes to same file)
documents.parallelStream().forEach(doc -> {
writer.write(List.of(doc)); // NOT SAFE
});
// ✅ Synchronize writes
Object lock = new Object();
documents.parallelStream().forEach(doc -> {
synchronized (lock) {
writer.write(List.of(doc)); // Safe
}
});
// ✅ Or use separate writers per thread
documents.parallelStream().forEach(doc -> {
FileDocumentWriter threadWriter = new FileDocumentWriter("output-" + Thread.currentThread().getId() + ".txt");
threadWriter.write(List.of(doc));
});// ⚠️ Metadata maps may be shared when using mutate()
Document original = Document.builder()
.text("Original")
.metadata("key", "value")
.build();
Document modified = original.mutate()
.text("Modified")
.build();
// Metadata might be shared - changes affect both
modified.getMetadata().put("new_key", "new_value");
// original.getMetadata() might also have "new_key"
// ✅ For complete isolation, create new document
Document isolated = Document.builder()
.text("Modified")
.metadata(new HashMap<>(original.getMetadata()))
.build();// ⚠️ Documents hold full text in memory
Document largeDoc = new Document(tenMegabyteString);
// This uses ~10MB of memory
// ✅ Process in chunks and discard
TextReader reader = new TextReader(new ClassPathResource("large-file.txt"));
List<Document> docs = reader.get();
TokenTextSplitter splitter = TokenTextSplitter.builder()
.withChunkSize(500)
.build();
for (Document doc : docs) {
List<Document> chunks = splitter.split(doc);
// Process chunks immediately
processChunks(chunks);
// Chunks can be garbage collected after processing
}
// Original docs can now be garbage collected
docs = null;// For very large files (>100MB), consider streaming
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
// Streams a large text resource in fixed-size line batches so the whole file is
// never held in memory at once; each ~1000-line chunk becomes one Document.
// NOTE(review): assumes the resource is UTF-8 text — confirm for legacy/binary inputs.
public void processLargeFile(Resource resource) throws IOException {
// try-with-resources guarantees the underlying stream is closed even on failure
try (BufferedReader reader = new BufferedReader(
new InputStreamReader(resource.getInputStream(), StandardCharsets.UTF_8))) {
StringBuilder chunk = new StringBuilder();
String line;
int lineCount = 0;
while ((line = reader.readLine()) != null) {
chunk.append(line).append("\n");
lineCount++;
// Process in chunks of 1000 lines
if (lineCount >= 1000) {
Document doc = new Document(chunk.toString());
processDocument(doc);
// Reset the buffer so already-processed text becomes eligible for GC
chunk = new StringBuilder();
lineCount = 0;
}
}
// Process remaining lines (the final partial chunk, if any)
if (chunk.length() > 0) {
Document doc = new Document(chunk.toString());
processDocument(doc);
}
}
}public Document createSafeDocument(String text, Map<String, Object> metadata) {
// Validate text: Document rejects null/empty text, so fail fast with a clear message
if (text == null || text.trim().isEmpty()) {
throw new IllegalArgumentException("Document text cannot be null or empty");
}
// Validate metadata: treat a null map as "no metadata" rather than failing
if (metadata == null) {
metadata = new HashMap<>();
}
// Remove null values from metadata before constructing the Document
// (Collectors.toMap throws NullPointerException when fed a null value,
// so the filter must run first; map keys are unique, no merge function needed)
metadata = metadata.entrySet().stream()
.filter(e -> e.getValue() != null)
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
return new Document(text, metadata);
}public List<Document> loadDocumentsWithFallback(List<Resource> resources) {
// Accumulate successfully-loaded documents and a human-readable failure log,
// so one unreadable file does not abort the whole batch.
List<Document> documents = new ArrayList<>();
List<String> failures = new ArrayList<>();
for (Resource resource : resources) {
try {
TextReader reader = new TextReader(resource);
documents.addAll(reader.get());
} catch (Exception e) {
// Broad catch is deliberate: any reader error should only skip this resource
failures.add(resource.getFilename() + ": " + e.getMessage());
// Continue with other files
}
}
// Report all failures once, after the loop, so the summary is easy to scan
if (!failures.isEmpty()) {
System.err.println("Failed to load " + failures.size() + " files:");
failures.forEach(System.err::println);
}
return documents;
}public void processDocumentsSafely(List<Document> documents) {
// Check for null: a null list is a programming error, so throw rather than ignore
if (documents == null) {
throw new IllegalArgumentException("Documents list cannot be null");
}
// Check for empty: nothing to do, but not an error
if (documents.isEmpty()) {
System.out.println("No documents to process");
return;
}
// Filter out invalid documents (null entries or documents without text)
// NOTE(review): media-only documents have null text and are filtered out here — confirm intended
List<Document> validDocs = documents.stream()
.filter(doc -> doc != null)
.filter(doc -> doc.getText() != null && !doc.getText().isEmpty())
.collect(Collectors.toList());
// Surface how many documents were dropped so silent data loss is visible
if (validDocs.size() < documents.size()) {
System.err.println("Filtered out " + (documents.size() - validDocs.size()) +
" invalid documents");
}
// Process valid documents; a failure on one document is logged and does not stop the rest
for (Document doc : validDocs) {
try {
processDocument(doc);
} catch (Exception e) {
System.err.println("Failed to process document " + doc.getId() + ": " +
e.getMessage());
// Continue with other documents
}
}
}Install with Tessl CLI
npx tessl i tessl/maven-org-springframework-ai--spring-ai-commons@1.1.0