Common classes used across Spring AI, providing document processing, text transformation, embedding utilities, observability support, and tokenization capabilities for AI application development.
This document covers edge cases, error scenarios, and best practices for robust error handling.
import org.springframework.ai.document.Document;
// ❌ Empty text throws IllegalArgumentException
try {
Document doc = new Document("");
} catch (IllegalArgumentException e) {
System.err.println("Cannot create document with empty text: " + e.getMessage());
}
// ❌ Null text throws IllegalArgumentException
try {
Document doc = new Document(null);
} catch (IllegalArgumentException e) {
System.err.println("Cannot create document with null text: " + e.getMessage());
}
// ✅ Validate before creating
String text = getUserInput();
if (text != null && !text.trim().isEmpty()) {
Document doc = new Document(text);
} else {
throw new IllegalArgumentException("Document text cannot be null or empty");
}// ❌ Must have either text or media
try {
Document doc = Document.builder().build();
} catch (IllegalArgumentException e) {
System.err.println("Document must have text or media: " + e.getMessage());
}
// ✅ Always provide content
Document doc = Document.builder()
.text("Content")
.build();import org.springframework.ai.content.Media;
// ❌ Cannot have both text and media
try {
Document doc = Document.builder()
.text("Text content")
.media(someMedia)
.build();
} catch (IllegalArgumentException e) {
System.err.println("Document cannot have both text and media: " + e.getMessage());
}
// ✅ Choose one
Document textDoc = Document.builder()
.text("Text content")
.build();
Document mediaDoc = Document.builder()
.media(someMedia)
.build();import org.springframework.ai.reader.TextReader;
import org.springframework.core.io.ClassPathResource;
import org.springframework.core.io.FileSystemResource;
import java.io.IOException;
// Handle missing classpath resources
try {
TextReader reader = new TextReader(new ClassPathResource("missing.txt"));
List<Document> docs = reader.get();
} catch (RuntimeException e) {
if (e.getCause() instanceof IOException) {
System.err.println("File not found: " + e.getMessage());
// Fallback to default content or skip
} else {
throw e;
}
}
// Check file existence before reading
File file = new File("/path/to/file.txt");
if (file.exists() && file.canRead()) {
TextReader reader = new TextReader(new FileSystemResource(file));
List<Document> docs = reader.get();
} else {
System.err.println("File does not exist or is not readable: " + file.getPath());
}import org.springframework.ai.reader.JsonReader;
import com.fasterxml.jackson.core.JsonProcessingException;
// Handle invalid JSON
try {
JsonReader reader = new JsonReader(new ClassPathResource("invalid.json"));
List<Document> docs = reader.get();
} catch (RuntimeException e) {
if (e.getCause() instanceof JsonProcessingException) {
System.err.println("Invalid JSON format: " + e.getMessage());
// Log error and skip file
} else {
throw e;
}
}import java.nio.charset.StandardCharsets;
import java.nio.charset.Charset;
// Detect and handle charset issues
TextReader reader = new TextReader(new ClassPathResource("data.txt"));
// Try UTF-8 first
reader.setCharset(StandardCharsets.UTF_8);
try {
List<Document> docs = reader.get();
} catch (RuntimeException e) {
// If UTF-8 decoding fails, fall back to ISO-8859-1 (it maps every byte value, so decoding with it cannot fail)
System.err.println("UTF-8 decoding failed, trying ISO-8859-1");
reader.setCharset(StandardCharsets.ISO_8859_1);
List<Document> docs = reader.get();
}import org.springframework.ai.tokenizer.JTokkitTokenCountEstimator;
import org.springframework.ai.transformer.splitter.TokenTextSplitter;
import com.knuddels.jtokkit.api.EncodingType;
/**
 * Ensures a document fits within a token budget, splitting it into
 * token-bounded chunks when it exceeds the configured limit.
 * Documents already within the limit are returned unchanged as a singleton list.
 */
public class TokenLimitHandler {
private final JTokkitTokenCountEstimator estimator;
private final int maxTokens;
/**
 * @param maxTokens maximum tokens allowed per document; must be positive
 * @throws IllegalArgumentException if {@code maxTokens} is not positive
 */
public TokenLimitHandler(int maxTokens) {
// Fail fast: a non-positive limit would make the splitter chunk size invalid
if (maxTokens <= 0) {
throw new IllegalArgumentException("maxTokens must be positive, got: " + maxTokens);
}
this.estimator = new JTokkitTokenCountEstimator(EncodingType.CL100K_BASE);
this.maxTokens = maxTokens;
}
/**
 * Returns the document as-is when it fits within the limit; otherwise splits
 * it into chunks sized just below the limit.
 * NOTE(review): assumes {@code doc.getText()} is non-null — a media-only
 * document would fail the token estimate here; confirm callers only pass text documents.
 *
 * @param doc the document to check and, if necessary, split
 * @return a singleton list with the original document, or the split chunks
 */
public List<Document> ensureWithinLimit(Document doc) {
int tokenCount = estimator.estimate(doc.getText());
if (tokenCount <= maxTokens) {
return List.of(doc);
}
// Split if exceeds limit
System.out.println("Document exceeds token limit (" + tokenCount + " > " + maxTokens + "), splitting...");
// Leave a small buffer below the limit; Math.max guards tiny limits so the
// chunk size is always at least 1 (the original maxTokens - 50 could go non-positive)
TokenTextSplitter splitter = TokenTextSplitter.builder()
.withChunkSize(Math.max(1, maxTokens - 50))
.build();
return splitter.split(doc);
}
}
// Usage
TokenLimitHandler handler = new TokenLimitHandler(8191);
Document largeDoc = new Document(veryLongText);
List<Document> chunks = handler.ensureWithinLimit(largeDoc);import org.springframework.ai.embedding.TokenCountBatchingStrategy;
// Handle documents that exceed batch limits
TokenCountBatchingStrategy strategy = new TokenCountBatchingStrategy(
EncodingType.CL100K_BASE,
8191,
0.1
);
List<Document> documents = // ... your documents
// Check for oversized documents
JTokkitTokenCountEstimator estimator = new JTokkitTokenCountEstimator();
for (Document doc : documents) {
int tokens = estimator.estimate(doc.getText());
if (tokens > 8191) {
System.err.println("Warning: Document " + doc.getId() +
" exceeds batch limit (" + tokens + " tokens)");
// Split or handle separately
}
}
List<List<Document>> batches = strategy.batch(documents);// Null metadata values passed to the builder are preserved as null (not converted to the string "null")
Document doc = Document.builder()
.text("Content")
.metadata("key", null)
.build();
String value = (String) doc.getMetadata().get("key");
// value is null (not the string "null")
// ✅ Check for null before using
Object metadataValue = doc.getMetadata().get("key");
if (metadataValue != null) {
// Use value
}import org.springframework.ai.document.DocumentTransformer;
// Transformers handle empty lists gracefully
List<Document> empty = List.of();
TokenTextSplitter splitter = new TokenTextSplitter();
List<Document> result = splitter.apply(empty);
// result is also empty list (not null)
// ✅ Always check list size
if (!documents.isEmpty()) {
List<Document> chunks = splitter.apply(documents);
}import org.springframework.ai.reader.JsonReader;
// Non-existent JSON pointer returns empty list
JsonReader reader = new JsonReader(new ClassPathResource("data.json"));
List<Document> docs = reader.get("/nonexistent/path");
// docs is empty list (not null, no exception)
// ✅ Check result
if (docs.isEmpty()) {
System.err.println("No documents found at JSON pointer path");
}// ✅ Thread-safe components (can be shared)
TokenTextSplitter splitter = TokenTextSplitter.builder().build();
JTokkitTokenCountEstimator estimator = new JTokkitTokenCountEstimator();
// Use from multiple threads safely
ExecutorService executor = Executors.newFixedThreadPool(4);
for (Document doc : documents) {
executor.submit(() -> {
List<Document> chunks = splitter.split(doc); // Thread-safe
int tokens = estimator.estimate(doc.getText()); // Thread-safe
});
}import org.springframework.ai.writer.FileDocumentWriter;
// ❌ FileDocumentWriter is NOT thread-safe
FileDocumentWriter writer = new FileDocumentWriter("output.txt");
// DON'T do this (concurrent writes to same file)
documents.parallelStream().forEach(doc -> {
writer.write(List.of(doc)); // NOT SAFE
});
// ✅ Synchronize writes
Object lock = new Object();
documents.parallelStream().forEach(doc -> {
synchronized (lock) {
writer.write(List.of(doc)); // Safe
}
});
// ✅ Or use separate writers per thread
documents.parallelStream().forEach(doc -> {
FileDocumentWriter threadWriter = new FileDocumentWriter("output-" + Thread.currentThread().getId() + ".txt");
threadWriter.write(List.of(doc));
});// ⚠️ Metadata maps may be shared when using mutate()
Document original = Document.builder()
.text("Original")
.metadata("key", "value")
.build();
Document modified = original.mutate()
.text("Modified")
.build();
// Metadata might be shared - changes affect both
modified.getMetadata().put("new_key", "new_value");
// original.getMetadata() might also have "new_key"
// ✅ For complete isolation, create new document
Document isolated = Document.builder()
.text("Modified")
.metadata(new HashMap<>(original.getMetadata()))
.build();// ⚠️ Documents hold full text in memory
Document largeDoc = new Document(tenMegabyteString);
// This uses ~10MB of memory
// ✅ Process in chunks and discard
TextReader reader = new TextReader(new ClassPathResource("large-file.txt"));
List<Document> docs = reader.get();
TokenTextSplitter splitter = TokenTextSplitter.builder()
.withChunkSize(500)
.build();
for (Document doc : docs) {
List<Document> chunks = splitter.split(doc);
// Process chunks immediately
processChunks(chunks);
// Chunks can be garbage collected after processing
}
// Original docs can now be garbage collected
docs = null;// For very large files (>100MB), consider streaming
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
// Streams a large text resource in fixed-size line batches so the whole file is
// never held in memory at once; each ~1000-line chunk becomes one Document.
// NOTE(review): assumes the resource is UTF-8 text — confirm for legacy/binary inputs.
public void processLargeFile(Resource resource) throws IOException {
// try-with-resources guarantees the underlying stream is closed even on failure
try (BufferedReader reader = new BufferedReader(
new InputStreamReader(resource.getInputStream(), StandardCharsets.UTF_8))) {
StringBuilder chunk = new StringBuilder();
String line;
int lineCount = 0;
while ((line = reader.readLine()) != null) {
chunk.append(line).append("\n");
lineCount++;
// Process in chunks of 1000 lines
if (lineCount >= 1000) {
Document doc = new Document(chunk.toString());
processDocument(doc);
// Reset the buffer so already-processed text becomes eligible for GC
chunk = new StringBuilder();
lineCount = 0;
}
}
// Process remaining lines (the final partial chunk, if any)
if (chunk.length() > 0) {
Document doc = new Document(chunk.toString());
processDocument(doc);
}
}
}public Document createSafeDocument(String text, Map<String, Object> metadata) {
// Validate text: Document rejects null/empty text, so fail fast with a clear message
if (text == null || text.trim().isEmpty()) {
throw new IllegalArgumentException("Document text cannot be null or empty");
}
// Validate metadata: treat a null map as "no metadata" rather than failing
if (metadata == null) {
metadata = new HashMap<>();
}
// Remove null values from metadata before constructing the Document
// (Collectors.toMap throws NullPointerException when fed a null value,
// so the filter must run first; map keys are unique, no merge function needed)
metadata = metadata.entrySet().stream()
.filter(e -> e.getValue() != null)
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
return new Document(text, metadata);
}public List<Document> loadDocumentsWithFallback(List<Resource> resources) {
// Accumulate successfully-loaded documents and a human-readable failure log,
// so one unreadable file does not abort the whole batch.
List<Document> documents = new ArrayList<>();
List<String> failures = new ArrayList<>();
for (Resource resource : resources) {
try {
TextReader reader = new TextReader(resource);
documents.addAll(reader.get());
} catch (Exception e) {
// Broad catch is deliberate: any reader error should only skip this resource
failures.add(resource.getFilename() + ": " + e.getMessage());
// Continue with other files
}
}
// Report all failures once, after the loop, so the summary is easy to scan
if (!failures.isEmpty()) {
System.err.println("Failed to load " + failures.size() + " files:");
failures.forEach(System.err::println);
}
return documents;
}public void processDocumentsSafely(List<Document> documents) {
// Check for null: a null list is a programming error, so throw rather than ignore
if (documents == null) {
throw new IllegalArgumentException("Documents list cannot be null");
}
// Check for empty: nothing to do, but not an error
if (documents.isEmpty()) {
System.out.println("No documents to process");
return;
}
// Filter out invalid documents (null entries or documents without text)
// NOTE(review): media-only documents have null text and are filtered out here — confirm intended
List<Document> validDocs = documents.stream()
.filter(doc -> doc != null)
.filter(doc -> doc.getText() != null && !doc.getText().isEmpty())
.collect(Collectors.toList());
// Surface how many documents were dropped so silent data loss is visible
if (validDocs.size() < documents.size()) {
System.err.println("Filtered out " + (documents.size() - validDocs.size()) +
" invalid documents");
}
// Process valid documents; a failure on one document is logged and does not stop the rest
for (Document doc : validDocs) {
try {
processDocument(doc);
} catch (Exception e) {
System.err.println("Failed to process document " + doc.getId() + ": " +
e.getMessage());
// Continue with other documents
}
}
}Install with Tessl CLI
npx tessl i tessl/maven-org-springframework-ai--spring-ai-commons@1.1.0