Common vector store functionality for Spring AI providing a portable abstraction layer for integrating vector databases with comprehensive filtering, similarity search, and observability support.
Learn how to handle errors, edge cases, and exceptional scenarios in Spring AI Vector Store.
Some vector stores throw errors when adding documents with duplicate IDs.
// Some stores reject duplicate IDs; recover by re-identifying the documents.
// Pick ONE recovery strategy — running both would store the content twice.
try {
    vectorStore.add(documentsWithDuplicateIds);
} catch (RuntimeException e) {
    logger.error("Duplicate document IDs detected", e);
    // Strategy 1: assign fresh IDs and retry (keeps both copies in the store).
    List<Document> withNewIds = documentsWithDuplicateIds.stream()
        .map(doc -> new Document(
                UUID.randomUUID().toString(),
                doc.getContent(),
                doc.getMetadata()))
        .toList();
    vectorStore.add(withNewIds);
    // Strategy 2 (alternative — do NOT combine with Strategy 1): replace
    // the existing documents instead of adding new copies.
    // vectorStore.delete(existingIds);
    // vectorStore.add(documentsWithDuplicateIds);
}
Handle cases where no documents match the query or filters.
/**
 * Searches with strict criteria first, then progressively relaxes the
 * request (lower threshold, wider window, finally no filter) until
 * something matches or all strategies are exhausted.
 */
public List<Document> searchWithFallback(String query, String filter) {
    // Attempt 1: strict criteria.
    List<Document> results = vectorStore.similaritySearch(
            SearchRequest.builder()
                    .query(query)
                    .filterExpression(filter)
                    .similarityThreshold(0.8)
                    .topK(10)
                    .build());
    if (!results.isEmpty()) {
        return results;
    }
    logger.warn("No results found with strict criteria, relaxing...");
    // Attempt 2: lower the similarity bar and widen the result window.
    results = vectorStore.similaritySearch(
            SearchRequest.builder()
                    .query(query)
                    .filterExpression(filter)
                    .similarityThreshold(0.6) // Lower threshold
                    .topK(20) // More results
                    .build());
    if (!results.isEmpty()) {
        return results;
    }
    // Attempt 3: drop the metadata filter entirely.
    logger.warn("Still no results, removing filters...");
    return vectorStore.similaritySearch(
            SearchRequest.builder()
                    .query(query)
                    .topK(10)
                    .build());
}
Handle invalid filter syntax from user input.
import org.springframework.ai.vectorstore.filter.FilterExpressionTextParser;
import org.springframework.ai.vectorstore.filter.FilterExpressionTextParser.FilterExpressionParseException;
/**
 * Parses user-supplied filter text, treating a syntax error as "no filter".
 *
 * @param filterText raw filter expression text; may be null or blank
 * @return the parsed expression, or null when the input is absent or invalid
 */
public Filter.Expression parseFilterSafely(String filterText) {
    if (filterText == null || filterText.isBlank()) {
        return null; // Nothing to parse — search unfiltered.
    }
    try {
        return new FilterExpressionTextParser().parse(filterText);
    } catch (FilterExpressionParseException e) {
        logger.error("Invalid filter syntax: {}", filterText, e);
        // Strategy 1: ignore the filter entirely.
        return null;
        // Strategy 2: substitute a safe default filter:
        // return new FilterExpressionBuilder().eq("status", "active").build();
        // Strategy 3: surface a user-friendly error instead:
        // throw new InvalidFilterException("Invalid filter syntax: " + e.getMessage());
    }
}
Handle operations not supported by all vector stores.
/**
 * Deletes every document matching {@code filter}, falling back to
 * search-then-delete-by-id for stores without filter-based deletion.
 *
 * @param filter portable filter expression selecting the documents to delete
 */
public void deleteByFilter(String filter) {
    try {
        vectorStore.delete(filter);
    } catch (UnsupportedOperationException e) {
        logger.warn("Filter-based deletion not supported, falling back to ID-based deletion");
        // Fall back to searching and deleting by ID. Loop in batches: a
        // single search capped at topK would silently leave any documents
        // beyond the cap undeleted.
        final int batchSize = 1000;
        while (true) {
            SearchRequest request = SearchRequest.builder()
                .query("*") // Match all
                .filterExpression(filter)
                .topK(batchSize)
                .build();
            List<Document> batch = vectorStore.similaritySearch(request);
            if (batch.isEmpty()) {
                break; // No matching documents remain.
            }
            List<String> idsToDelete = batch.stream()
                .map(Document::getId)
                .toList();
            vectorStore.delete(idsToDelete);
            logger.info("Deleted {} documents using ID-based fallback", idsToDelete.size());
        }
    }
}
Handle file I/O errors when loading/saving state.
/**
 * Persists {@code store} to {@code file}, retrying transient I/O failures
 * with a linear backoff. After the final failure a backup location is
 * attempted before the error is propagated.
 *
 * @param store      the in-memory store to persist
 * @param file       target file for the serialized store
 * @param maxRetries number of attempts before giving up (no-op if <= 0)
 * @throws RuntimeException if all attempts fail, or the thread is interrupted
 */
public void saveWithRetry(SimpleVectorStore store, File file, int maxRetries) {
    int attempts = 0;
    while (attempts < maxRetries) {
        try {
            store.save(file);
            logger.info("Vector store saved successfully");
            return;
        } catch (IOException e) {
            attempts++;
            logger.error("Failed to save vector store (attempt {}/{})",
                attempts, maxRetries, e);
            if (attempts >= maxRetries) {
                // Last attempt failed
                logger.error("All save attempts failed, data may be lost!");
                // Strategy 1: Save to backup location
                try {
                    File backup = new File(file.getParent(), "vectorstore-backup.json");
                    store.save(backup);
                    logger.info("Saved to backup location: {}", backup);
                } catch (IOException backupError) {
                    logger.error("Backup save also failed", backupError);
                }
                throw new RuntimeException("Failed to save vector store after " +
                    maxRetries + " attempts", e);
            }
            // Linear backoff before the next attempt (1s, 2s, 3s, ...).
            try {
                Thread.sleep(1000L * attempts);
            } catch (InterruptedException ie) {
                // Restore the interrupt flag and stop retrying: looping on
                // after an interrupt would ignore the cancellation request.
                Thread.currentThread().interrupt();
                throw new RuntimeException("Interrupted while retrying vector store save", ie);
            }
        }
    }
}
/**
 * Loads the store from the primary file; on failure tries the backup file,
 * and as a last resort continues with an empty store.
 */
public void loadWithFallback(SimpleVectorStore store, File primaryFile, File backupFile) {
    try {
        store.load(primaryFile);
        logger.info("Loaded vector store from primary file");
        return;
    } catch (IOException e) {
        logger.warn("Failed to load from primary file, trying backup", e);
    }
    try {
        store.load(backupFile);
        logger.info("Loaded vector store from backup file");
    } catch (IOException backupError) {
        logger.error("Failed to load from backup file, starting with empty store", backupError);
        // Continue with empty store
    }
}
Handle documents with missing or null metadata fields.
/**
 * Adds a single document after normalizing its metadata: tolerates a null
 * map, fills in defaults, and strips null values.
 *
 * @param content  document text to index
 * @param metadata caller-supplied metadata; may be null or contain nulls
 */
public void addDocumentSafely(String content, Map<String, Object> metadata) {
    // Work on a defensive copy; tolerate a null map from the caller.
    Map<String, Object> safeMetadata = new HashMap<>();
    if (metadata != null) {
        safeMetadata.putAll(metadata);
    }
    // Fill in defaults for fields downstream filters rely on.
    safeMetadata.putIfAbsent("createdAt", Instant.now().toString());
    safeMetadata.putIfAbsent("status", "active");
    // Null values confuse some stores' filtering — strip them last so an
    // explicit null supplied by the caller is dropped, not defaulted.
    safeMetadata.values().removeIf(value -> value == null);
    vectorStore.add(List.of(new Document(content, safeMetadata)));
}
// When filtering, handle potential nulls — and quote user-supplied values.
/**
 * Searches within {@code category}, or across all documents that have any
 * category when {@code category} is null.
 *
 * @param query    similarity-search query text
 * @param category category to filter on; may be null
 */
public List<Document> searchWithNullSafeFilter(String query, String category) {
    String filter;
    if (category != null) {
        // Escape single quotes so a value like "O'Brien" cannot break (or
        // inject into) the filter expression.
        // NOTE(review): confirm the filter grammar's escape syntax; prefer
        // FilterExpressionBuilder.eq("category", category) where available.
        filter = "category == '" + category.replace("'", "\\'") + "'";
    } else {
        filter = "category IS NOT NULL"; // Only return docs with category set
    }
    return vectorStore.similaritySearch(
        SearchRequest.builder()
            .query(query)
            .filterExpression(filter)
            .build()
    );
}
Validate inputs before operations.
/**
 * Adds the given documents after dropping null/blank-content entries.
 * A null or empty input list is a logged no-op.
 */
public void addDocuments(List<Document> documents) {
    if (documents == null || documents.isEmpty()) {
        logger.warn("Attempted to add null or empty document list");
        return; // No-op
    }
    // Keep only documents with real content.
    List<Document> validDocuments = new ArrayList<>();
    for (Document doc : documents) {
        if (doc.getContent() != null && !doc.getContent().isBlank()) {
            validDocuments.add(doc);
        }
    }
    if (validDocuments.isEmpty()) {
        logger.warn("All documents had empty content, nothing to add");
        return;
    }
    int skipped = documents.size() - validDocuments.size();
    if (skipped > 0) {
        logger.warn("Filtered out {} documents with empty content", skipped);
    }
    vectorStore.add(validDocuments);
}
Handle documents that exceed embedding model limits.
/**
 * Adds {@code content}, chunking it first when it exceeds the embedding
 * model's token budget. Each chunk records its position so the original
 * document can be reassembled.
 *
 * @param content  document text; chunked if too long
 * @param metadata base metadata copied onto every chunk; may be null
 */
public void addLongDocument(String content, Map<String, Object> metadata) {
    final int MAX_TOKENS = 8000; // Example limit
    // Tolerate a null metadata map, consistent with addDocumentSafely.
    Map<String, Object> baseMetadata = metadata != null ? metadata : new HashMap<>();
    if (estimateTokens(content) > MAX_TOKENS) {
        logger.warn("Document exceeds token limit, chunking...");
        // Strategy 1: chunk the document; add all chunks in a single call
        // rather than one store round-trip per chunk.
        List<String> chunks = chunkContent(content, MAX_TOKENS);
        List<Document> chunkDocs = new ArrayList<>(chunks.size());
        for (int i = 0; i < chunks.size(); i++) {
            Map<String, Object> chunkMetadata = new HashMap<>(baseMetadata);
            chunkMetadata.put("chunkIndex", i);
            chunkMetadata.put("totalChunks", chunks.size());
            chunkDocs.add(new Document(chunks.get(i), chunkMetadata));
        }
        vectorStore.add(chunkDocs);
        return;
    }
    // Normal case
    vectorStore.add(List.of(new Document(content, baseMetadata)));
}
/**
 * Crude token estimate assuming ~4 characters per token.
 * NOTE(review): this ratio is a rough heuristic for English text and varies
 * by language and tokenizer — confirm against the embedding model's actual
 * tokenizer if the limit is strict.
 *
 * @param text text to measure (must be non-null; callers pass content directly)
 * @return estimated token count
 */
private int estimateTokens(String text) {
    // Rough estimation: 1 token ≈ 4 characters
    return text.length() / 4;
}
/**
 * Splits {@code content} into consecutive character windows sized to fit
 * within {@code maxTokens} tokens (using the 4-chars-per-token estimate).
 *
 * @return the chunks in document order; empty for empty content
 */
private List<String> chunkContent(String content, int maxTokens) {
    int windowChars = maxTokens * 4; // token budget expressed in characters
    List<String> chunks = new ArrayList<>();
    int start = 0;
    while (start < content.length()) {
        int end = Math.min(start + windowChars, content.length());
        chunks.add(content.substring(start, end));
        start = end;
    }
    return chunks;
}
Handle failures during embedding generation.
/**
 * Adds each document individually, retrying transient failures up to three
 * times per document. A document that still fails is recorded for manual
 * review and processing continues with the next one; an interrupt aborts
 * the whole batch.
 */
public void addDocumentsWithRetry(List<Document> documents) {
    for (Document doc : documents) {
        int maxRetries = 3;
        int attempt = 0;
        while (attempt < maxRetries) {
            try {
                vectorStore.add(List.of(doc));
                break; // Success
            } catch (Exception e) {
                attempt++;
                logger.error("Failed to add document (attempt {}/{}): {}",
                    attempt, maxRetries, doc.getId(), e);
                if (attempt >= maxRetries) {
                    // Log failure and continue with next document
                    logger.error("Failed to add document after {} attempts: {}",
                        maxRetries, doc.getId());
                    // Optionally: Store failed document for later processing
                    storeFailedDocument(doc, e);
                } else {
                    // Wait before retry (linear backoff).
                    try {
                        Thread.sleep(1000L * attempt);
                    } catch (InterruptedException ie) {
                        // Restore the flag and abort the whole batch: an
                        // interrupt is a cancellation request, so do not keep
                        // grinding through the remaining documents.
                        Thread.currentThread().interrupt();
                        return;
                    }
                }
            }
        }
    }
}
/**
 * Records a document that could not be indexed so it can be retried or
 * reviewed manually (dead-letter handling). Currently only logs.
 *
 * @param doc   the document that failed to be added
 * @param error the terminal failure, kept for diagnostics
 */
private void storeFailedDocument(Document doc, Exception error) {
    // Store for later retry or manual review
    logger.error("Storing failed document for manual review: {}", doc.getId());
    // Implementation: write to file, database, or queue
}
Handle concurrent access to vector stores.
@Service
public class ConcurrentSafeVectorStoreService {

    private final VectorStore vectorStore;
    private final ReentrantLock lock = new ReentrantLock();

    // Constructor injection: without it the final vectorStore field is
    // never assigned and the class does not compile.
    public ConcurrentSafeVectorStoreService(VectorStore vectorStore) {
        this.vectorStore = vectorStore;
    }

    /** Serializes writes; not every store is safe for concurrent mutation. */
    public void addDocumentsSafely(List<Document> documents) {
        lock.lock();
        try {
            vectorStore.add(documents);
        } finally {
            lock.unlock();
        }
    }

    public List<Document> searchSafely(SearchRequest request) {
        // Reads can be concurrent for most vector stores
        return vectorStore.similaritySearch(request);
    }

    // For SimpleVectorStore with file persistence.
    // NOTE(review): if SimpleVectorStore.save declares a checked IOException
    // (as saveWithRetry above assumes), this method must handle or declare
    // it — confirm against the API version in use.
    public void saveStateSafely(SimpleVectorStore store, File file) {
        lock.lock();
        try {
            store.save(file);
        } finally {
            lock.unlock();
        }
    }
}
Ensure metadata types match filter expectations.
/**
 * Stores metadata with explicit Java types so numeric and boolean filter
 * comparisons behave as expected (e.g. {@code year > 2020} compares
 * integers, not strings).
 *
 * @throws IllegalArgumentException if {@code category} is null — Map.of is
 *         null-hostile and would otherwise fail with an uninformative NPE
 */
public void addDocumentWithTypeSafety(
        String content,
        String category,
        int year,
        double rating,
        boolean featured) {
    if (category == null) {
        throw new IllegalArgumentException("category must not be null");
    }
    // Ensure correct types
    Map<String, Object> metadata = Map.of(
        "category", category, // String
        "year", Integer.valueOf(year), // Integer (not String)
        "rating", Double.valueOf(rating), // Double (not String)
        "featured", Boolean.valueOf(featured) // Boolean (not String)
    );
    Document doc = new Document(content, metadata);
    vectorStore.add(List.of(doc));
}
// Validate filter will work with metadata types
/**
 * Returns true when {@code filterExpression} parses under the portable
 * filter grammar; logs and returns false on syntax errors.
 */
public boolean validateFilter(String filterExpression) {
    try {
        // Parse purely for validation; the parsed expression is discarded
        // (the original kept it in an unused local).
        new FilterExpressionTextParser().parse(filterExpression);
        // Additional validation: check if fields exist in schema
        // Implementation depends on your schema management
        return true;
    } catch (FilterExpressionParseException e) {
        logger.error("Invalid filter expression: {}", filterExpression, e);
        return false;
    }
}
Monitor and handle memory limits for in-memory stores.
@Service
public class MemoryAwareVectorStoreService {

    private final SimpleVectorStore vectorStore;
    private final int maxDocuments = 10000;
    private final AtomicInteger documentCount = new AtomicInteger(0);

    // Constructor injection: without it the final vectorStore field is
    // never assigned and the class does not compile.
    public MemoryAwareVectorStoreService(SimpleVectorStore vectorStore) {
        this.vectorStore = vectorStore;
    }

    /**
     * Adds a document, evicting old entries first when the in-memory store
     * is at capacity. Pick ONE of the strategies below — running all three
     * would evict, archive, AND migrate on every overflow.
     */
    public void addDocumentWithCapacityCheck(Document doc) {
        if (documentCount.get() >= maxDocuments) {
            logger.warn("Vector store at capacity, triggering cleanup");
            // Strategy 1: Remove oldest documents
            removeOldestDocuments(1000);
            // Strategy 2 (alternative): archive the whole store to disk
            // archiveAndClear();
            // Strategy 3 (alternative): migrate to a persistent store
            // migrateToPersistentStore();
        }
        vectorStore.add(List.of(doc));
        documentCount.incrementAndGet();
    }

    private void removeOldestDocuments(int count) {
        // Query a candidate set, then delete the oldest by createdAt stamp.
        SearchRequest request = SearchRequest.builder()
            .query("*")
            .topK(count)
            .filterExpression("1 == 1") // Match all
            .build();
        List<Document> oldest = vectorStore.similaritySearch(request);
        List<String> idsToDelete = oldest.stream()
            .sorted((a, b) -> {
                String dateA = (String) a.getMetadata().get("createdAt");
                String dateB = (String) b.getMetadata().get("createdAt");
                // Documents without a createdAt stamp sort first (treated as
                // oldest) instead of throwing an NPE in compareTo.
                if (dateA == null || dateB == null) {
                    return dateA == null ? (dateB == null ? 0 : -1) : 1;
                }
                return dateA.compareTo(dateB);
            })
            .limit(count)
            .map(Document::getId)
            .toList();
        vectorStore.delete(idsToDelete);
        documentCount.addAndGet(-idsToDelete.size());
    }

    private void archiveAndClear() {
        // Save current state and clear
        try {
            File archive = new File("archive-" + System.currentTimeMillis() + ".json");
            vectorStore.save(archive);
            logger.info("Archived to {}", archive);
            // Clear by creating new instance
            // Note: This depends on your setup
        } catch (IOException e) {
            logger.error("Failed to archive", e);
        }
    }

    private void migrateToPersistentStore() {
        // Switch to production-grade vector store
        logger.warn("Memory limit reached, consider migrating to persistent store");
        // Implementation: Migrate data to Pinecone, PgVector, etc.
    }
}
Handle documents with encoding issues.
/**
 * Adds a document after stripping characters that commonly break embedding
 * pipelines, then drops null metadata values and strips non-ASCII from
 * string metadata.
 * NOTE(review): removing ALL non-ASCII characters destroys non-English
 * content — relax the pattern if you index multilingual text.
 *
 * @param content  raw document text
 * @param metadata caller metadata; may be null
 */
public void addDocumentWithSanitization(String content, Map<String, Object> metadata) {
    // Remove or replace invalid UTF-8 characters
    String sanitizedContent = content
        .replaceAll("[^\\x00-\\x7F]", "") // Remove non-ASCII
        .replaceAll("\\p{C}", ""); // Remove control characters
    if (sanitizedContent.isBlank()) {
        logger.warn("Document content became empty after sanitization");
        return;
    }
    // Tolerate a null metadata map instead of failing on entrySet().
    Map<String, Object> source = metadata != null ? metadata : Map.of();
    // Sanitize metadata values
    Map<String, Object> sanitizedMetadata = source.entrySet().stream()
        .filter(entry -> entry.getValue() != null)
        .collect(Collectors.toMap(
            Map.Entry::getKey,
            entry -> {
                Object value = entry.getValue();
                if (value instanceof String str) {
                    return str.replaceAll("[^\\x00-\\x7F]", "");
                }
                return value;
            }
        ));
    Document doc = new Document(sanitizedContent, sanitizedMetadata);
    vectorStore.add(List.of(doc));
}
public class VectorStoreValidator {
/**
 * Rejects documents that cannot be safely indexed.
 *
 * @throws IllegalArgumentException if the document, its content, or its
 *         metadata is null (or the content is blank)
 */
public static void validateDocument(Document doc) {
    if (doc == null) {
        throw new IllegalArgumentException("Document cannot be null");
    }
    String content = doc.getContent();
    if (content == null || content.isBlank()) {
        throw new IllegalArgumentException("Document content cannot be empty");
    }
    if (doc.getMetadata() == null) {
        throw new IllegalArgumentException("Document metadata cannot be null");
    }
}
/**
 * Rejects search requests with missing queries or out-of-range parameters.
 *
 * @throws IllegalArgumentException on any invalid field
 */
public static void validateSearchRequest(SearchRequest request) {
    if (request == null) {
        throw new IllegalArgumentException("SearchRequest cannot be null");
    }
    String query = request.getQuery();
    if (query == null || query.isBlank()) {
        throw new IllegalArgumentException("Query cannot be empty");
    }
    if (request.getTopK() < 0) {
        throw new IllegalArgumentException("TopK must be >= 0");
    }
    double threshold = request.getSimilarityThreshold();
    if (threshold < 0.0 || threshold > 1.0) {
        throw new IllegalArgumentException(
            "Similarity threshold must be in range [0.0, 1.0]");
    }
}
}
@Service
/**
 * Wraps vector-store searches in a resilience4j circuit breaker so a
 * failing backend fails fast instead of piling up requests.
 */
public class ResilientVectorStoreService {

    private final VectorStore vectorStore;
    private final CircuitBreaker circuitBreaker;

    public ResilientVectorStoreService(VectorStore vectorStore) {
        this.vectorStore = vectorStore;
        // Open the circuit at a 50% failure rate; probe again after 30s.
        CircuitBreakerConfig config = CircuitBreakerConfig.custom()
            .failureRateThreshold(50)
            .waitDurationInOpenState(Duration.ofSeconds(30))
            .build();
        this.circuitBreaker = CircuitBreaker.of("vectorStore", config);
    }

    public List<Document> searchWithCircuitBreaker(SearchRequest request) {
        return circuitBreaker.executeSupplier(() -> vectorStore.similaritySearch(request));
    }
}
@Test
void testEmptyResults() {
    // A query/filter combination that matches nothing must yield an empty
    // list — never null.
    VectorStore store = SimpleVectorStore.builder(embeddingModel).build();
    List<Document> results = store.similaritySearch(
        SearchRequest.builder()
            .query("nonexistent query")
            .filterExpression("category == 'nonexistent'")
            .build());
    assertThat(results).isEmpty();
}
@Test
void testDuplicateIds() {
    // NOTE(review): this asserts that re-adding an existing ID throws.
    // Some stores (map-backed in-memory stores in particular) may instead
    // silently overwrite the entry — confirm the store's duplicate-ID
    // contract before relying on this assertion.
    VectorStore store = SimpleVectorStore.builder(embeddingModel).build();
    Document doc1 = new Document("id-1", "content 1", Map.of());
    Document doc2 = new Document("id-1", "content 2", Map.of());
    store.add(List.of(doc1));
    assertThrows(RuntimeException.class, () -> {
        store.add(List.of(doc2));
    });
}
@Test
void testInvalidFilter() {
    // Malformed filter text must surface as a parse exception.
    FilterExpressionTextParser parser = new FilterExpressionTextParser();
    assertThrows(FilterExpressionParseException.class,
        () -> parser.parse("invalid filter syntax <<<"));
}
Install with Tessl CLI
npx tessl i tessl/maven-org-springframework-ai--spring-ai-vector-store