CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/maven-org-springframework-ai--spring-ai-vector-store

Common vector store functionality for Spring AI providing a portable abstraction layer for integrating vector databases with comprehensive filtering, similarity search, and observability support.

Overview
Eval results
Files

docs/examples/edge-cases.md

Edge Cases and Error Handling

Learn how to handle errors, edge cases, and exceptional scenarios in Spring AI Vector Store.

Error Handling Patterns

Pattern 1: Duplicate Document IDs

Some vector stores throw errors when adding documents with duplicate IDs.

try {
    vectorStore.add(documentsWithDuplicateIds);
} catch (RuntimeException e) {
    logger.error("Duplicate document IDs detected", e);

    // Pick ONE recovery strategy. Running both (as an earlier version of this
    // snippet did) stores every document twice: once under a fresh ID and once
    // under its original ID.

    // Strategy 1: Re-insert the same text and metadata under freshly generated IDs
    List<Document> withNewIds = documentsWithDuplicateIds.stream()
        .map(doc -> new Document(
            UUID.randomUUID().toString(),
            doc.getText(),
            doc.getMetadata()
        ))
        .toList();
    vectorStore.add(withNewIds);

    // Strategy 2 (alternative): Replace the existing documents, keeping their IDs
    // List<String> existingIds = documentsWithDuplicateIds.stream()
    //     .map(Document::getId)
    //     .toList();
    // vectorStore.delete(existingIds);
    // vectorStore.add(documentsWithDuplicateIds);
}

Pattern 2: Empty Search Results

Handle cases where no documents match the query or filters.

/**
 * Searches with progressively looser criteria until something is found.
 *
 * <ol>
 *   <li>threshold 0.8, top 10, with the given filter;</li>
 *   <li>threshold 0.6, top 20, with the given filter;</li>
 *   <li>top 10 with no filter and no explicit threshold.</li>
 * </ol>
 *
 * @param query  text to search for
 * @param filter filter expression applied in the first two attempts
 * @return the hits of the first attempt that returned anything, or the
 *         (possibly empty) result of the unfiltered attempt
 */
public List<Document> searchWithFallback(String query, String filter) {
    // Attempt 1: strict threshold, caller's filter.
    SearchRequest strict = SearchRequest.builder()
        .query(query)
        .filterExpression(filter)
        .similarityThreshold(0.8)
        .topK(10)
        .build();
    List<Document> strictHits = vectorStore.similaritySearch(strict);
    if (!strictHits.isEmpty()) {
        return strictHits;
    }

    // Attempt 2: same filter, lower threshold and a wider result window.
    logger.warn("No results found with strict criteria, relaxing...");
    SearchRequest relaxed = SearchRequest.builder()
        .query(query)
        .filterExpression(filter)
        .similarityThreshold(0.6)
        .topK(20)
        .build();
    List<Document> relaxedHits = vectorStore.similaritySearch(relaxed);
    if (!relaxedHits.isEmpty()) {
        return relaxedHits;
    }

    // Attempt 3: drop the filter entirely.
    logger.warn("Still no results, removing filters...");
    SearchRequest unfiltered = SearchRequest.builder()
        .query(query)
        .topK(10)
        .build();
    return vectorStore.similaritySearch(unfiltered);
}

Pattern 3: Filter Expression Parse Errors

Handle invalid filter syntax from user input.

import org.springframework.ai.vectorstore.filter.FilterExpressionTextParser;
import org.springframework.ai.vectorstore.filter.FilterExpressionTextParser.FilterExpressionParseException;

/**
 * Parses a textual filter, treating blank or unparsable input as "no filter".
 *
 * @param filterText filter expression text, typically from user input; may be null
 * @return the parsed expression, or {@code null} when the text is null, blank,
 *         or rejected by the parser (the rejection is logged)
 */
public Filter.Expression parseFilterSafely(String filterText) {
    boolean hasText = filterText != null && !filterText.isBlank();
    if (!hasText) {
        return null;
    }

    try {
        return new FilterExpressionTextParser().parse(filterText);
    } catch (FilterExpressionParseException e) {
        logger.error("Invalid filter syntax: {}", filterText, e);

        // Strategy 1 (used here): fall back to no filter at all.
        // Strategy 2: fall back to a default filter, e.g.
        //     return new FilterExpressionBuilder().eq("status", "active").build();
        // Strategy 3: surface a user-friendly exception, e.g.
        //     throw new InvalidFilterException("Invalid filter syntax: " + e.getMessage());
        return null;
    }
}

Pattern 4: Unsupported Operations

Handle operations not supported by all vector stores.

/**
 * Deletes every document matching {@code filter}, falling back to
 * search-then-delete-by-ID when the store rejects filter-based deletion.
 *
 * <p>The fallback works in batches and keeps going until a batch comes back
 * short, so more than one batch worth of matches is removed.
 *
 * @param filter filter expression selecting the documents to delete
 */
public void deleteByFilter(String filter) {
    try {
        vectorStore.delete(filter);
    } catch (UnsupportedOperationException e) {
        logger.warn("Filter-based deletion not supported, falling back to ID-based deletion");

        final int batchSize = 1000;
        // Safety cap: if deletes are not immediately visible to searches, the same
        // documents could be returned forever.
        final int maxBatches = 100;
        int totalDeleted = 0;

        for (int batchNo = 0; batchNo < maxBatches; batchNo++) {
            // The query text only influences ranking; the filter decides which
            // documents qualify.
            // NOTE(review): assumes the default similarity threshold accepts every
            // filtered document - confirm for your store.
            SearchRequest request = SearchRequest.builder()
                .query("*")
                .filterExpression(filter)
                .topK(batchSize)
                .build();

            List<Document> documentsToDelete = vectorStore.similaritySearch(request);
            if (documentsToDelete.isEmpty()) {
                break;
            }

            List<String> idsToDelete = documentsToDelete.stream()
                .map(Document::getId)
                .toList();
            vectorStore.delete(idsToDelete);
            totalDeleted += idsToDelete.size();

            // A short batch means nothing else matches the filter.
            if (documentsToDelete.size() < batchSize) {
                break;
            }
        }

        if (totalDeleted > 0) {
            logger.info("Deleted {} documents using ID-based fallback", totalDeleted);
        }
    }
}

Pattern 5: I/O Errors (SimpleVectorStore)

Handle file I/O errors when loading/saving state.

/**
 * Saves the store to {@code file}, retrying on I/O failure with a linearly
 * growing delay (1s, 2s, 3s, ...).
 *
 * <p>After the last failed attempt, one save to {@code vectorstore-backup.json}
 * in the same directory is tried before the failure is rethrown.
 *
 * <p>NOTE(review): confirm that {@code SimpleVectorStore.save(File)} declares
 * {@code IOException} in your Spring AI version; if it wraps I/O failures in an
 * unchecked exception, the catch clauses below must be adapted.
 *
 * @param store      store to persist
 * @param file       primary target file
 * @param maxRetries maximum number of save attempts; nothing is attempted when {@code <= 0}
 * @throws RuntimeException if all attempts fail, or if the thread is interrupted
 *                          while waiting between attempts
 */
public void saveWithRetry(SimpleVectorStore store, File file, int maxRetries) {
    int attempts = 0;

    while (attempts < maxRetries) {
        try {
            store.save(file);
            logger.info("Vector store saved successfully");
            return;
        } catch (IOException e) {
            attempts++;
            logger.error("Failed to save vector store (attempt {}/{})",
                attempts, maxRetries, e);

            if (attempts >= maxRetries) {
                // Last attempt failed
                logger.error("All save attempts failed, data may be lost!");

                // Strategy 1: Save to backup location
                try {
                    File backup = new File(file.getParent(), "vectorstore-backup.json");
                    store.save(backup);
                    logger.info("Saved to backup location: {}", backup);
                } catch (IOException backupError) {
                    logger.error("Backup save also failed", backupError);
                }

                throw new RuntimeException("Failed to save vector store after " +
                    maxRetries + " attempts", e);
            }

            // Wait before retry
            try {
                Thread.sleep(1000L * attempts);  // Linear backoff: 1s, 2s, 3s, ...
            } catch (InterruptedException ie) {
                // Restore the flag and stop: continuing would retry immediately with
                // no delay, because every later sleep would throw at once.
                Thread.currentThread().interrupt();
                RuntimeException aborted = new RuntimeException(
                    "Interrupted while waiting to retry vector store save", e);
                aborted.addSuppressed(ie);
                throw aborted;
            }
        }
    }
}

/**
 * Loads the store from {@code primaryFile}, falling back to {@code backupFile}
 * when the primary load fails. If both fail the error is logged and the store
 * is left as it was.
 *
 * @param store       store to populate
 * @param primaryFile file tried first
 * @param backupFile  file tried when the primary load fails
 */
public void loadWithFallback(SimpleVectorStore store, File primaryFile, File backupFile) {
    IOException primaryFailure = null;
    try {
        store.load(primaryFile);
        logger.info("Loaded vector store from primary file");
    } catch (IOException e) {
        primaryFailure = e;
    }
    if (primaryFailure == null) {
        return;
    }

    logger.warn("Failed to load from primary file, trying backup", primaryFailure);
    try {
        store.load(backupFile);
        logger.info("Loaded vector store from backup file");
    } catch (IOException backupError) {
        // Nothing left to try; carry on with whatever the store currently holds.
        logger.error("Failed to load from backup file, starting with empty store", backupError);
    }
}

Edge Cases

Edge Case 1: Null or Empty Metadata

Handle documents with missing or null metadata fields.

/**
 * Adds one document, tolerating null metadata and null metadata values.
 *
 * <p>The caller's map is copied, {@code createdAt} and {@code status} are
 * filled in when absent, and null-valued entries are dropped before the
 * document is created.
 *
 * @param content  document text
 * @param metadata metadata to attach; may be null
 */
public void addDocumentSafely(String content, Map<String, Object> metadata) {
    // Work on a private copy so the caller's map is never modified.
    Map<String, Object> prepared = new HashMap<>();
    if (metadata != null) {
        prepared.putAll(metadata);
    }

    // Defaults apply only where the caller supplied nothing.
    prepared.putIfAbsent("createdAt", Instant.now().toString());
    prepared.putIfAbsent("status", "active");

    // Drop entries whose value is still null.
    prepared.values().removeIf(value -> value == null);

    vectorStore.add(List.of(new Document(content, prepared)));
}

// When filtering, handle potential nulls
/**
 * Searches within one category, or, when no category is given, within all
 * documents that have a category set.
 *
 * <p>The category value is passed through the filter builder rather than being
 * concatenated into filter text, so quotes or operators inside
 * {@code category} cannot alter the filter expression.
 *
 * @param query    text to search for
 * @param category category to match exactly; {@code null} means "any category"
 * @return matching documents
 */
public List<Document> searchWithNullSafeFilter(String query, String category) {
    SearchRequest.Builder request = SearchRequest.builder().query(query);

    if (category != null) {
        // Fully-qualified name keeps this snippet self-contained; import
        // FilterExpressionBuilder in real code.
        request.filterExpression(
            new org.springframework.ai.vectorstore.filter.FilterExpressionBuilder()
                .eq("category", category)
                .build());
    } else {
        request.filterExpression("category IS NOT NULL");  // Only return docs with category set
    }

    return vectorStore.similaritySearch(request.build());
}

Edge Case 2: Empty Document List

Validate inputs before operations.

/**
 * Adds the given documents after discarding unusable ones.
 *
 * <p>A null or empty list is a logged no-op. Null elements and documents whose
 * text is null or blank are skipped; the number skipped is logged.
 *
 * @param documents documents to add; may be null, empty, or contain nulls
 */
public void addDocuments(List<Document> documents) {
    if (documents == null || documents.isEmpty()) {
        logger.warn("Attempted to add null or empty document list");
        return;  // No-op
    }

    // Filter out null entries and documents with empty content
    List<Document> validDocuments = documents.stream()
        .filter(doc -> doc != null && doc.getText() != null && !doc.getText().isBlank())
        .toList();

    if (validDocuments.isEmpty()) {
        logger.warn("All documents had empty content, nothing to add");
        return;
    }

    if (validDocuments.size() < documents.size()) {
        logger.warn("Filtered out {} documents with empty content",
            documents.size() - validDocuments.size());
    }

    vectorStore.add(validDocuments);
}

Edge Case 3: Very Long Content

Handle documents that exceed embedding model limits.

/**
 * Adds a document, splitting it into chunks when its estimated token count
 * exceeds the limit.
 *
 * <p>Each chunk gets a copy of the metadata plus {@code chunkIndex} and
 * {@code totalChunks}. Null or blank content is a logged no-op; null metadata
 * is treated as empty.
 *
 * @param content  document text; may be null
 * @param metadata metadata attached to the document or to every chunk; may be null
 */
public void addLongDocument(String content, Map<String, Object> metadata) {
    final int MAX_TOKENS = 8000;  // Example limit

    if (content == null || content.isBlank()) {
        logger.warn("Attempted to add a document with empty content");
        return;
    }

    // Null metadata would otherwise fail in the HashMap copy below.
    Map<String, Object> baseMetadata = metadata != null ? metadata : Map.of();

    if (estimateTokens(content) > MAX_TOKENS) {
        logger.warn("Document exceeds token limit, chunking...");

        // Strategy 1: Chunk the document
        List<String> chunks = chunkContent(content, MAX_TOKENS);

        for (int i = 0; i < chunks.size(); i++) {
            Map<String, Object> chunkMetadata = new HashMap<>(baseMetadata);
            chunkMetadata.put("chunkIndex", i);
            chunkMetadata.put("totalChunks", chunks.size());

            Document chunk = new Document(chunks.get(i), chunkMetadata);
            vectorStore.add(List.of(chunk));
        }

        return;
    }

    // Normal case
    Document doc = new Document(content, baseMetadata);
    vectorStore.add(List.of(doc));
}

/**
 * Estimates the token count of {@code text} as one token per four characters
 * (integer division).
 *
 * @param text text to measure; must not be null
 * @return {@code text.length() / 4}
 */
private int estimateTokens(String text) {
    final int charsPerToken = 4;
    int characterCount = text.length();
    return characterCount / charsPerToken;
}

/**
 * Splits {@code content} into consecutive chunks of at most
 * {@code maxTokens * 4} characters.
 *
 * <p>A chunk is shortened by one character where the boundary would otherwise
 * fall between the two halves of a surrogate pair, so supplementary characters
 * (for example emoji) are never cut in half.
 *
 * @param content   text to split; must not be null
 * @param maxTokens per-chunk token budget; must be positive
 * @return the chunks in order; empty when {@code content} is empty
 * @throws IllegalArgumentException if {@code maxTokens} is not positive
 */
private List<String> chunkContent(String content, int maxTokens) {
    if (maxTokens <= 0) {
        // A zero chunk size would never advance the loop below.
        throw new IllegalArgumentException("maxTokens must be positive: " + maxTokens);
    }

    List<String> chunks = new ArrayList<>();
    // Convert tokens to characters; long arithmetic avoids int overflow for huge budgets.
    int chunkSize = (int) Math.min((long) maxTokens * 4, Integer.MAX_VALUE);

    int start = 0;
    while (start < content.length()) {
        int end = (int) Math.min((long) start + chunkSize, content.length());

        // chunkSize is at least 4, so backing off one char never produces an empty chunk.
        if (end < content.length()
                && Character.isHighSurrogate(content.charAt(end - 1))
                && Character.isLowSurrogate(content.charAt(end))) {
            end--;
        }

        chunks.add(content.substring(start, end));
        start = end;
    }

    return chunks;
}

Edge Case 4: Embedding Generation Failures

Handle failures during embedding generation.

/**
 * Adds documents one at a time, retrying each up to three times with a
 * linearly growing delay (1s, 2s).
 *
 * <p>A document that still fails after the last attempt is handed to
 * {@code storeFailedDocument} and processing moves on to the next one. If the
 * thread is interrupted while waiting, the in-flight document is handed to
 * {@code storeFailedDocument} as well, the interrupt flag is restored, and the
 * method returns without processing the remaining documents.
 *
 * @param documents documents to add
 */
public void addDocumentsWithRetry(List<Document> documents) {
    final int maxRetries = 3;

    for (Document doc : documents) {
        int attempt = 0;

        while (attempt < maxRetries) {
            try {
                vectorStore.add(List.of(doc));
                break;  // Success
            } catch (Exception e) {
                attempt++;
                logger.error("Failed to add document (attempt {}/{}): {}",
                    attempt, maxRetries, doc.getId(), e);

                if (attempt >= maxRetries) {
                    // Log failure and continue with next document
                    logger.error("Failed to add document after {} attempts: {}",
                        maxRetries, doc.getId());
                    // Optionally: Store failed document for later processing
                    storeFailedDocument(doc, e);
                } else {
                    // Wait before retry
                    try {
                        Thread.sleep(1000L * attempt);
                    } catch (InterruptedException ie) {
                        // Stop the whole run, not just this document: with the flag
                        // set, every later sleep would throw immediately.
                        Thread.currentThread().interrupt();
                        storeFailedDocument(doc, e);
                        logger.warn("Interrupted while retrying document {}, aborting remaining documents",
                            doc.getId());
                        return;
                    }
                }
            }
        }
    }
}

/**
 * Records a document that could not be added so it can be retried or reviewed
 * later. This placeholder only logs the document ID.
 *
 * @param doc   the document that failed
 * @param error the failure that caused it (currently unused)
 */
private void storeFailedDocument(Document doc, Exception error) {
    String failedId = doc.getId();
    logger.error("Storing failed document for manual review: {}", failedId);
    // Implementation: write to file, database, or queue
}

Edge Case 5: Concurrent Modifications

Handle concurrent access to vector stores.

/**
 * Serializes writes to a vector store behind a single lock while leaving reads
 * unguarded.
 *
 * <p>The lock only protects calls made through this service; code that writes
 * to the underlying store directly is not coordinated with it.
 */
@Service
public class ConcurrentSafeVectorStoreService {

    private final VectorStore vectorStore;
    private final ReentrantLock lock = new ReentrantLock();

    /**
     * @param vectorStore the store all operations are delegated to
     */
    public ConcurrentSafeVectorStoreService(VectorStore vectorStore) {
        // Without this constructor the final field is never assigned and the
        // class does not compile.
        this.vectorStore = vectorStore;
    }

    /**
     * Adds documents while holding the write lock.
     *
     * @param documents documents to add
     */
    public void addDocumentsSafely(List<Document> documents) {
        lock.lock();
        try {
            vectorStore.add(documents);
        } finally {
            lock.unlock();
        }
    }

    /**
     * Runs a similarity search without taking the lock.
     *
     * @param request the search to run
     * @return matching documents
     */
    public List<Document> searchSafely(SearchRequest request) {
        // Reads can be concurrent for most vector stores
        return vectorStore.similaritySearch(request);
    }

    /**
     * Persists a {@code SimpleVectorStore} to {@code file} while holding the
     * same lock used for writes, so a save never overlaps an add made through
     * this service.
     *
     * @param store store to persist
     * @param file  target file
     */
    public void saveStateSafely(SimpleVectorStore store, File file) {
        lock.lock();
        try {
            store.save(file);
        } finally {
            lock.unlock();
        }
    }
}

Edge Case 6: Type Mismatches in Filters

Ensure metadata types match filter expectations.

/**
 * Adds a document whose metadata values are stored with their natural Java
 * types (String, Integer, Double, Boolean) instead of as strings, so that
 * typed filter comparisons have matching operands.
 *
 * @param content  document text
 * @param category stored as a String
 * @param year     stored as an Integer
 * @param rating   stored as a Double
 * @param featured stored as a Boolean
 */
public void addDocumentWithTypeSafety(
        String content,
        String category,
        int year,
        double rating,
        boolean featured) {

    // Box explicitly to make the stored types obvious.
    Integer yearValue = Integer.valueOf(year);
    Double ratingValue = Double.valueOf(rating);
    Boolean featuredValue = Boolean.valueOf(featured);

    Map<String, Object> typedMetadata = Map.of(
        "category", category,
        "year", yearValue,
        "rating", ratingValue,
        "featured", featuredValue
    );

    vectorStore.add(List.of(new Document(content, typedMetadata)));
}

// Validate filter will work with metadata types
/**
 * Checks whether a filter expression is syntactically valid.
 *
 * <p>Only parsing is checked; whether the referenced fields exist or have
 * matching types depends on your own schema management and is not verified.
 *
 * @param filterExpression filter text to check
 * @return {@code true} if the parser accepts the text, {@code false} (after
 *         logging) if it throws a parse exception
 */
public boolean validateFilter(String filterExpression) {
    boolean parsable;
    try {
        new FilterExpressionTextParser().parse(filterExpression);

        // Additional validation: check if fields exist in schema
        // Implementation depends on your schema management

        parsable = true;
    } catch (FilterExpressionParseException e) {
        logger.error("Invalid filter expression: {}", filterExpression, e);
        parsable = false;
    }
    return parsable;
}

Edge Case 7: Memory Constraints (SimpleVectorStore)

Monitor and handle memory limits for in-memory stores.

/**
 * Keeps an in-memory {@code SimpleVectorStore} below a fixed document count by
 * running a cleanup strategy before adding once the limit is reached.
 *
 * <p>The count tracks only documents added or removed through this service.
 * The capacity check and the add are not atomic, so concurrent callers may
 * briefly exceed the limit.
 */
@Service
public class MemoryAwareVectorStoreService {

    private final SimpleVectorStore vectorStore;
    private final int maxDocuments = 10000;
    private final AtomicInteger documentCount = new AtomicInteger(0);

    /**
     * @param vectorStore the in-memory store to guard
     */
    public MemoryAwareVectorStoreService(SimpleVectorStore vectorStore) {
        // Without this constructor the final field is never assigned and the
        // class does not compile.
        this.vectorStore = vectorStore;
    }

    /**
     * Adds one document, first freeing space if the store is at capacity.
     *
     * @param doc document to add
     */
    public void addDocumentWithCapacityCheck(Document doc) {
        if (documentCount.get() >= maxDocuments) {
            logger.warn("Vector store at capacity, triggering cleanup");

            // Pick ONE strategy; running all three on every capacity event both
            // evicts documents and writes an archive file each time.

            // Strategy 1: Remove oldest documents
            removeOldestDocuments(1000);

            // Strategy 2 (alternative): Archive to disk
            // archiveAndClear();

            // Strategy 3 (alternative): Switch to persistent store
            // migrateToPersistentStore();
        }

        vectorStore.add(List.of(doc));
        documentCount.incrementAndGet();
    }

    /**
     * Deletes up to {@code count} documents with the smallest {@code createdAt}
     * metadata value. Documents without {@code createdAt} sort first and are
     * therefore evicted first.
     */
    private void removeOldestDocuments(int count) {
        // Fetch every candidate, not just `count` of them: the search ranks by
        // similarity, so limiting it to `count` would pick arbitrary documents.
        // No filter is set, so nothing is excluded by metadata.
        // NOTE(review): assumes the default similarity threshold accepts all
        // documents and that the store holds at most maxDocuments - confirm.
        SearchRequest request = SearchRequest.builder()
            .query("*")
            .topK(maxDocuments)
            .build();

        List<Document> candidates = vectorStore.similaritySearch(request);
        List<String> idsToDelete = candidates.stream()
            .sorted((a, b) -> createdAtOf(a).compareTo(createdAtOf(b)))
            .limit(count)
            .map(Document::getId)
            .toList();

        if (idsToDelete.isEmpty()) {
            return;
        }

        vectorStore.delete(idsToDelete);
        documentCount.addAndGet(-idsToDelete.size());
    }

    /** Returns the document's {@code createdAt} metadata as text, or "" when absent. */
    private static String createdAtOf(Document doc) {
        Object createdAt = doc.getMetadata().get("createdAt");
        return createdAt != null ? createdAt.toString() : "";
    }

    /** Saves the current state to a timestamped archive file; clearing is left to the application. */
    private void archiveAndClear() {
        // Save current state and clear
        try {
            File archive = new File("archive-" + System.currentTimeMillis() + ".json");
            vectorStore.save(archive);
            logger.info("Archived to {}", archive);

            // Clear by creating new instance
            // Note: This depends on your setup
        } catch (IOException e) {
            logger.error("Failed to archive", e);
        }
    }

    /** Placeholder for migrating to a persistent store; currently only logs a warning. */
    private void migrateToPersistentStore() {
        // Switch to production-grade vector store
        logger.warn("Memory limit reached, consider migrating to persistent store");
        // Implementation: Migrate data to Pinecone, PgVector, etc.
    }
}

Edge Case 8: Invalid UTF-8 Characters

Handle documents with encoding issues.

/**
 * Adds a document after removing characters that commonly result from broken
 * decoding, while leaving legitimate non-ASCII text untouched.
 *
 * <p>Removed: unpaired surrogate code units and control characters other than
 * tab, line feed and carriage return. Null content is treated as empty; null
 * metadata is treated as an empty map; null metadata values are dropped.
 *
 * @param content  document text; may be null
 * @param metadata metadata to attach; String values are sanitized the same way; may be null
 */
public void addDocumentWithSanitization(String content, Map<String, Object> metadata) {
    String sanitizedContent = content == null ? "" : stripInvalidCharacters(content);

    if (sanitizedContent.isBlank()) {
        logger.warn("Document content became empty after sanitization");
        return;
    }

    Map<String, Object> sourceMetadata = metadata != null ? metadata : Map.of();

    // Sanitize metadata values
    Map<String, Object> sanitizedMetadata = sourceMetadata.entrySet().stream()
        .filter(entry -> entry.getValue() != null)
        .collect(Collectors.toMap(
            Map.Entry::getKey,
            entry -> {
                Object value = entry.getValue();
                if (value instanceof String str) {
                    return stripInvalidCharacters(str);
                }
                return value;
            }
        ));

    Document doc = new Document(sanitizedContent, sanitizedMetadata);
    vectorStore.add(List.of(doc));
}

/**
 * Removes unpaired surrogates (category Cs) and control characters (category
 * Cc) except tab, line feed and carriage return.
 */
private static String stripInvalidCharacters(String text) {
    // Java strings are UTF-16, so a lone surrogate is what a badly decoded
    // character looks like. Properly paired surrogates form one supplementary
    // code point and are not matched by \p{Cs}.
    return text.replaceAll("[\\p{Cs}\\p{Cc}&&[^\\t\\n\\r]]", "");
}

Defensive Programming

Input Validation

/**
 * Static precondition checks for documents and search requests. Every failed
 * check throws {@link IllegalArgumentException} with a message naming the
 * violated rule.
 */
public class VectorStoreValidator {

    /**
     * Validates that a document is non-null, has non-blank text and has
     * non-null metadata.
     *
     * @param doc document to check
     * @throws IllegalArgumentException if any check fails
     */
    public static void validateDocument(Document doc) {
        require(doc != null, "Document cannot be null");
        require(doc.getText() != null && !doc.getText().isBlank(),
            "Document content cannot be empty");
        require(doc.getMetadata() != null, "Document metadata cannot be null");
    }

    /**
     * Validates that a search request is non-null, has a non-blank query, a
     * non-negative topK and a similarity threshold within [0.0, 1.0].
     *
     * @param request request to check
     * @throws IllegalArgumentException if any check fails
     */
    public static void validateSearchRequest(SearchRequest request) {
        require(request != null, "SearchRequest cannot be null");
        require(request.getQuery() != null && !request.getQuery().isBlank(),
            "Query cannot be empty");
        require(request.getTopK() >= 0, "TopK must be >= 0");
        require(request.getSimilarityThreshold() >= 0.0
                && request.getSimilarityThreshold() <= 1.0,
            "Similarity threshold must be in range [0.0, 1.0]");
    }

    /** Throws {@link IllegalArgumentException} with {@code message} unless {@code condition} holds. */
    private static void require(boolean condition, String message) {
        if (!condition) {
            throw new IllegalArgumentException(message);
        }
    }
}

Circuit Breaker Pattern

/**
 * Routes similarity searches through a circuit breaker named
 * {@code "vectorStore"}, configured with a 50% failure-rate threshold and a
 * 30-second wait in the open state.
 */
@Service
public class ResilientVectorStoreService {

    private final VectorStore vectorStore;
    private final CircuitBreaker circuitBreaker;

    /**
     * @param vectorStore the store that searches are delegated to
     */
    public ResilientVectorStoreService(VectorStore vectorStore) {
        CircuitBreakerConfig breakerConfig = CircuitBreakerConfig.custom()
            .failureRateThreshold(50)
            .waitDurationInOpenState(Duration.ofSeconds(30))
            .build();

        this.vectorStore = vectorStore;
        this.circuitBreaker = CircuitBreaker.of("vectorStore", breakerConfig);
    }

    /**
     * Runs the search inside the circuit breaker.
     *
     * @param request the search to run
     * @return matching documents
     */
    public List<Document> searchWithCircuitBreaker(SearchRequest request) {
        return circuitBreaker.executeSupplier(() -> vectorStore.similaritySearch(request));
    }
}

Testing Edge Cases

/** A search against a freshly built, empty store returns no documents. */
@Test
void testEmptyResults() {
    VectorStore emptyStore = SimpleVectorStore.builder(embeddingModel).build();

    List<Document> hits = emptyStore.similaritySearch(
        SearchRequest.builder()
            .query("nonexistent query")
            .filterExpression("category == 'nonexistent'")
            .build());

    assertThat(hits).isEmpty();
}

/**
 * Adding a second document under an already-used ID is expected to throw.
 *
 * NOTE(review): this holds only for stores that reject duplicate IDs;
 * SimpleVectorStore may instead overwrite the earlier document - confirm
 * against the store under test.
 */
@Test
void testDuplicateIds() {
    VectorStore store = SimpleVectorStore.builder(embeddingModel).build();
    Document original = new Document("id-1", "content 1", Map.of());
    Document sameIdDifferentContent = new Document("id-1", "content 2", Map.of());

    store.add(List.of(original));

    assertThrows(RuntimeException.class,
        () -> store.add(List.of(sameIdDifferentContent)));
}

/** Malformed filter text is rejected by the text parser with a parse exception. */
@Test
void testInvalidFilter() {
    FilterExpressionTextParser textParser = new FilterExpressionTextParser();
    String malformed = "invalid filter syntax <<<";

    assertThrows(FilterExpressionParseException.class,
        () -> textParser.parse(malformed));
}

Reference

  • Quick Start Guide - Getting started
  • Real-World Scenarios - Common patterns
  • API Reference - Complete API

Install with Tessl CLI

npx tessl i tessl/maven-org-springframework-ai--spring-ai-vector-store@1.1.0

docs

examples

edge-cases.md

real-world-scenarios.md

index.md

tile.json