tessl/pypi-langchain-tests

Standard tests for LangChain implementations

—

Pending

Overview

Eval results

Files

Vector Store Testing

Name: tessl/pypi-langchain-tests
Author: tessl

A specialized testing suite for vector store implementations, covering CRUD operations, similarity search, async support, and bulk operations. The vector store tests exercise the core vector database functionality, including document storage, retrieval, deletion, and metadata handling.

Capabilities

Vector Store Integration Tests

Comprehensive integration testing for vector stores with 30+ test methods covering all vector store operations.

from langchain_tests.integration_tests import VectorStoreIntegrationTests

class VectorStoreIntegrationTests(BaseStandardTests):
    """Standard integration test suite for vector store implementations.

    This listing is an API summary: every member is shown with its signature
    and a one-line description only. To run the suite, subclass it and supply
    a ``vectorstore`` pytest fixture, as shown in the usage example of this
    document.

    NOTE(review): verify method names, signatures, and defaults against the
    installed ``langchain_tests`` version before relying on this summary.
    """
    
    # Configuration properties
    @property
    def has_sync(self) -> bool:
        """Whether the vector store supports synchronous operations. Default: True."""
    
    # NOTE(review): upstream langchain-tests may enable async tests by
    # default — confirm the default documented below.
    @property
    def has_async(self) -> bool:
        """Whether the vector store supports asynchronous operations. Default: False."""
    
    # Utility methods
    def get_embeddings(self):
        """Returns deterministic fake embeddings for consistent testing."""
    
    # Basic state tests
    def test_vectorstore_is_empty(self) -> None:
        """Verify that the vector store starts empty."""
    
    def test_vectorstore_still_empty(self) -> None:
        """Verify that the vector store is properly cleaned up after tests."""
    
    # Document addition tests
    def test_add_documents(self) -> None:
        """Test adding documents to the vector store."""
    
    def test_add_documents_with_ids_is_idempotent(self) -> None:
        """Test that adding documents with the same IDs is idempotent."""
    
    def test_add_documents_by_id_with_mutation(self) -> None:
        """Test adding documents with ID-based mutations."""
    
    # Document deletion tests
    def test_deleting_documents(self) -> None:
        """Test deleting individual documents from the vector store."""
    
    def test_deleting_bulk_documents(self) -> None:
        """Test bulk deletion of multiple documents."""
    
    def test_delete_missing_content(self) -> None:
        """Test deletion behavior when the content doesn't exist."""
    
    # Document retrieval tests
    def test_get_by_ids(self) -> None:
        """Test retrieving documents by their IDs."""
    
    def test_get_by_ids_missing(self) -> None:
        """Test behavior when retrieving non-existent document IDs."""
    
    # Similarity search tests
    def test_similarity_search(self) -> None:
        """Test similarity search functionality."""
    
    def test_similarity_search_with_score(self) -> None:
        """Test similarity search with relevance scores."""
    
    def test_similarity_search_with_score_threshold(self) -> None:
        """Test similarity search with score threshold filtering."""
    
    def test_similarity_search_by_vector(self) -> None:
        """Test similarity search using vector embeddings directly."""
    
    def test_similarity_search_by_vector_with_score(self) -> None:
        """Test vector-based similarity search with scores."""
    
    # Metadata filtering tests
    def test_similarity_search_with_filter(self) -> None:
        """Test similarity search with metadata filtering."""
    
    def test_similarity_search_with_complex_filter(self) -> None:
        """Test similarity search with complex metadata filters."""
    
    # Async operation tests (if has_async=True)
    # NOTE(review): these are listed as plain ``def``; tests that await async
    # vector store methods would normally be ``async def`` — confirm against
    # the library.
    def test_aadd_documents(self) -> None:
        """Test asynchronous document addition."""
    
    def test_adelete_documents(self) -> None:
        """Test asynchronous document deletion."""
    
    def test_aget_by_ids(self) -> None:
        """Test asynchronous document retrieval by IDs."""
    
    def test_asimilarity_search(self) -> None:
        """Test asynchronous similarity search."""
    
    def test_asimilarity_search_with_score(self) -> None:
        """Test asynchronous similarity search with scores."""
    
    # Max marginal relevance (MMR) tests
    def test_max_marginal_relevance_search(self) -> None:
        """Test max marginal relevance search for diverse results."""
    
    def test_max_marginal_relevance_search_by_vector(self) -> None:
        """Test MMR search using vector embeddings directly."""
    
    # Async MMR tests (if has_async=True)
    def test_amax_marginal_relevance_search(self) -> None:
        """Test asynchronous max marginal relevance search."""
    
    def test_amax_marginal_relevance_search_by_vector(self) -> None:
        """Test async MMR search using vector embeddings."""

Usage Example

import pytest
from langchain_tests.integration_tests import VectorStoreIntegrationTests
from my_integration import MyVectorStore

class TestMyVectorStore(VectorStoreIntegrationTests):
    """Runs the standard vector store suite against ``MyVectorStore``."""

    @property
    def has_sync(self):
        """Report that your vector store supports sync operations."""
        return True

    @property
    def has_async(self):
        """Report that your vector store also supports async operations."""
        return True

    @pytest.fixture
    def vectorstore(self):
        """Yield a fresh vector store for each test, then drop its collection."""
        instance = MyVectorStore(
            collection_name="test_collection",
            connection_url="postgresql://user:pass@localhost/testdb",
        )
        yield instance
        # Teardown: everything after the yield is the cleanup after the test.
        instance.delete_collection()

Test Embeddings Utility

The framework provides a deterministic embeddings utility for consistent testing:

def get_embeddings(self):
    """Return the deterministic fake embeddings used for consistent testing.

    The same input text always produces the same vector, which keeps test
    outcomes reproducible.

    Returns:
        FakeEmbeddings: Embeddings instance that generates consistent
            vectors for the same input text.
    """

FakeEmbeddings Implementation

class FakeEmbeddings:
    """Deterministic embeddings for testing purposes.

    Interface summary only: each method is shown with its signature and a
    short description.
    """
    # NOTE(review): ``List`` is presumably ``typing.List``; this snippet does
    # not import it — add ``from typing import List`` when copying it.
    
    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Generate embeddings for a list of documents.

        Args:
            texts: The document texts to embed.

        Returns:
            A list of embedding vectors (lists of floats).
        """
    
    def embed_query(self, text: str) -> List[float]:
        """Generate the embedding for a single query.

        Args:
            text: The query text to embed.

        Returns:
            The embedding vector as a list of floats.
        """
    
    async def aembed_documents(self, texts: List[str]) -> List[List[float]]:
        """Async version of embed_documents."""
    
    async def aembed_query(self, text: str) -> List[float]:
        """Async version of embed_query."""

Test Constants

EMBEDDING_SIZE = 6  # Standard embedding dimension (vector length) for vector store tests

Document Fixtures

The framework provides standard document fixtures for consistent testing:

def get_test_documents():
    """Return the standard list of test documents, each with metadata.

    Returns:
        List[Document]: Standard test documents with varied content and
            metadata.
    """

Document Structure

from langchain_core.documents import Document

# Example test documents.
# Each document pairs free-text ``page_content`` with a ``metadata`` dict;
# in this example every document carries "category" and "difficulty" keys.
documents = [
    # Category "AI", difficulty "beginner"
    Document(
        page_content="This is a test document about machine learning.",
        metadata={"category": "AI", "difficulty": "beginner"}
    ),
    # Category "AI", difficulty "advanced"
    Document(
        page_content="Advanced neural network architectures and training.",
        metadata={"category": "AI", "difficulty": "advanced"}
    ),
    # Category "databases", difficulty "intermediate"
    Document(
        page_content="Introduction to vector databases and similarity search.",
        metadata={"category": "databases", "difficulty": "intermediate"}
    )
]

Async Testing Patterns

For vector stores that support async operations, the framework provides comprehensive async testing:

Async Test Example

class TestAsyncVectorStore(VectorStoreIntegrationTests):
    """Runs the standard vector store suite against an async vector store."""

    @pytest.fixture
    async def vectorstore(self):
        """Create the store, hand it to the test, then close it."""
        # NOTE(review): async fixtures need an async-aware pytest plugin
        # (e.g. pytest-asyncio) — confirm your project's pytest configuration.
        async_store = await MyAsyncVectorStore.create(
            connection_string="async://localhost/testdb",
        )
        yield async_store
        await async_store.close()

    @property
    def has_async(self):
        """Declare async support for this vector store."""
        return True

Error Handling Tests

The framework validates proper error handling for common vector store failures:

def test_add_documents_invalid_embedding_dimension(self) -> None:
    """Test handling of invalid embedding dimensions when adding documents.

    NOTE(review): the expected outcome (e.g. which error is raised) is not
    specified in this summary — confirm against the library.
    """

def test_similarity_search_invalid_query(self) -> None:
    """Test handling of invalid query parameters in similarity search.

    NOTE(review): the expected outcome (e.g. which error is raised) is not
    specified in this summary — confirm against the library.
    """

def test_delete_nonexistent_documents(self) -> None:
    """Test deletion of documents that don't exist.

    NOTE(review): whether this should be a no-op or an error is not
    specified in this summary — confirm against the library.
    """

Metadata Filtering

Comprehensive testing for metadata-based filtering:

Filter Types

# Simple equality filter: match documents whose "category" metadata is "AI"
filter_dict = {"category": "AI"}

# Complex filter with multiple conditions on different metadata keys.
# NOTE(review): the "$in" / "$ne" keys look like MongoDB-style query
# operators; filter syntax is typically backend-specific — confirm what
# your vector store accepts.
complex_filter = {
    "category": {"$in": ["AI", "databases"]},
    "difficulty": {"$ne": "beginner"}
}

# Range filter for numeric metadata (bounds given via "$gte" and "$lte").
# Assumes documents carry a numeric "score" metadata field.
range_filter = {
    "score": {"$gte": 0.8, "$lte": 1.0}
}

Performance Considerations

Vector store tests include performance considerations:

- Bulk Operations: Test performance with large document batches
- Search Performance: Benchmark similarity search with various k values
- Memory Usage: Monitor memory consumption during large operations
- Connection Management: Test connection pooling and cleanup

Cleanup and Isolation

The framework ensures proper test isolation:

@pytest.fixture
def vectorstore(self):
    """Vector store fixture with proper cleanup.

    Each invocation creates a store bound to a uniquely named collection
    (``test_<uuid4>``), so tests do not share data.

    Yields:
        MyVectorStore: A store backed by a fresh collection.
    """
    store = MyVectorStore(collection_name=f"test_{uuid.uuid4()}")
    yield store
    # Ensure complete cleanup: close the store even if dropping the
    # collection raises, so a failed delete cannot leak the connection.
    try:
        store.delete_collection()
    finally:
        store.close()

Collection Management

For vector stores that support multiple collections:

def test_create_collection(self) -> None:
    """Test collection creation.

    Applies to vector stores that support multiple collections.
    """

def test_delete_collection(self) -> None:
    """Test collection deletion.

    Applies to vector stores that support multiple collections.
    """

def test_list_collections(self) -> None:
    """Test listing the available collections.

    Applies to vector stores that support multiple collections.
    """

Indexing and Optimization

Tests for vector store optimization features:

def test_create_index(self) -> None:
    """Test index creation for performance optimization.

    Applies to vector stores that expose optimization features.
    """

def test_optimize_collection(self) -> None:
    """Test collection optimization operations.

    Applies to vector stores that expose optimization features.
    """

The vector store testing framework provides comprehensive coverage of all vector database operations, ensuring that implementations correctly handle document storage, similarity search, metadata filtering, and async operations while maintaining data consistency and performance standards.

Install with Tessl CLI