Standard tests for LangChain implementations
—
Specialized testing suite for vector store implementations with comprehensive CRUD operations, similarity search, async support, and bulk operations testing. The vector store tests cover all aspects of vector database functionality including document storage, retrieval, deletion, and metadata handling.
Comprehensive integration testing for vector stores with 30+ test methods covering all vector store operations.
from langchain_tests.integration_tests import VectorStoreIntegrationTests
class VectorStoreIntegrationTests(BaseStandardTests):
"""Integration tests for vector stores with comprehensive CRUD operations."""
# Configuration properties
@property
def has_sync(self) -> bool:
"""Whether the vector store supports synchronous operations. Default: True."""
@property
def has_async(self) -> bool:
"""Whether the vector store supports asynchronous operations. Default: False."""
# Utility methods
def get_embeddings(self):
"""Returns deterministic fake embeddings for consistent testing."""
# Basic state tests
def test_vectorstore_is_empty(self) -> None:
"""Verify that the vector store starts empty."""
def test_vectorstore_still_empty(self) -> None:
"""Verify that the vector store is properly cleaned up after tests."""
# Document addition tests
def test_add_documents(self) -> None:
"""Test adding documents to the vector store."""
def test_add_documents_with_ids_is_idempotent(self) -> None:
"""Test that adding documents with same IDs is idempotent."""
def test_add_documents_by_id_with_mutation(self) -> None:
"""Test adding documents with ID-based mutations."""
# Document deletion tests
def test_deleting_documents(self) -> None:
"""Test deleting individual documents from the vector store."""
def test_deleting_bulk_documents(self) -> None:
"""Test bulk deletion of multiple documents."""
def test_delete_missing_content(self) -> None:
"""Test deletion behavior when content doesn't exist."""
# Document retrieval tests
def test_get_by_ids(self) -> None:
"""Test retrieving documents by their IDs."""
def test_get_by_ids_missing(self) -> None:
"""Test behavior when retrieving non-existent document IDs."""
# Similarity search tests
def test_similarity_search(self) -> None:
"""Test similarity search functionality."""
def test_similarity_search_with_score(self) -> None:
"""Test similarity search with relevance scores."""
def test_similarity_search_with_score_threshold(self) -> None:
"""Test similarity search with score threshold filtering."""
def test_similarity_search_by_vector(self) -> None:
"""Test similarity search using vector embeddings directly."""
def test_similarity_search_by_vector_with_score(self) -> None:
"""Test vector-based similarity search with scores."""
# Metadata filtering tests
def test_similarity_search_with_filter(self) -> None:
"""Test similarity search with metadata filtering."""
def test_similarity_search_with_complex_filter(self) -> None:
"""Test similarity search with complex metadata filters."""
# Async operation tests (if has_async=True)
def test_aadd_documents(self) -> None:
"""Test asynchronous document addition."""
def test_adelete_documents(self) -> None:
"""Test asynchronous document deletion."""
def test_aget_by_ids(self) -> None:
"""Test asynchronous document retrieval by IDs."""
def test_asimilarity_search(self) -> None:
"""Test asynchronous similarity search."""
def test_asimilarity_search_with_score(self) -> None:
"""Test asynchronous similarity search with scores."""
# Max marginal relevance tests
def test_max_marginal_relevance_search(self) -> None:
"""Test max marginal relevance search for diverse results."""
def test_max_marginal_relevance_search_by_vector(self) -> None:
"""Test MMR search using vector embeddings directly."""
# Async MMR tests (if has_async=True)
def test_amax_marginal_relevance_search(self) -> None:
"""Test asynchronous max marginal relevance search."""
def test_amax_marginal_relevance_search_by_vector(self) -> None:
"""Test async MMR search using vector embeddings."""

import pytest
from langchain_tests.integration_tests import VectorStoreIntegrationTests
from my_integration import MyVectorStore
class TestMyVectorStore(VectorStoreIntegrationTests):
@pytest.fixture
def vectorstore(self):
# Create a fresh vector store instance for each test
store = MyVectorStore(
connection_url="postgresql://user:pass@localhost/testdb",
collection_name="test_collection"
)
yield store
# Cleanup after test
store.delete_collection()
@property
def has_sync(self):
return True # Your vector store supports sync operations
@property
def has_async(self):
return True # Your vector store also supports async operations

The framework provides a deterministic embeddings utility for consistent testing:
def get_embeddings(self):
"""
Returns deterministic fake embeddings for consistent testing.
Returns:
FakeEmbeddings: Embeddings instance that generates consistent
vectors for the same input text
"""

class FakeEmbeddings:
"""Deterministic embeddings for testing purposes."""
def embed_documents(self, texts: List[str]) -> List[List[float]]:
"""Generate embeddings for a list of documents."""
def embed_query(self, text: str) -> List[float]:
"""Generate embedding for a single query."""
async def aembed_documents(self, texts: List[str]) -> List[List[float]]:
"""Async version of embed_documents."""
async def aembed_query(self, text: str) -> List[float]:
"""Async version of embed_query."""

EMBEDDING_SIZE = 6 # Standard embedding dimension for vector store tests

The framework provides standard document fixtures for consistent testing:
def get_test_documents():
"""
Returns a list of test documents with metadata.
Returns:
List[Document]: Standard test documents with varied content and metadata
"""

from langchain_core.documents import Document
# Example test documents
documents = [
Document(
page_content="This is a test document about machine learning.",
metadata={"category": "AI", "difficulty": "beginner"}
),
Document(
page_content="Advanced neural network architectures and training.",
metadata={"category": "AI", "difficulty": "advanced"}
),
Document(
page_content="Introduction to vector databases and similarity search.",
metadata={"category": "databases", "difficulty": "intermediate"}
)
]

For vector stores that support async operations, the framework provides comprehensive async testing:
class TestAsyncVectorStore(VectorStoreIntegrationTests):
@property
def has_async(self):
return True
@pytest.fixture
async def vectorstore(self):
store = await MyAsyncVectorStore.create(
connection_string="async://localhost/testdb"
)
yield store
await store.close()

The framework validates proper error handling for common vector store failures:
def test_add_documents_invalid_embedding_dimension(self):
"""Test handling of invalid embedding dimensions."""
def test_similarity_search_invalid_query(self):
"""Test handling of invalid query parameters."""
def test_delete_nonexistent_documents(self):
"""Test deletion of documents that don't exist."""

Comprehensive testing for metadata-based filtering:
# Simple equality filter
filter_dict = {"category": "AI"}
# Complex filter with multiple conditions
complex_filter = {
"category": {"$in": ["AI", "databases"]},
"difficulty": {"$ne": "beginner"}
}
# Range filter for numeric metadata
range_filter = {
"score": {"$gte": 0.8, "$lte": 1.0}
}

Vector store tests include performance considerations:
The framework ensures proper test isolation:
@pytest.fixture
def vectorstore(self):
"""Vector store fixture with proper cleanup."""
store = MyVectorStore(collection_name=f"test_{uuid.uuid4()}")
yield store
# Ensure complete cleanup
store.delete_collection()
store.close()

For vector stores that support multiple collections:
def test_create_collection(self) -> None:
"""Test collection creation."""
def test_delete_collection(self) -> None:
"""Test collection deletion."""
def test_list_collections(self) -> None:
"""Test listing available collections."""

Tests for vector store optimization features:
def test_create_index(self) -> None:
"""Test index creation for performance optimization."""
def test_optimize_collection(self) -> None:
"""Test collection optimization operations."""

The vector store testing framework provides comprehensive coverage of all vector database operations, ensuring that implementations correctly handle document storage, similarity search, metadata filtering, and async operations while maintaining data consistency and performance standards.
Install with Tessl CLI
npx tessl i tessl/pypi-langchain-tests