tessl/pypi-langchain-tests

Standard tests for LangChain implementations

—

Pending

Overview

Eval results

Files

Integration Testing

Name: tessl/pypi-langchain-tests
Author: tessl

Comprehensive test classes for full functionality verification including real API calls, streaming, tool calling, structured output, and multimodal inputs. Integration tests verify complete feature sets and real-world usage patterns with external services.

Capabilities

Chat Model Integration Tests

Comprehensive integration testing for chat models with 40+ test methods covering all aspects of chat model functionality.

from langchain_tests.integration_tests import ChatModelIntegrationTests

class ChatModelIntegrationTests(ChatModelTests):
    """Integration tests for chat models with comprehensive functionality testing.

    This is a signature-only listing of the test methods in the suite,
    grouped by feature area. Groups whose heading names a capability flag
    (for example ``has_tool_calling``) are conditional on that flag being
    enabled on the test class.

    NOTE(review): the ``a``-prefixed and ``_async`` methods are described as
    asynchronous but are shown here as plain ``def`` with no extra
    parameters; confirm the exact signatures against the library.
    """

    # Configuration is inherited from ChatModelTests; nothing is redefined here

    # Basic invocation tests
    def test_invoke(self) -> None:
        """Test basic model invocation with simple prompts."""

    def test_ainvoke(self) -> None:
        """Test asynchronous model invocation."""

    # Streaming tests
    def test_stream(self) -> None:
        """Test streaming responses from the model."""

    def test_astream(self) -> None:
        """Test asynchronous streaming responses."""

    # Batch processing tests
    def test_batch(self) -> None:
        """Test batch processing of multiple prompts."""

    def test_abatch(self) -> None:
        """Test asynchronous batch processing."""

    # Conversation tests
    def test_conversation(self) -> None:
        """Test multi-turn conversation handling."""

    def test_double_messages_conversation(self) -> None:
        """Test sequential message handling in conversations."""

    # Usage metadata tests
    def test_usage_metadata(self) -> None:
        """Test usage metadata tracking and validation."""

    def test_usage_metadata_streaming(self) -> None:
        """Test usage metadata in streaming responses."""

    # Stop sequence tests
    def test_stop_sequence(self) -> None:
        """Test stop sequence functionality."""

    # Tool calling tests (conditional on has_tool_calling=True)
    def test_tool_calling(self) -> None:
        """Test tool calling functionality."""

    def test_tool_calling_async(self) -> None:
        """Test asynchronous tool calling."""

    def test_bind_runnables_as_tools(self) -> None:
        """Test binding runnable objects as tools."""

    def test_tool_message_histories_string_content(self) -> None:
        """Test tool message histories with string content."""

    def test_tool_message_histories_list_content(self) -> None:
        """Test tool message histories with complex list content."""

    def test_tool_choice(self) -> None:
        """Test tool choice functionality."""

    def test_tool_calling_with_no_arguments(self) -> None:
        """Test tool calling with tools that take no arguments."""

    def test_tool_message_error_status(self) -> None:
        """Test error handling in tool messages."""

    # Structured output tests (conditional on has_structured_output=True)
    def test_structured_few_shot_examples(self) -> None:
        """Test structured output with few-shot examples."""

    def test_structured_output(self) -> None:
        """Test structured output generation."""

    def test_structured_output_async(self) -> None:
        """Test asynchronous structured output generation."""

    def test_structured_output_pydantic_2_v1(self) -> None:
        """Test Pydantic V1 compatibility in structured output."""

    def test_structured_output_optional_param(self) -> None:
        """Test structured output with optional parameters."""

    # JSON mode tests (conditional on supports_json_mode=True)
    def test_json_mode(self) -> None:
        """Test JSON mode functionality."""

    # Multimodal input tests (each conditional on its corresponding support flag)
    def test_pdf_inputs(self) -> None:
        """Test PDF input handling."""

    def test_audio_inputs(self) -> None:
        """Test audio input handling."""

    def test_image_inputs(self) -> None:
        """Test image input handling."""

    def test_image_tool_message(self) -> None:
        """Test image content in tool messages."""

    def test_anthropic_inputs(self) -> None:
        """Test Anthropic-style input format handling."""

    # Message handling tests
    def test_message_with_name(self) -> None:
        """Test messages with name attributes."""

    # Advanced functionality tests
    def test_agent_loop(self) -> None:
        """Test agent loop functionality with tool calling."""

    def test_unicode_tool_call_integration(self) -> None:
        """Test Unicode handling in tool calls."""

    # Performance tests
    def test_stream_time(self) -> None:
        """Benchmark streaming performance."""

Usage Example

from langchain_tests.integration_tests import ChatModelIntegrationTests
from my_integration import MyChatModel

class TestMyChatModelIntegration(ChatModelIntegrationTests):
    """Run the chat model integration suite against ``MyChatModel``."""

    @property
    def chat_model_class(self):
        """Chat model class under test."""
        return MyChatModel

    @property
    def chat_model_params(self):
        """Parameters supplied for the chat model under test."""
        params = {}
        # Integration tests need real credentials rather than placeholders
        params["api_key"] = "real-api-key"
        params["model"] = "gpt-4"
        params["temperature"] = 0.1
        return params

    # Capability flags describing what the model supports

    @property
    def has_tool_calling(self):
        """Declare that the model supports tool calling."""
        return True

    @property
    def has_structured_output(self):
        """Declare that the model supports structured output."""
        return True

    @property
    def supports_image_inputs(self):
        """Declare that the model accepts image inputs."""
        return True

    @property
    def returns_usage_metadata(self):
        """Declare that the model returns usage metadata."""
        return True

Embeddings Integration Tests

Integration testing for embeddings models with synchronous and asynchronous operations.

from langchain_tests.integration_tests import EmbeddingsIntegrationTests

class EmbeddingsIntegrationTests(EmbeddingsTests):
    """Integration tests for embeddings models.

    Signature-only listing covering query and document embedding, each in a
    synchronous and an asynchronous variant.

    NOTE(review): the ``aembed`` methods are described as asynchronous but
    are shown here as plain ``def``; confirm the exact signatures against
    the library.
    """

    def test_embed_query(self) -> None:
        """Test embedding a single query string."""

    def test_embed_documents(self) -> None:
        """Test embedding a list of documents."""

    def test_aembed_query(self) -> None:
        """Test asynchronous embedding of a single query."""

    def test_aembed_documents(self) -> None:
        """Test asynchronous embedding of document lists."""

Usage Example

from langchain_tests.integration_tests import EmbeddingsIntegrationTests
from my_integration import MyEmbeddings

class TestMyEmbeddingsIntegration(EmbeddingsIntegrationTests):
    """Run the embeddings integration suite against ``MyEmbeddings``."""

    @property
    def embeddings_class(self):
        """Embeddings class under test."""
        return MyEmbeddings

    @property
    def embedding_model_params(self):
        """Parameters supplied for the embeddings model under test."""
        params = {}
        params["api_key"] = "real-api-key"
        params["model"] = "text-embedding-3-large"
        return params

Tools Integration Tests

Integration testing for tools with schema validation and invocation verification.

from langchain_tests.integration_tests import ToolsIntegrationTests

class ToolsIntegrationTests(ToolsTests):
    """Integration tests for tools.

    Signature-only listing covering output-schema checks and direct
    invocation, each in a synchronous and an asynchronous variant.

    NOTE(review): the ``async`` methods are shown here as plain ``def``;
    confirm the exact signatures against the library.
    """

    def test_invoke_matches_output_schema(self) -> None:
        """Test that tool output matches its declared schema."""

    def test_async_invoke_matches_output_schema(self) -> None:
        """Test that async tool output matches its declared schema."""

    def test_invoke_no_tool_call(self) -> None:
        """Test direct tool invocation without tool call wrapper."""

    def test_async_invoke_no_tool_call(self) -> None:
        """Test direct async tool invocation without tool call wrapper."""

Usage Example

from langchain_tests.integration_tests import ToolsIntegrationTests
from my_integration import MySearchTool

class TestMySearchToolIntegration(ToolsIntegrationTests):
    """Run the tools integration suite against ``MySearchTool``."""

    @property
    def tool_constructor(self):
        """Tool class under test."""
        return MySearchTool

    @property
    def tool_constructor_params(self):
        """Parameters supplied for constructing the tool."""
        params = {}
        params["api_key"] = "real-search-api-key"
        params["base_url"] = "https://api.search-service.com"
        return params

    @property
    def tool_invoke_params_example(self):
        """Example invocation parameters for the tool."""
        example = {}
        example["query"] = "LangChain framework"
        example["num_results"] = 5
        return example

Retrievers Integration Tests

Integration testing for retriever implementations with document retrieval and parameter validation.

from langchain_tests.integration_tests import RetrieversIntegrationTests

class RetrieversIntegrationTests(BaseStandardTests):
    """Integration tests for retrievers.

    Subclasses describe the retriever under test through the properties
    below. The fixture and ``test_*`` methods are listed by signature only.

    NOTE(review): ``test_ainvoke_returns_documents`` is described as
    asynchronous but is shown here as a plain ``def``; confirm the exact
    signature against the library.
    """

    # Required properties: subclasses must provide these
    @property
    def retriever_constructor(self):
        """Retriever class to test."""

    @property
    def retriever_constructor_params(self) -> dict:
        """Constructor parameters for the retriever."""

    @property
    def retriever_query_example(self) -> str:
        """Example query string for testing."""

    # Optional property: override only when the retriever uses a different
    # argument name (for example 'top_k') for the number of results
    @property
    def num_results_arg_name(self) -> str:
        """Name of the parameter that controls number of results. Default: 'k'."""
        return "k"

    # Fixtures
    @pytest.fixture
    def retriever(self):
        """Retriever fixture for testing."""

    def test_k_constructor_param(self) -> None:
        """Test the number of results constructor parameter."""

    def test_invoke_with_k_kwarg(self) -> None:
        """Test runtime parameter for number of results."""

    def test_invoke_returns_documents(self) -> None:
        """Test that retriever returns Document objects."""

    def test_ainvoke_returns_documents(self) -> None:
        """Test that async retriever returns Document objects."""

Usage Example

from langchain_tests.integration_tests import RetrieversIntegrationTests
from my_integration import MyRetriever

class TestMyRetrieverIntegration(RetrieversIntegrationTests):
    """Run the retriever integration suite against ``MyRetriever``."""

    @property
    def retriever_constructor(self):
        """Retriever class under test."""
        return MyRetriever

    @property
    def retriever_constructor_params(self):
        """Parameters supplied for constructing the retriever."""
        params = {}
        params["index_name"] = "test-index"
        params["api_key"] = "real-api-key"
        return params

    @property
    def retriever_query_example(self):
        """Query string used as the example query."""
        query = "machine learning algorithms"
        return query

    @property
    def num_results_arg_name(self):
        """Name of the result-count parameter for this retriever."""
        # Overridden because this retriever takes 'top_k' rather than 'k'
        arg_name = "top_k"
        return arg_name

Pre-defined Test Tools

The integration test framework includes several pre-built tools for testing tool calling functionality:

# Pre-defined tools for testing
def magic_function(input: int) -> int:
    """Magic function tool with input validation.

    Signature-only listing; the computation itself is not shown here.

    Args:
        input: Integer argument passed to the tool.

    Returns:
        An integer, per the annotation.
    """

def magic_function_no_args() -> str:
    """No-argument magic function tool.

    Signature-only listing; the computation itself is not shown here.

    Returns:
        A string, per the annotation.
    """

def unicode_customer(customer_name: str, description: str) -> str:
    """Unicode handling tool for internationalization testing.

    Signature-only listing; the tool body is not shown here.

    Args:
        customer_name: Customer name string.
        description: Description string.

    Returns:
        A string, per the annotation.
    """

def current_weather_tool():
    """Weather tool fixture for testing tool calling.

    Signature-only listing; the fixture body and return value are not
    shown here.
    """

Test Callback Handlers

Integration tests include callback handlers for capturing and validating model behavior:

class _TestCallbackHandler:
    """Callback handler for capturing chat model options and events.

    Signature-only listing of the two hooks; what each hook records is not
    shown here.
    """

    def on_chat_model_start(self, serialized, messages, **kwargs):
        """Called when chat model starts processing.

        Args:
            serialized: Passed by the caller; structure not shown here.
            messages: Passed by the caller; structure not shown here.
            **kwargs: Additional keyword arguments accepted by the hook.
        """

    def on_llm_end(self, response, **kwargs):
        """Called when chat model completes processing.

        Args:
            response: Passed by the caller; structure not shown here.
            **kwargs: Additional keyword arguments accepted by the hook.
        """

Schema Generation Utilities

Utilities for generating test schemas for structured output testing:

def _get_joke_class(schema_type: str):
    """Generate joke schema for different output formats.

    Signature-only listing; the body is not shown here.

    Args:
        schema_type: Selects the output format of the generated schema. The
            accepted values are not shown in this listing.
    """

VCR Integration

Integration tests automatically use VCR (Video Cassette Recorder) for HTTP call recording and playback, enabling:

- Consistent Testing: Record real API responses once, then replay them on subsequent test runs
- Offline Testing: Run tests without network connectivity
- Cost Reduction: Avoid repeated API calls during test development
- Deterministic Results: Get the same responses every time for reliable testing

VCR integration is controlled by the enable_vcr_tests property in the base test class.

Performance Benchmarking

Integration tests include performance benchmarking capabilities:

- Stream Performance: test_stream_time() benchmarks streaming response times
- Batch Performance: Timing analysis for batch operations
- Tool Calling Performance: Benchmarking for tool calling overhead

Performance tests use pytest-benchmark for detailed statistical analysis and regression detection.