Standard tests for LangChain implementations
—
Comprehensive test classes for full functionality verification including real API calls, streaming, tool calling, structured output, and multimodal inputs. Integration tests verify complete feature sets and real-world usage patterns with external services.
Comprehensive integration testing for chat models with 40+ test methods covering all aspects of chat model functionality.
from langchain_tests.integration_tests import ChatModelIntegrationTests
class ChatModelIntegrationTests(ChatModelTests):
    """Integration tests for chat models with comprehensive functionality testing."""

    # NOTE(review): this is a signature listing; each method body shown here is
    # only its docstring.
    # Inherits all configuration from ChatModelTests

    # Basic invocation tests
    def test_invoke(self) -> None:
        """Test basic model invocation with simple prompts."""

    def test_ainvoke(self) -> None:
        """Test asynchronous model invocation."""

    # Streaming tests
    def test_stream(self) -> None:
        """Test streaming responses from the model."""

    def test_astream(self) -> None:
        """Test asynchronous streaming responses."""

    # Batch processing tests
    def test_batch(self) -> None:
        """Test batch processing of multiple prompts."""

    def test_abatch(self) -> None:
        """Test asynchronous batch processing."""

    # Conversation tests
    def test_conversation(self) -> None:
        """Test multi-turn conversation handling."""

    def test_double_messages_conversation(self) -> None:
        """Test sequential message handling in conversations."""

    # Usage metadata tests
    def test_usage_metadata(self) -> None:
        """Test usage metadata tracking and validation."""

    def test_usage_metadata_streaming(self) -> None:
        """Test usage metadata in streaming responses."""

    # Stop sequence tests
    def test_stop_sequence(self) -> None:
        """Test stop sequence functionality."""

    # Tool calling tests (if has_tool_calling=True)
    def test_tool_calling(self) -> None:
        """Test tool calling functionality."""

    def test_tool_calling_async(self) -> None:
        """Test asynchronous tool calling."""

    def test_bind_runnables_as_tools(self) -> None:
        """Test binding runnable objects as tools."""

    def test_tool_message_histories_string_content(self) -> None:
        """Test tool message histories with string content."""

    def test_tool_message_histories_list_content(self) -> None:
        """Test tool message histories with complex list content."""

    def test_tool_choice(self) -> None:
        """Test tool choice functionality."""

    def test_tool_calling_with_no_arguments(self) -> None:
        """Test tool calling with tools that take no arguments."""

    def test_tool_message_error_status(self) -> None:
        """Test error handling in tool messages."""

    # Structured output tests (if has_structured_output=True)
    def test_structured_few_shot_examples(self) -> None:
        """Test structured output with few-shot examples."""

    def test_structured_output(self) -> None:
        """Test structured output generation."""

    def test_structured_output_async(self) -> None:
        """Test asynchronous structured output generation."""

    def test_structured_output_pydantic_2_v1(self) -> None:
        """Test Pydantic V1 compatibility in structured output."""

    def test_structured_output_optional_param(self) -> None:
        """Test structured output with optional parameters."""

    # JSON mode tests (if supports_json_mode=True)
    def test_json_mode(self) -> None:
        """Test JSON mode functionality."""

    # Multimodal input tests (if corresponding support flags=True)
    def test_pdf_inputs(self) -> None:
        """Test PDF input handling."""

    def test_audio_inputs(self) -> None:
        """Test audio input handling."""

    def test_image_inputs(self) -> None:
        """Test image input handling."""

    def test_image_tool_message(self) -> None:
        """Test image content in tool messages."""

    def test_anthropic_inputs(self) -> None:
        """Test Anthropic-style input format handling."""

    # Message handling tests
    def test_message_with_name(self) -> None:
        """Test messages with name attributes."""

    # Advanced functionality tests
    def test_agent_loop(self) -> None:
        """Test agent loop functionality with tool calling."""

    def test_unicode_tool_call_integration(self) -> None:
        """Test Unicode handling in tool calls."""

    # Performance tests
    def test_stream_time(self) -> None:
        """Benchmark streaming performance."""

from langchain_tests.integration_tests import ChatModelIntegrationTests
from my_integration import MyChatModel
class TestMyChatModelIntegration(ChatModelIntegrationTests):
@property
def chat_model_class(self):
return MyChatModel
@property
def chat_model_params(self):
return {
"api_key": "real-api-key", # Use real credentials for integration tests
"model": "gpt-4",
"temperature": 0.1
}
# Configure model capabilities
@property
def has_tool_calling(self):
return True
@property
def has_structured_output(self):
return True
@property
def supports_image_inputs(self):
return True
@property
def returns_usage_metadata(self):
return TrueIntegration testing for embeddings models with synchronous and asynchronous operations.
from langchain_tests.integration_tests import EmbeddingsIntegrationTests
class EmbeddingsIntegrationTests(EmbeddingsTests):
    """Integration tests for embeddings models."""

    # NOTE(review): this is a signature listing; each method body shown here is
    # only its docstring.
    def test_embed_query(self) -> None:
        """Test embedding a single query string."""

    def test_embed_documents(self) -> None:
        """Test embedding a list of documents."""

    def test_aembed_query(self) -> None:
        """Test asynchronous embedding of a single query."""

    def test_aembed_documents(self) -> None:
        """Test asynchronous embedding of document lists."""

from langchain_tests.integration_tests import EmbeddingsIntegrationTests
from my_integration import MyEmbeddings
class TestMyEmbeddingsIntegration(EmbeddingsIntegrationTests):
@property
def embeddings_class(self):
return MyEmbeddings
@property
def embedding_model_params(self):
return {
"api_key": "real-api-key",
"model": "text-embedding-3-large"
}Integration testing for tools with schema validation and invocation verification.
from langchain_tests.integration_tests import ToolsIntegrationTests
class ToolsIntegrationTests(ToolsTests):
    """Integration tests for tools."""

    # NOTE(review): this is a signature listing; each method body shown here is
    # only its docstring.
    def test_invoke_matches_output_schema(self) -> None:
        """Test that tool output matches its declared schema."""

    def test_async_invoke_matches_output_schema(self) -> None:
        """Test that async tool output matches its declared schema."""

    def test_invoke_no_tool_call(self) -> None:
        """Test direct tool invocation without tool call wrapper."""

    def test_async_invoke_no_tool_call(self) -> None:
        """Test direct async tool invocation."""

from langchain_tests.integration_tests import ToolsIntegrationTests
from my_integration import MySearchTool
class TestMySearchToolIntegration(ToolsIntegrationTests):
@property
def tool_constructor(self):
return MySearchTool
@property
def tool_constructor_params(self):
return {
"api_key": "real-search-api-key",
"base_url": "https://api.search-service.com"
}
@property
def tool_invoke_params_example(self):
return {
"query": "LangChain framework",
"num_results": 5
}Integration testing for retriever implementations with document retrieval and parameter validation.
from langchain_tests.integration_tests import RetrieversIntegrationTests
class RetrieversIntegrationTests(BaseStandardTests):
    """Integration tests for retrievers."""

    # NOTE(review): this is a signature listing; each member body shown here is
    # only its docstring.

    # Required abstract properties
    @property
    def retriever_constructor(self):
        """Retriever class to test."""

    @property
    def retriever_constructor_params(self) -> dict:
        """Constructor parameters for the retriever."""

    @property
    def retriever_query_example(self) -> str:
        """Example query string for testing."""

    @property
    def num_results_arg_name(self) -> str:
        """Name of the parameter that controls number of results. Default: 'k'."""

    # Fixtures
    @pytest.fixture
    def retriever(self):
        """Retriever fixture for testing."""

    def test_k_constructor_param(self) -> None:
        """Test the number of results constructor parameter."""

    def test_invoke_with_k_kwarg(self) -> None:
        """Test runtime parameter for number of results."""

    def test_invoke_returns_documents(self) -> None:
        """Test that retriever returns Document objects."""

    def test_ainvoke_returns_documents(self) -> None:
        """Test that async retriever returns Document objects."""

from langchain_tests.integration_tests import RetrieversIntegrationTests
from my_integration import MyRetriever
class TestMyRetrieverIntegration(RetrieversIntegrationTests):
@property
def retriever_constructor(self):
return MyRetriever
@property
def retriever_constructor_params(self):
return {
"index_name": "test-index",
"api_key": "real-api-key"
}
@property
def retriever_query_example(self):
return "machine learning algorithms"
@property
def num_results_arg_name(self):
return "top_k" # If your retriever uses 'top_k' instead of 'k'The integration test framework includes several pre-built tools for testing tool calling functionality:
# Pre-defined tools for testing
# NOTE(review): the parameter name `input` shadows the builtin; it is left as-is
# because it is part of the listed signature.
def magic_function(input: int) -> int:
    """Magic function tool with input validation."""
# Signature listing only; the body shown here is just the docstring.
def magic_function_no_args() -> str:
    """No-argument magic function tool."""
# Signature listing only; the body shown here is just the docstring.
def unicode_customer(customer_name: str, description: str) -> str:
    """Unicode handling tool for internationalization testing."""
# Signature listing only; the body shown here is just the docstring.
def current_weather_tool():
    """Weather tool fixture for testing tool calling."""

Integration tests include callback handlers for capturing and validating model behavior:
class _TestCallbackHandler:
    """Callback handler for capturing chat model options and events."""

    # NOTE(review): signature listing only; each method body shown here is just
    # its docstring.
    def on_chat_model_start(self, serialized, messages, **kwargs):
        """Called when chat model starts processing."""

    def on_llm_end(self, response, **kwargs):
        """Called when chat model completes processing."""

Utilities for generating test schemas for structured output testing:
# Signature listing only; the body shown here is just the docstring.
def _get_joke_class(schema_type: str):
    """Generate joke schema for different output formats."""

Integration tests automatically use VCR (Video Cassette Recorder) for HTTP call recording and playback, enabling:
VCR integration is controlled by the enable_vcr_tests property in the base test class.
Integration tests include performance benchmarking capabilities:
- test_stream_time() benchmarks streaming response times

Performance tests use pytest-benchmark for detailed statistical analysis and regression detection.
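For models that support multimodal inputs, the framework provides comprehensive testing:
- test_image_inputs() tests image input handling
- test_pdf_inputs() tests PDF input handling
- test_audio_inputs() tests audio input handling
- test_image_tool_message() tests image content in tool messages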
Each multimodal capability is controlled by feature flags in the test class configuration.
Integration tests verify proper error handling for common failure scenarios:
The framework ensures that implementations handle these errors gracefully and provide meaningful error messages to developers.
Install with Tessl CLI
npx tessl i tessl/pypi-langchain-tests