
tessl/pypi-langsmith

tessl install tessl/pypi-langsmith@0.6.1

Python SDK for LangSmith Observability and Evaluation Platform. Describes pypipkg:pypi/langsmith@0.6.x.

docs/testing/pytest-integration.md

Testing with Pytest

Pytest integration for tracing test cases with an expectations API.

Test Decorator

LangSmith's testing framework integrates with pytest to provide trace-aware test execution and rich assertions, including an expectations API for approximate assertions and scoring.

test Decorator

Decorator to trace a pytest test case in LangSmith.

def test(
    id: Optional[UUID] = None,
    output_keys: Optional[Sequence[str]] = None,
    client: Optional[Client] = None,
    test_suite_name: Optional[str] = None,
    metadata: Optional[dict] = None,
    repetitions: Optional[int] = None,
    split: Optional[Union[str, list[str]]] = None,
    cached_hosts: Optional[Sequence[str]] = None,
) -> Callable:
    """
    Decorator to trace a pytest test case in LangSmith.

    Ensures that necessary example data is created and associated with
    the test function. Also works as a pytest marker.

    Parameters:
    - id: Unique identifier for the test case (auto-generated if not provided)
    - output_keys: Keys to extract from test function's local variables as outputs
    - client: LangSmith client to use
    - test_suite_name: Name of the test suite/dataset
    - metadata: Metadata to attach to the test
    - repetitions: Number of times to repeat the test
    - split: Dataset split(s) to run test on
    - cached_hosts: List of hosts to cache API calls from (for deterministic testing)

    Returns:
    Decorated test function
    """

unit Decorator

Alias for the test decorator with identical functionality.

def unit(
    id: Optional[UUID] = None,
    output_keys: Optional[Sequence[str]] = None,
    **kwargs
) -> Callable:
    """
    Alias for the test decorator. Identical functionality.

    Parameters:
    Same as test()

    Returns:
    Decorated test function
    """

Expect API

The expect module provides assertions for testing.

expect Module

Module for making approximate assertions as "expectations" on test results.

expect.score

@staticmethod
def score(
    value: float,
    *,
    key: str = "score",
) -> _Matcher:
    """
    Log a score for the current test case.

    Parameters:
    - value: Numeric score value (typically 0.0 to 1.0)
    - key: Key/name for the score metric

    Returns:
    Matcher object (typically not used)
    """

expect.value

@staticmethod
def value(value: Any) -> _Matcher:
    """
    Make assertions on a value directly.

    Parameters:
    - value: The value to assert on

    Returns:
    Matcher object with assertion methods
    """

expect.embedding_distance

@staticmethod
def embedding_distance(
    prediction: str,
    reference: str,
    *,
    config: Optional[EmbeddingConfig] = None,
) -> _Matcher:
    """
    Calculate and assert on embedding distance between prediction and reference.

    Uses embeddings to compute semantic similarity/distance.

    Parameters:
    - prediction: Predicted/actual text
    - reference: Reference/expected text
    - config: Configuration for embedding model

    Returns:
    Matcher object with distance assertion methods
    """

expect.edit_distance

@staticmethod
def edit_distance(
    prediction: str,
    reference: str,
    *,
    config: Optional[EditDistanceConfig] = None,
) -> _Matcher:
    """
    Calculate and assert on edit distance (Damerau-Levenshtein) between strings.

    Parameters:
    - prediction: Predicted/actual string
    - reference: Reference/expected string
    - config: Configuration for edit distance calculation

    Returns:
    Matcher object with distance assertion methods
    """

Matcher Methods

The matcher objects returned by expect functions support the following assertion methods:

class _Matcher:
    """Matcher for making assertions on values."""

    def to_be_less_than(self, threshold: float) -> None:
        """
        Assert value is less than threshold.

        Parameters:
        - threshold: Upper bound (exclusive)
        """

    def to_be_greater_than(self, threshold: float) -> None:
        """
        Assert value is greater than threshold.

        Parameters:
        - threshold: Lower bound (exclusive)
        """

    def to_be_between(self, min: float, max: float) -> None:
        """
        Assert value is between min and max (inclusive).

        Parameters:
        - min: Lower bound (inclusive)
        - max: Upper bound (inclusive)
        """

    def to_contain(self, substring: str) -> None:
        """
        Assert string contains substring.

        Parameters:
        - substring: Substring to search for
        """

    def against(self, func: Callable[[Any], bool]) -> None:
        """
        Assert using custom function.

        Parameters:
        - func: Function that returns True if assertion passes
        """

Usage Examples

Basic Test with @test Decorator

from langsmith import test

@test
def test_my_feature():
    """Test a feature with LangSmith tracing."""
    result = my_function("input")
    assert result == "expected"

Test with Pytest Marker

import pytest

@pytest.mark.langsmith
def test_with_marker():
    """Alternative syntax using pytest marker."""
    result = my_function("input")
    assert result == "expected"

Test with Output Keys

from langsmith import test

@test(output_keys=["result", "metadata"])
def test_with_outputs():
    """Test that captures local variables as outputs."""
    input_data = "test input"
    result = my_function(input_data)
    metadata = {"processed": True}

    # These variables will be captured as outputs
    assert result is not None

Using expect.score

import pytest
from langsmith import expect

@pytest.mark.langsmith
def test_with_score():
    """Test that logs a quality score."""
    response = get_llm_response("What is 2+2?")

    # Calculate and log a score
    quality_score = evaluate_quality(response)
    expect.score(quality_score, key="quality")

    # Still use regular assertions
    assert "4" in response

Using expect.value Assertions

import pytest
from langsmith import expect

@pytest.mark.langsmith
def test_with_expectations():
    """Test using expect assertions."""
    response = get_llm_response("Tell me a joke")

    # Assert response contains keyword
    expect.value(response).to_contain("joke")

    # Can still use regular assertions
    assert len(response) > 0

Using expect.embedding_distance

import pytest
from langsmith import expect

@pytest.mark.langsmith
def test_semantic_similarity():
    """Test semantic similarity of response."""
    response = get_llm_response("What is the capital of France?")
    expected = "The capital of France is Paris"

    # Assert semantic similarity
    expect.embedding_distance(
        prediction=response,
        reference=expected
    ).to_be_less_than(0.3)  # Low distance = high similarity

Using expect.edit_distance

import pytest
from langsmith import expect

@pytest.mark.langsmith
def test_edit_distance():
    """Test string similarity using edit distance."""
    result = normalize_text("Hello, World!")
    expected = "hello world"

    # Assert edit distance
    expect.edit_distance(
        prediction=result,
        reference=expected
    ).to_be_less_than(5)

Numeric Assertions

import pytest
from langsmith import expect

@pytest.mark.langsmith
def test_numeric_checks():
    """Test numeric values with expectations."""
    latency = measure_latency()

    # Assert latency is within acceptable range
    expect.value(latency).to_be_less_than(1.0)
    expect.value(latency).to_be_greater_than(0.0)
    expect.value(latency).to_be_between(0.1, 0.5)

Custom Assertion Functions

import pytest
from langsmith import expect

@pytest.mark.langsmith
def test_custom_assertion():
    """Test with custom assertion logic."""
    response = get_llm_response("List 5 colors")

    # Custom validation function
    def has_five_items(text):
        items = [line.strip() for line in text.split('\n') if line.strip()]
        return len(items) == 5

    expect.value(response).against(has_five_items)

Test with Metadata

from langsmith import test

@test(
    test_suite_name="integration-tests",
    metadata={
        "category": "llm",
        "priority": "high",
        "model": "gpt-4"
    }
)
def test_with_metadata():
    """Test with custom metadata."""
    result = my_llm_function("input")
    assert result is not None

Test with Repetitions

from langsmith import test

@test(repetitions=5)
def test_consistency():
    """Test that runs 5 times to check consistency."""
    result = my_stochastic_function("input")
    assert result is not None

Test with Dataset Split

from langsmith import test

@test(split="test")
def test_on_test_split():
    """Test that runs only on test split of dataset."""
    result = my_function("input")
    assert result is not None

@test(split=["train", "validation"])
def test_on_multiple_splits():
    """Test that runs on train and validation splits."""
    result = my_function("input")
    assert result is not None

Parameterized Tests

import pytest
from langsmith import expect

@pytest.mark.langsmith
@pytest.mark.parametrize("text,expected", [
    ("hello", "HELLO"),
    ("world", "WORLD"),
    ("test", "TEST"),
])
def test_uppercase(text, expected):
    """Parameterized test with LangSmith tracing."""
    result = my_uppercase_function(text)

    expect.value(result).to_contain(expected)
    assert result == expected

Async Tests

import pytest
from langsmith import expect

@pytest.mark.langsmith
@pytest.mark.asyncio
async def test_async_function():
    """Async test with LangSmith tracing."""
    result = await my_async_function("input")

    expect.value(result).to_contain("expected")
    assert result is not None

Multiple Expectations in One Test

import pytest
from langsmith import expect

@pytest.mark.langsmith
def test_multiple_expectations():
    """Test with multiple expect assertions."""
    response = get_llm_response("Explain quantum computing")

    # Log overall quality score
    quality = evaluate_quality(response)
    expect.score(quality, key="quality")

    # Check content requirements
    expect.value(response).to_contain("quantum")
    expect.value(response).to_contain("computing")

    # Check semantic similarity to reference
    reference = "Quantum computing uses quantum mechanics principles"
    expect.embedding_distance(
        prediction=response,
        reference=reference
    ).to_be_less_than(0.5)

    # Check length is reasonable
    expect.value(len(response)).to_be_greater_than(50)
    expect.value(len(response)).to_be_less_than(1000)

Test Fixtures with Tracing

import pytest
from langsmith import test

@pytest.fixture
def llm_client():
    """Fixture that provides LLM client."""
    return setup_llm_client()

@test
def test_with_fixture(llm_client):
    """Test using pytest fixture."""
    response = llm_client.generate("test prompt")
    assert response is not None

Test with Cached API Calls

from langsmith import test

@test(cached_hosts=["api.openai.com", "api.anthropic.com"])
def test_with_caching():
    """Test with API call caching for deterministic results."""
    # API calls to cached hosts will be cached
    response = llm.generate("test prompt")
    assert response is not None

Full Integration Test Example

import pytest
from langsmith import test, expect

@test(
    test_suite_name="qa-pipeline-tests",
    metadata={
        "component": "qa-system",
        "version": "v2.0",
        "critical": True
    },
    output_keys=["answer", "confidence", "sources"]
)
def test_qa_pipeline():
    """Comprehensive QA pipeline test."""
    # Setup
    question = "What is the capital of France?"
    expected_answer = "Paris"

    # Execute
    answer, confidence, sources = qa_pipeline(question)

    # Log scores
    expect.score(confidence, key="confidence")

    # Content assertions
    expect.value(answer).to_contain(expected_answer)

    # Semantic similarity
    expect.embedding_distance(
        prediction=answer,
        reference=f"The capital of France is {expected_answer}"
    ).to_be_less_than(0.2)

    # Confidence threshold
    expect.value(confidence).to_be_greater_than(0.8)

    # Source verification
    expect.value(len(sources)).to_be_greater_than(0)

    # Standard assertions
    assert answer is not None
    assert 0.0 <= confidence <= 1.0

Test Class Organization

import pytest
from langsmith import expect

class TestLLMFeatures:
    """Test suite for LLM features."""

    @pytest.mark.langsmith
    def test_summarization(self):
        """Test summarization feature."""
        text = "Long text to summarize..."
        summary = summarize(text)

        expect.value(len(summary)).to_be_less_than(len(text))
        assert summary is not None

    @pytest.mark.langsmith
    def test_translation(self):
        """Test translation feature."""
        english_text = "Hello, world!"
        french_text = translate(english_text, target="fr")

        expect.value(french_text).to_contain("Bonjour")
        assert french_text is not None

    @pytest.mark.langsmith
    def test_sentiment(self):
        """Test sentiment analysis."""
        text = "This is a great product!"
        sentiment = analyze_sentiment(text)

        expect.value(sentiment["score"]).to_be_greater_than(0.5)
        assert sentiment["label"] == "positive"