Python SDK for LangSmith Observability and Evaluation Platform
A comprehensive Python SDK for LangSmith, providing observability, evaluation, and testing capabilities for LLM applications. LangSmith enables developers to trace, debug, evaluate, and monitor AI applications through a unified API, with full support for synchronous and asynchronous operations.
pip install langsmith

import langsmith as ls
from langsmith import traceable
# Configure at startup
ls.configure(
enabled=True,
project_name="my-project"
)
# Trace a function
@traceable
def process_query(query: str) -> str:
"""Process a user query."""
result = f"Processed: {query}"
return result
# Use it
result = process_query("What is LangSmith?")
Quick links to common tasks:
@traceable decorator
trace context manager
@test decorator
expect API
Essential API signatures for immediate use. For detailed documentation, examples, and usage patterns, see the API Directory below.
# Automatic tracing decorator
@traceable(
run_type: Literal["chain", "llm", "tool", "retriever", "prompt"] = "chain",
name: Optional[str] = None,
metadata: Optional[Mapping[str, Any]] = None,
tags: Optional[list[str]] = None,
client: Optional[Client] = None,
project_name: Optional[str] = None,
)
# Manual tracing context manager
with trace(
name: str,
run_type: str = "chain",
inputs: Optional[dict] = None,
metadata: Optional[Mapping[str, Any]] = None,
tags: Optional[list[str]] = None,
project_name: Optional[str] = None,
) as run:
run.end(outputs=dict, error=str)
# Context utilities
get_current_run_tree() -> Optional[RunTree]
set_run_metadata(**metadata: Any) -> None
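# Illustrative sketch (assumes the context utilities above are importable from the
# package root): attach metadata to the active run from inside a traced function.
@traceable
def lookup(query: str) -> str:
    if get_current_run_tree() is not None:   # a run is active, i.e. tracing is enabled
        set_run_metadata(query_length=len(query))
    return f"result for {query}"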
# Global configuration
ls.configure(
enabled: Optional[bool] = ...,
project_name: Optional[str] = ...,
client: Optional[Client] = ...,
)
# Initialize client
client = Client(
api_key: Optional[str] = None,
api_url: Optional[str] = None,
timeout_ms: Optional[int] = None,
)
# Run operations
client.create_run(name: str, inputs: dict, run_type: str, ...)
client.read_run(run_id: Union[str, UUID]) -> Run
client.list_runs(project_name: str = None, ...) -> Iterator[Run]
client.share_run(run_id: Union[str, UUID]) -> str
# Project operations
client.create_project(project_name: str, ...) -> TracerSession
client.list_projects(...) -> Iterator[TracerSession]
# Dataset operations
client.create_dataset(dataset_name: str, ...) -> Dataset
client.create_example(inputs: dict, dataset_id: UUID, outputs: dict = None, ...) -> Example
client.list_examples(dataset_id: UUID, ...) -> Iterator[Example]
# Feedback operations
client.create_feedback(run_id: UUID, key: str, score: float = None, ...) -> Feedback
# Evaluate a target function
results = evaluate(
target: Union[Callable, Runnable, str, UUID],
data: Union[str, UUID, Iterable[Example]],
evaluators: Optional[Sequence[Union[RunEvaluator, Callable]]] = None,
summary_evaluators: Optional[Sequence[Callable]] = None,
experiment_prefix: Optional[str] = None,
max_concurrency: Optional[int] = None,
) -> ExperimentResults
# Async evaluation
results = await aevaluate(target, data, evaluators, ...)
# Evaluate existing experiment
results = evaluate_existing(
experiment: Union[str, UUID],
evaluators: Sequence[RunEvaluator],
) -> ExperimentResults
# Custom evaluator protocol
class MyEvaluator(RunEvaluator):
def evaluate_run(self, run: Run, example: Example) -> EvaluationResult:
        return EvaluationResult(key="metric", score=0.9)
→ Full Evaluation Documentation
# Trace pytest tests
@test(output_keys=["answer"], ...)
def test_my_function():
result = my_function()
assert result["answer"] == "expected"
# Use expectations API
@test
def test_with_expectations():
result = my_function()
assert expect.score(0.9, key="accuracy").compare(result)
    assert expect.embedding_distance("predicted", "reference").compare(result)
# Initialize async client
async with AsyncClient(api_key=..., api_url=...) as client:
await client.create_run(...)
run = await client.read_run(run_id)
async for run in client.list_runs(...):
        process(run)
→ Full Async Client Documentation
# Synchronous cache
cache = Cache(max_size=100, ttl_seconds=3600)
cache.set(key, value)
value = cache.get(key)
# Asynchronous cache
async_cache = AsyncCache(max_size=100, ttl_seconds=3600)
await async_cache.start()
async_cache.set(key, value)
value = async_cache.get(key)
# Use with client
client = Client(cache=True)  # Use default cache
Synchronous and asynchronous clients for the LangSmith API.
Automatic and manual tracing of functions and code blocks.
Global configuration for tracing behavior.
Organize and manage trace collections (also called sessions).
Collections of examples for evaluation and testing.
Individual records within datasets.
Metrics and annotations on runs.
Run evaluations and create custom metrics.
Pytest integration for trace-aware testing.
LRU cache with TTL and background refresh.
Helper functions and data anonymization.
Version and share prompts.
Human review workflows.
Complete API reference and data schemas.
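For instance, the dataset, example, and feedback operations listed in the directory above can be combined into a small workflow. The sketch below uses the client methods from the quick reference; the dataset name, field values, and metric key are illustrative, not prescribed by the SDK.
from langsmith import Client

client = Client()  # reads LANGSMITH_API_KEY from the environment

# Create a dataset and add a single example to it.
dataset = client.create_dataset(dataset_name="qa-smoke-tests")
client.create_example(
    inputs={"question": "What is LangSmith?"},
    outputs={"expected": "An observability and evaluation platform."},
    dataset_id=dataset.id,
)

# Attach feedback to an existing run (run_id below is a placeholder).
# client.create_feedback(run_id=some_run_id, key="helpfulness", score=1.0)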
LangSmith's architecture centers on runs (trace spans) organized in tree structures:
Tracing System: Uses context variables to implicitly track parent-child relationships between runs, enabling automatic trace tree construction (see the sketch after this list)
Client Architecture: The Client and AsyncClient provide comprehensive API access for managing runs, projects, datasets, examples, and feedback
RunTree: Represents individual trace spans with metadata, inputs, outputs, and timing information
Evaluation: Built on top of tracing infrastructure, allowing runs to be associated with dataset examples and evaluated by custom or built-in evaluators
Testing Framework: Integrates with pytest to provide trace-aware test execution with expectations and assertions
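As a small sketch of the tracing model described above (assuming traceable and get_current_run_tree are exported at the package root, as the quick reference shows), nested @traceable calls are linked into a single trace tree through the ambient context, and the active span can be inspected from inside the call:
from langsmith import traceable, get_current_run_tree

@traceable(run_type="retriever")
def retrieve(query: str) -> list[str]:
    # Runs as a child span of whichever run is currently active.
    return [f"doc about {query}"]

@traceable(run_type="chain")
def answer(query: str) -> str:
    docs = retrieve(query)          # nested call becomes a child run in the same trace tree
    run = get_current_run_tree()    # RunTree for this span, or None when tracing is disabled
    span_id = run.id if run is not None else None
    return f"[{span_id}] answer based on {len(docs)} documents"

answer("What is LangSmith?")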
import langsmith as ls
from langsmith import traceable
# 1. Configure once at startup
ls.configure(enabled=True, project_name="my-app")
# 2. Trace your functions
@traceable
def my_chain(input_text):
return process(input_text)
# 3. Run and traces are automatically sent
result = my_chain("test input")
from langsmith import evaluate
# 1. Define your target function
def my_app(inputs):
return {"answer": process(inputs["question"])}
# 2. Define evaluator
def correctness(run, example):
return {
"key": "correct",
"score": run.outputs["answer"] == example.outputs["expected"]
}
# 3. Run evaluation
results = evaluate(
my_app,
data="my-dataset",
evaluators=[correctness]
)
from langsmith import AsyncClient, traceable
@traceable
async def async_process(input_data):
result = await async_operation(input_data)
return result
# Use with async client
async with AsyncClient() as client:
    await client.create_run(...)
LangSmith respects the following environment variables:
LANGSMITH_API_KEY - API key for authentication
LANGSMITH_ENDPOINT - API endpoint URL (default: https://api.smith.langchain.com)
LANGSMITH_PROJECT - Default project name
LANGSMITH_TRACING - Enable/disable tracing ("true"/"false")
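These variables can also be set from Python before a client is created; the values below are placeholders:
import os

# Placeholder values - substitute your own key and project.
os.environ["LANGSMITH_API_KEY"] = "<your-api-key>"
os.environ["LANGSMITH_PROJECT"] = "my-project"
os.environ["LANGSMITH_TRACING"] = "true"
# LANGSMITH_ENDPOINT is optional; it defaults to https://api.smith.langchain.com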
LangSmith is designed to be non-blocking - tracing errors won't crash your application:
from langsmith import Client, traceable
# Tracing errors are logged but don't raise
client = Client()
@traceable
def my_function():
# Even if tracing fails, your function runs
    return "result"
To handle tracing errors explicitly:
def error_handler(error: Exception):
print(f"Tracing error: {error}")
client = Client(tracing_error_callback=error_handler)