Data Schemas

Key data types used throughout the LangSmith API including Run, Example, Dataset, Feedback, and more. These Pydantic models represent the core data structures for tracing, evaluation, and dataset management.
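
These models can typically be imported from the langsmith.schemas module. A representative import (a sketch; adjust the list to the classes you need):

from langsmith.schemas import (
    Dataset,
    DataType,
    Example,
    Feedback,
    Run,
    TracerSession,
)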

Run

Represents a single run (trace span) in the LangSmith system.

class Run(BaseModel):
    """A single run/trace span."""

    id: UUID
    """Unique identifier for the run"""

    name: str
    """Name of the run"""

    run_type: str
    """Type of run (e.g., "chain", "llm", "tool", "retriever")"""

    start_time: datetime
    """When the run started"""

    end_time: Optional[datetime] = None
    """When the run ended"""

    inputs: dict
    """Input data for the run"""

    outputs: Optional[dict] = None
    """Output data from the run"""

    error: Optional[str] = None
    """Error message if run failed"""

    extra: Optional[dict] = None
    """Extra metadata"""

    tags: Optional[list[str]] = None
    """Tags associated with the run"""

    parent_run_id: Optional[UUID] = None
    """ID of parent run"""

    session_id: Optional[UUID] = None
    """Project/session ID"""

    session_name: Optional[str] = None
    """Project/session name"""

    trace_id: UUID
    """Root trace ID"""

    dotted_order: str
    """Dotted order string for tree positioning"""

    reference_example_id: Optional[UUID] = None
    """Associated dataset example ID"""

    events: Optional[list[dict]] = None
    """Events logged during run"""

    serialized: Optional[dict] = None
    """Serialized representation"""

Example

Represents a single example (record) in a dataset.

class Example(BaseModel):
    """A dataset example."""

    id: UUID
    """Unique identifier for the example"""

    created_at: datetime
    """When the example was created"""

    modified_at: Optional[datetime] = None
    """When the example was last modified"""

    dataset_id: UUID
    """ID of the dataset this example belongs to"""

    inputs: dict
    """Input data for the example"""

    outputs: Optional[dict] = None
    """Expected output data"""

    metadata: Optional[dict] = None
    """Example metadata"""

    source_run_id: Optional[UUID] = None
    """Source run ID if created from a run"""

    splits: Optional[list[str]] = None
    """Dataset splits (e.g., ["train"], ["test"])"""

Dataset

Represents a dataset collection.

class Dataset(BaseModel):
    """A dataset of examples."""

    id: UUID
    """Unique identifier for the dataset"""

    name: str
    """Name of the dataset"""

    description: Optional[str] = None
    """Dataset description"""

    created_at: datetime
    """When the dataset was created"""

    modified_at: Optional[datetime] = None
    """When the dataset was last modified"""

    data_type: Optional[DataType] = None
    """Type of data (e.g., "kv", "llm", "chat")"""

    inputs_schema: Optional[dict] = None
    """JSON schema for inputs"""

    outputs_schema: Optional[dict] = None
    """JSON schema for outputs"""

    example_count: Optional[int] = None
    """Number of examples in the dataset"""

    metadata: Optional[dict] = None
    """Dataset metadata"""

    tags: Optional[list[str]] = None
    """Dataset tags"""

Feedback

Represents feedback (metrics/annotations) on a run.

class Feedback(BaseModel):
    """Feedback on a run."""

    id: UUID
    """Unique identifier for the feedback"""

    created_at: datetime
    """When the feedback was created"""

    modified_at: Optional[datetime] = None
    """When the feedback was last modified"""

    run_id: UUID
    """ID of the run being evaluated"""

    key: str
    """Feedback key/metric name"""

    score: Optional[Union[int, float, bool]] = None
    """Numeric score"""

    value: Optional[Union[str, dict, int, float, bool, list]] = None
    """Non-numeric value"""

    correction: Optional[dict] = None
    """Correction data"""

    comment: Optional[str] = None
    """Text comment"""

    feedback_source: Optional[FeedbackSource] = None
    """Source of the feedback"""

    session_id: Optional[UUID] = None
    """Associated session/project ID"""

    comparative_experiment_id: Optional[UUID] = None
    """Comparative experiment ID"""

TracerSession

Represents a project (also called session or trace group).

class TracerSession(BaseModel):
    """A project/session."""

    id: UUID
    """Unique identifier for the project"""

    name: str
    """Name of the project"""

    description: Optional[str] = None
    """Project description"""

    created_at: datetime
    """When the project was created"""

    start_time: Optional[datetime] = None
    """Project start time"""

    end_time: Optional[datetime] = None
    """Project end time (if ended)"""

    metadata: Optional[dict] = None
    """Project metadata"""

    tags: Optional[list[str]] = None
    """Project tags"""

    reference_dataset_id: Optional[UUID] = None
    """Associated dataset ID"""

    run_count: Optional[int] = None
    """Number of runs in the project"""

    latency_p50: Optional[float] = None
    """Median latency of runs"""

    latency_p99: Optional[float] = None
    """99th percentile latency"""

    total_tokens: Optional[int] = None
    """Total tokens used"""

    prompt_tokens: Optional[int] = None
    """Total prompt tokens"""

    completion_tokens: Optional[int] = None
    """Total completion tokens"""

    last_run_start_time: Optional[datetime] = None
    """When the last run started"""

AnnotationQueue

Represents an annotation queue for human review.

class AnnotationQueue(BaseModel):
    """An annotation queue."""

    id: UUID
    """Unique identifier for the queue"""

    name: str
    """Name of the queue"""

    description: Optional[str] = None
    """Queue description"""

    created_at: datetime
    """When the queue was created"""

    updated_at: datetime
    """When the queue was last updated"""

    run_count: Optional[int] = None
    """Number of runs in the queue"""

Prompt

Represents a prompt in the prompt hub.

class Prompt(BaseModel):
    """A prompt from the hub."""

    id: UUID
    """Unique identifier for the prompt"""

    repo_handle: str
    """Repository handle (e.g., "owner/prompt-name")"""

    description: Optional[str] = None
    """Prompt description"""

    readme: Optional[str] = None
    """Markdown readme"""

    created_at: datetime
    """When the prompt was created"""

    updated_at: datetime
    """When the prompt was last updated"""

    is_public: bool
    """Whether the prompt is public"""

    is_archived: bool
    """Whether the prompt is archived"""

    tags: Optional[list[str]] = None
    """Prompt tags"""

    num_likes: Optional[int] = None
    """Number of likes"""

    num_downloads: Optional[int] = None
    """Number of downloads"""

    num_views: Optional[int] = None
    """Number of views"""

ExperimentResults

Results from an evaluation experiment.

class ExperimentResults:
    """Results from an evaluation."""

    experiment_name: str
    """Name of the experiment"""

    experiment_id: UUID
    """ID of the experiment"""

    results: list[dict]
    """List of results for each example"""

    summary_results: dict
    """Summary/aggregate results"""

    def is_complete(self) -> bool:
        """
        Check if experiment is complete.

        Returns:
            True if all evaluations have finished
        """

    def get_results(self) -> dict:
        """
        Get final results.

        Returns:
            Dictionary with complete results
        """

DataType Enum

class DataType(str, Enum):
    """Dataset data type."""

    kv = "kv"
    """Key-value pairs"""

    llm = "llm"
    """LLM completion format"""

    chat = "chat"
    """Chat message format"""

FeedbackSourceType

class FeedbackSourceType(str, Enum):
    """Type of feedback source."""

    api = "api"
    """Feedback from API"""

    model = "model"
    """Feedback from model/evaluator"""

    app = "app"
    """Feedback from application"""

FeedbackConfig

Configuration for feedback UI display.

class FeedbackConfig(BaseModel):
    """Configuration for feedback."""

    type: str
    """Type of feedback ("continuous", "categorical", etc.)"""

    min: Optional[float] = None
    """Minimum value for continuous feedback"""

    max: Optional[float] = None
    """Maximum value for continuous feedback"""

    categories: Optional[list[dict]] = None
    """Categories for categorical feedback"""

LangSmithInfo

Information about LangSmith server.

class LangSmithInfo(BaseModel):
    """LangSmith server information."""

    version: str
    """Server version"""

    batch_ingest_config: Optional[dict] = None
    """Batch ingestion configuration"""

    license_expiration_time: Optional[datetime] = None
    """License expiration time"""

    instance_flags: Optional[dict] = None
    """Server feature flags"""

FeedbackIngestToken

Token for public feedback submission.

class FeedbackIngestToken(BaseModel):
    """Presigned feedback token."""

    id: UUID
    """Token ID"""

    url: str
    """URL for feedback submission"""

    expires_at: datetime
    """When the token expires"""

Attachment

File attachment for a run.

class Attachment(TypedDict):
    """File attachment."""

    mime_type: str
    """MIME type (e.g., "image/png")"""

    data: Optional[bytes]
    """Binary data"""

    url: Optional[str]
    """URL to attachment"""

Usage Examples

Working with Run Objects

from langsmith import Client

client = Client()

# Read a run
run = client.read_run("550e8400-e29b-41d4-a716-446655440000")

# Access run properties
print(f"Run: {run.name}")
print(f"Type: {run.run_type}")
print(f"Duration: {(run.end_time - run.start_time).total_seconds()}s")
print(f"Inputs: {run.inputs}")
print(f"Outputs: {run.outputs}")

# Check for errors
if run.error:
    print(f"Run failed: {run.error}")

# Access metadata
if run.tags:
    print(f"Tags: {', '.join(run.tags)}")

# Check parent relationship
if run.parent_run_id:
    parent = client.read_run(run.parent_run_id)
    print(f"Parent: {parent.name}")

Working with Examples

from langsmith import Client

client = Client()

# List examples from a dataset
for example in client.list_examples(dataset_name="my-dataset"):
    print(f"Example {example.id}:")
    print(f"  Inputs: {example.inputs}")
    print(f"  Outputs: {example.outputs}")
    print(f"  Metadata: {example.metadata}")
    print(f"  Splits: {example.splits}")

# Create an example
example = client.create_example(
    inputs={"question": "What is 2+2?"},
    outputs={"answer": "4"},
    dataset_name="math-qa",
    metadata={"difficulty": "easy"},
    split=["train"]
)

Working with Datasets

from langsmith import Client

client = Client()

# Read a dataset (read_dataset takes keyword-only arguments)
dataset = client.read_dataset(dataset_name="my-dataset")

print(f"Dataset: {dataset.name}")
print(f"Description: {dataset.description}")
print(f"Type: {dataset.data_type}")
print(f"Examples: {dataset.example_count}")
print(f"Created: {dataset.created_at}")

# Access schemas
if dataset.inputs_schema:
    print(f"Input schema: {dataset.inputs_schema}")
if dataset.outputs_schema:
    print(f"Output schema: {dataset.outputs_schema}")

Working with Feedback

from langsmith import Client

client = Client()

# List feedback for a run
for feedback in client.list_feedback(
    run_ids=["550e8400-e29b-41d4-a716-446655440000"]
):
    print(f"Feedback: {feedback.key}")
    if feedback.score is not None:
        print(f"  Score: {feedback.score}")
    if feedback.value is not None:
        print(f"  Value: {feedback.value}")
    if feedback.comment:
        print(f"  Comment: {feedback.comment}")

# Create feedback
feedback = client.create_feedback(
    run_id="550e8400-e29b-41d4-a716-446655440000",
    key="accuracy",
    score=0.9,
    comment="High accuracy result"
)

Working with Projects

from langsmith import Client

client = Client()

# Read a project (read_project takes keyword-only arguments; include_stats
# populates aggregate fields such as run_count and latency_p50)
project = client.read_project(project_name="my-project", include_stats=True)

print(f"Project: {project.name}")
print(f"Description: {project.description}")
print(f"Run count: {project.run_count}")
print(f"Median latency: {project.latency_p50}s")
print(f"Total tokens: {project.total_tokens}")

# Access metadata
if project.metadata:
    print(f"Metadata: {project.metadata}")

# Check if ended
if project.end_time:
    print(f"Ended at: {project.end_time}")

Type-Safe Run Creation

from langsmith import Client, RunTree
from datetime import datetime
from uuid import UUID

client = Client()

# Create run with type hints
run = RunTree(
    name="My Run",
    run_type="chain",
    inputs={"query": "test"},
    outputs={"result": "success"},
    tags=["production", "v1"],
    metadata={"user_id": "123"},
    client=client
)

# Type checker knows about all fields
assert isinstance(run.id, UUID)
assert isinstance(run.start_time, datetime)
assert isinstance(run.inputs, dict)

Filtering with Type Safety

from langsmith import Client
from datetime import datetime, timedelta

client = Client()

# Filter runs by type
llm_runs = client.list_runs(
    project_name="my-project",
    run_type="llm",
    start_time=datetime.now() - timedelta(hours=24)
)

# Type checker knows run properties
for run in llm_runs:
    # Access fields with type safety
    duration = (run.end_time - run.start_time).total_seconds() if run.end_time else None
    print(f"{run.name}: {duration}s")

Custom Type Extensions

from langsmith.schemas import Run, Example

def analyze_run(run: Run) -> dict:
    """Analyze a run and return metrics."""
    metrics = {
        "name": run.name,
        "type": run.run_type,
        "success": run.error is None,
        "has_outputs": run.outputs is not None,
    }

    if run.end_time:
        metrics["duration"] = (run.end_time - run.start_time).total_seconds()

    return metrics

def validate_example(example: Example) -> bool:
    """Validate an example has required fields."""
    return (
        example.inputs is not None
        and example.outputs is not None
        and len(example.inputs) > 0
    )