
Integration Patterns

Advanced patterns combining multiple LangSmith features for real-world use cases.

Overview

This guide demonstrates how to combine LangSmith features (tracing, evaluation, feedback, anonymization, annotation queues, etc.) to build robust production systems. Each pattern is drawn from production use cases and shows a complete working implementation.

Pattern 1: Production Monitoring with Feedback Loop

Combines: Tracing + Feedback Collection + Annotation Queues

Use Case: Monitor a production LLM application, collect user feedback, and queue low-confidence outputs for human review.

Complete Implementation

import { traceable, getCurrentRunTree } from "langsmith/traceable";
import { Client } from "langsmith";
import OpenAI from "openai";

const client = new Client({
  projectName: "production-chatbot",
  tracingSamplingRate: 0.1, // Sample 10% in production
  hideInputs: false,
  hideOutputs: false
});

const openai = new OpenAI();

// Create annotation queue for low-confidence outputs
const qaQueue = await client.createAnnotationQueue({
  name: "Low Confidence Review",
  description: "Human review queue for outputs with confidence < 0.7",
  rubricInstructions: "Rate: 1=Poor, 2=Fair, 3=Good, 4=Great, 5=Excellent"
});

// Traced chatbot function
const chatbot = traceable(
  async (userMessage: string) => {
    const completion = await openai.chat.completions.create({
      model: "gpt-4",
      messages: [
        { role: "system", content: "You are a helpful assistant." },
        { role: "user", content: userMessage }
      ],
      temperature: 0.7
    });

    const response = completion.choices[0].message.content ?? "";

    // Calculate confidence (example logic)
    const confidence = calculateConfidence(response);

    // Capture the run ID from the trace context so callers can attach
    // feedback and queue this run for review
    const runId = getCurrentRunTree().id;

    return { response, confidence, runId };
  },
  {
    name: "production-chatbot",
    run_type: "chain",
    client: client,
    metadata: {
      environment: "production",
      version: "2.1.0"
    },
    tags: ["production", "customer-facing"]
  }
);

// Production workflow with feedback loop
async function handleUserQuery(userId: string, message: string) {
  // Execute chatbot with tracing
  const result = await chatbot(message);
  const runId = result.runId; // Captured inside the traceable function via getCurrentRunTree

  // Queue low-confidence outputs for human review
  if (result.confidence < 0.7) {
    await client.addRunsToAnnotationQueue({
      queueId: qaQueue.id,
      runIds: [runId]
    });

    console.log(`Low confidence (${result.confidence}) - queued for review`);
  }

  // Create presigned token for user feedback
  const feedbackToken = await client.createPresignedFeedbackToken(
    runId,
    "user_satisfaction",
    {
      expiration: new Date(Date.now() + 24 * 60 * 60 * 1000), // 24 hours
      feedbackConfig: {
        type: "continuous",
        min: 1,
        max: 5
      }
    }
  );

  return {
    response: result.response,
    feedbackUrl: feedbackToken.url, // Return to frontend
    confidence: result.confidence,
    queuedForReview: result.confidence < 0.7
  };
}

// Helper: Calculate confidence score
function calculateConfidence(response: string): number {
  // Example: based on response length, hedging words, etc.
  if (response.includes("I'm not sure") || response.includes("maybe")) {
    return 0.5;
  }
  return response.length > 50 ? 0.9 : 0.6;
}

// Example usage
const result = await handleUserQuery("user-123", "What is LangSmith?");
console.log("Response:", result.response);
console.log("User can provide feedback at:", result.feedbackUrl);

Human Review Workflow

// Reviewer processes annotation queue
async function processReviewQueue(queueId: string, reviewerId: string) {
  // Get queue size
  const size = await client.getSizeFromAnnotationQueue(queueId);
  console.log(`Queue has ${size} items to review`);

  // Process items one by one. Always read the item at the front of the queue,
  // since indices shift as reviewed runs are removed below.
  for (let i = 0; i < size; i++) {
    const queueItem = await client.getRunFromAnnotationQueue(queueId, 0);
    const run = queueItem.run;

    console.log("\n=== Review Item ===");
    console.log("Input:", run.inputs);
    console.log("Output:", run.outputs);

    // Human reviewer provides feedback (your UI logic)
    const humanRating = await getHumanRating(); // Your UI
    const humanComment = await getHumanComment(); // Your UI

    // Log feedback
    await client.createFeedback(run.id, "human_quality_review", {
      score: humanRating / 5, // Normalize to 0-1,
      comment: humanComment,
      feedbackSourceType: "app",
      source_info: {,
      reviewer_id: reviewerId,
      review_timestamp: new Date().toISOString(),
      },
    });

    // Remove from queue after review
    await client.deleteRunFromAnnotationQueue(queueId, run.id);
  }

  console.log("Review queue processing complete");
}
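
The getHumanRating / getHumanComment placeholders can be anything from a review UI to a terminal prompt. A minimal CLI sketch using Node's readline (how reviewers interact is an assumption):

import * as readline from "node:readline/promises";
import { stdin as input, stdout as output } from "node:process";

const rl = readline.createInterface({ input, output });

// Prompt the reviewer on the command line for a 1-5 rating
async function getHumanRating(): Promise<number> {
  const answer = await rl.question("Rating (1-5): ");
  return Number(answer);
}

// Prompt the reviewer for an optional free-text comment
async function getHumanComment(): Promise<string> {
  return rl.question("Comment (optional): ");
}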

Pattern 2: Privacy-First Production Deployment

Combines: Anonymization + Client Configuration + Sampling + Privacy Controls

Use Case: Deploy an LLM application with comprehensive PII protection and optimal performance.

Complete Implementation

import { traceable } from "langsmith/traceable";
import { Client } from "langsmith";
import { createAnonymizer } from "langsmith/anonymizer";
import OpenAI from "openai";

// 1. Create comprehensive anonymizer
const anonymizer = createAnonymizer([
  // Email addresses
  { pattern: /\b[\w\.-]+@[\w\.-]+\.\w+\b/g, replace: "[EMAIL]" },
  // Phone numbers (US format)
  { pattern: /\b\d{3}[-.]?\d{3}[-.]?\d{4}\b/g, replace: "[PHONE]" },
  // SSNs
  { pattern: /\b\d{3}-\d{2}-\d{4}\b/g, replace: "[SSN]" },
  // Credit cards
  { pattern: /\b\d{4}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\b/g, replace: "[CARD]" },
  // API keys (various formats)
  { pattern: /\bsk-[a-zA-Z0-9]{32,}\b/g, replace: "[API_KEY]" },
  { pattern: /\bapi[-_]key["\s:=]+[a-zA-Z0-9]{20,}/gi, replace: "[API_KEY]" },
  // AWS keys
  { pattern: /\bAKIA[0-9A-Z]{16}\b/g, replace: "[AWS_KEY]" },
  // JWTs
  { pattern: /\beyJ[a-zA-Z0-9_-]+\.eyJ[a-zA-Z0-9_-]+\.[a-zA-Z0-9_-]+/g, replace: "[JWT]" },
  // IP addresses
  { pattern: /\b(?:\d{1,3}\.){3}\d{1,3}\b/g, replace: "[IP]" }
], {
  // Exclude certain paths from anonymization
  exclude: ["metadata.user_id", "metadata.session_id"]
});

// 2. Configure privacy-focused client
const client = new Client({
  apiKey: process.env.LANGCHAIN_API_KEY,

  // Performance: sample in production
  tracingSamplingRate: process.env.NODE_ENV === "production" ? 0.1 : 1.0,

  // Privacy: anonymize all data
  anonymizer: anonymizer,

  // Privacy: hide specific fields
  hideInputs: (inputs) => {
    const { password, secret, token, ...safe } = inputs;
    return safe;
  },

  // Performance: optimize batching
  autoBatchTracing: true,
  batchSizeBytesLimit: 20_000_000,
  traceBatchConcurrency: 5,

  // Privacy: don't log runtime info
  omitTracedRuntimeInfo: true
});

// 3. Create traced function with privacy controls
const processUserData = traceable(
  async (input: {
    userEmail: string;
    userSSN: string;
    query: string;
    apiKey: string
  }) => {
    // Business logic
    const openai = new OpenAI({ apiKey: input.apiKey });

    const response = await openai.chat.completions.create({
      model: "gpt-4",
      messages: [{ role: "user", content: input.query }]
    });

    return {
      response: response.choices[0].message.content,
      userEmail: input.userEmail // Will be anonymized
    };
  },
  {
    name: "process-user-data",
    run_type: "chain",
    client: client,
    // Additional per-function anonymization
    processInputs: anonymizer,
    processOutputs: anonymizer,
    tags: ["production", "privacy-critical"]
  }
);

// 4. Usage with privacy guarantees
const result = await processUserData({
  userEmail: "user@example.com",       // → Logged as "[EMAIL]"
  userSSN: "123-45-6789",              // → Logged as "[SSN]"
  query: "What is my account balance?",
  apiKey: "sk-abc123xyz"               // → Logged as "[API_KEY]"
});

// Traces will show anonymized data:
// Input: { userEmail: "[EMAIL]", userSSN: "[SSN]", query: "...", apiKey: "[API_KEY]" }

// 5. Ensure flush before shutdown
process.on('SIGTERM', async () => {
  await client.awaitPendingTraceBatches();
  client.cleanup();
  process.exit(0);
});
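
Before wiring the anonymizer into the client, it helps to sanity-check the regex patterns against representative strings. A minimal sketch using plain string replacement; the sample values are illustrative only:

// Quick sanity check of the PII patterns before deployment (illustrative samples)
const piiPatterns: Array<{ pattern: RegExp; replace: string }> = [
  { pattern: /\b[\w\.-]+@[\w\.-]+\.\w+\b/g, replace: "[EMAIL]" },
  { pattern: /\b\d{3}-\d{2}-\d{4}\b/g, replace: "[SSN]" },
  { pattern: /\bsk-[a-zA-Z0-9]{32,}\b/g, replace: "[API_KEY]" }
];

const sample = "Contact jane@example.com, SSN 123-45-6789, key sk-" + "a".repeat(40);

const scrubbed = piiPatterns.reduce(
  (text, { pattern, replace }) => text.replace(pattern, replace),
  sample
);

console.log(scrubbed);
// "Contact [EMAIL], SSN [SSN], key [API_KEY]"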

Pattern 3: A/B Testing with Human Review

Combines: Evaluation + Comparative Experiments + Annotation Queues + Feedback

Use Case: Compare two models, run automated evaluation, then have humans review outputs side-by-side to determine the winner.

Complete Implementation

import { evaluate, evaluateComparative } from "langsmith/evaluation";
import { Client } from "langsmith";
import OpenAI from "openai";

const client = new Client();
const openai = new OpenAI();

// Define two model variants
async function modelA(input: { question: string }) {
  const response = await openai.chat.completions.create({
    model: "gpt-4",
    messages: [{ role: "user", content: input.question }],
    temperature: 0.7
  });
  return { answer: response.choices[0].message.content };
}

async function modelB(input: { question: string }) {
  const response = await openai.chat.completions.create({
    model: "gpt-3.5-turbo",
    messages: [{ role: "user", content: input.question }],
    temperature: 0.7
  });
  return { answer: response.choices[0].message.content };
}

// Create test dataset
const dataset = await client.createDataset({
  datasetName: "model-comparison-qa",
  description: "QA dataset for A/B testing",
  dataType: "kv"
});

await client.createExamples({
  datasetId: dataset.id,
  inputs: [
    { question: "What is machine learning?" },
    { question: "Explain neural networks" },
    { question: "What is deep learning?" }
  ],
  outputs: [
    { answer: "Machine learning is..." },
    { answer: "Neural networks are..." },
    { answer: "Deep learning is..." }
  ]
});

// Step 1: Run automated evaluation on both models
const experimentA = await evaluate(modelA, {
  data: "model-comparison-qa",
  evaluators: [
    ({ run, example }) => ({
      key: "correctness",
      score: run.outputs?.answer?.includes(example?.outputs?.answer) ? 1 : 0
    }),
    ({ run }) => ({
      key: "length",
      score: (run.outputs?.answer?.length || 0) > 50 ? 1 : 0
    })
  ],
  experimentPrefix: "gpt-4-variant",
  metadata: { model: "gpt-4", temperature: 0.7 }
});

const experimentB = await evaluate(modelB, {
  data: "model-comparison-qa",
  evaluators: [
    ({ run, example }) => ({
      key: "correctness",
      score: run.outputs?.answer?.includes(example?.outputs?.answer) ? 1 : 0
    }),
    ({ run }) => ({
      key: "length",
      score: (run.outputs?.answer?.length || 0) > 50 ? 1 : 0
    })
  ],
  experimentPrefix: "gpt-3.5-variant",
  metadata: { model: "gpt-3.5-turbo", temperature: 0.7 }
});

console.log("Experiment A:", experimentA.experimentName);
console.log("Experiment B:", experimentB.experimentName);

// Step 2: Create comparative experiment for side-by-side review
const comparison = await client.createComparativeExperiment({
  name: "GPT-4 vs GPT-3.5 Comparison",
  experimentIds: [experimentA.experimentName, experimentB.experimentName],
  description: "Comparing response quality and accuracy",
  metadata: {
    criteria: ["correctness", "helpfulness", "conciseness"],
    reviewers: ["team-lead", "domain-expert"]
  }
});

console.log("Comparison URL:", comparison.url);

// Step 3: Run comparative evaluation with automated evaluators
const comparativeResults = await evaluateComparative(
  [experimentA.experimentName, experimentB.experimentName],
  {
    comparativeEvaluators: [
      (runs, example) => {
        // Compare run outputs side-by-side
        const scoreA = scoreQuality(runs[0].outputs);
        const scoreB = scoreQuality(runs[1].outputs);

        return {
          key: "quality_winner",
          scores: [scoreA, scoreB],
          value: scoreA > scoreB ? "A" : scoreB > scoreA ? "B" : "Tie"
        };
      }
    ]
  }
);

// Step 4: Add runs to an annotation queue for human review
const reviewQueue = await client.createAnnotationQueue({
  name: "GPT-4 vs GPT-3.5 Human Review",
  description: "Side-by-side review of comparative experiment outputs"
});

const runIds = comparativeResults.results.flatMap(r => r.run_ids || []);

await client.addRunsToAnnotationQueue({
  queueId: reviewQueue.id,
  runIds: runIds
});

console.log(`Added ${runIds.length} run pairs to human review queue`);

// Step 5: Human review process
async function conductHumanReview(queueId: string) {
  const size = await client.getSizeFromAnnotationQueue(queueId);

  for (let i = 0; i < size; i++) {
    // Always read the front of the queue; indices shift as reviewed runs are removed
    const item = await client.getRunFromAnnotationQueue(queueId, 0);
    const run = item.run;

    // Display to human reviewer (your UI logic)
    console.log(`\n=== Review ${i + 1}/${size} ===`);
    console.log("Input:", run.inputs);
    console.log("Model A Output:", run.outputs?.modelA);
    console.log("Model B Output:", run.outputs?.modelB);

    // Collect human judgment
    const winner = await askReviewer("Which model performed better? (A/B/Tie)");
    const rating = await askReviewer("Overall quality (1-5)?");
    const comment = await askReviewer("Comments?");

    // Log feedback
    await client.createFeedback(run.id, "human_preference", {
      value: winner,
      score: rating / 5,
      comment: comment,
      feedbackSourceType: "app",
      source_info: {,
      review_type: "comparative",
      comparison_id: comparison.id,
      },
    });

    // Remove from queue
    await client.deleteRunFromAnnotationQueue(queueId, run.id);
  }
}

// Helper function (implement based on your UI)
async function askReviewer(question: string): Promise<any> {
  // Your implementation
  return "A"; // Placeholder
}

function scoreQuality(outputs: any): number {
  // Your scoring logic
  return 0.8; // Placeholder
}
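
The scoreQuality helper above is a placeholder. One possible heuristic, assuming outputs carry an answer string (substitute a rubric or LLM-as-judge scorer for real use):

// Heuristic quality score in [0, 1] based on simple surface features (sketch)
function scoreQualityHeuristic(outputs: any): number {
  const answer: string = outputs?.answer ?? "";
  let score = 0;
  if (answer.length > 50) score += 0.4;                           // Substantive length
  if (!/i'm not sure|maybe/i.test(answer)) score += 0.3;          // No hedging language
  if ((answer.match(/[.!?]\s/g) || []).length >= 1) score += 0.3; // More than one sentence
  return score;
}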

Pattern 4: Complete Observability Stack

Combines: OpenTelemetry + LangSmith Tracing + Data Anonymization

Use Case: Integrate LangSmith into an existing observability stack with full privacy protection.

Complete Implementation

import { initializeOTEL } from "langsmith/experimental/otel/setup";
import { createAnonymizer } from "langsmith/anonymizer";
import { Client } from "langsmith";
import { traceable } from "langsmith/traceable";
import { HttpInstrumentation } from "@opentelemetry/instrumentation-http";
import { ExpressInstrumentation } from "@opentelemetry/instrumentation-express";

// Step 1: Initialize OpenTelemetry for distributed tracing
initializeOTEL({
  projectName: "production-app",
  instrumentations: [
    new HttpInstrumentation(),
    new ExpressInstrumentation(),
  ],
  // Optional: export to multiple backends
  exporters: ["langsmith", "jaeger"] // Multi-backend support
});

// Step 2: Create anonymizer for PII protection
const anonymizer = createAnonymizer([
  { pattern: /\b[\w\.-]+@[\w\.-]+\.\w+\b/g, replace: "[EMAIL]" },
  { pattern: /\bsk-[a-zA-Z0-9]+\b/g, replace: "[API_KEY]" },
  { pattern: /\b\d{3}-\d{2}-\d{4}\b/g, replace: "[SSN]" },
  { pattern: /\b\d{16}\b/g, replace: "[CARD]" }
]);

// Step 3: Configure client with privacy and performance
const client = new Client({
  apiKey: process.env.LANGSMITH_API_KEY,
  anonymizer: anonymizer,

  // Performance optimization
  autoBatchTracing: true,
  batchSizeBytesLimit: 20_000_000,
  traceBatchConcurrency: 10,
  tracingSamplingRate: 0.1,

  // Privacy controls
  hideInputs: (inputs) => {
    const { password, apiKey, ...safe } = inputs;
    return safe;
  },
  omitTracedRuntimeInfo: true
});

// Step 4: Create traced functions with full observability
const processUserQuery = traceable(
  async (input: { email: string; query: string; ssn?: string }) => {
    // Both OTEL and LangSmith will trace this
    const response = await handleQuery(input.query);

    return {
      response: response,
      userEmail: input.email,  // Will be anonymized to "[EMAIL]"
      confidence: response.confidence
    };
  },
  {
    name: "process_user_query",
    run_type: "chain",
    client: client,
    processInputs: anonymizer,   // Double anonymization for safety
    processOutputs: anonymizer,
    metadata: {
      environment: process.env.NODE_ENV,
      version: process.env.APP_VERSION
    },
    tags: ["production", "privacy-critical"]
  }
);

// Step 5: Express.js integration with full observability
import express from "express";

const app = express();
app.use(express.json());

app.post("/api/query", async (req, res) => {
  try {
    const result = await processUserQuery({
      email: req.body.email,       // → "[EMAIL]" in traces
      query: req.body.query,
      ssn: req.body.ssn           // → "[SSN]" in traces
    });

    res.json({ response: result.response });
  } catch (error) {
    // Error traced automatically
    res.status(500).json({ error: "Processing failed" });
  }
});

// Step 6: Graceful shutdown with trace upload
process.on('SIGTERM', async () => {
  console.log('SIGTERM received, flushing traces...');
  await client.awaitPendingTraceBatches();
  client.cleanup();
  process.exit(0);
});

app.listen(3000, () => {
  console.log("Server running with full observability");
  console.log("- OpenTelemetry: Distributed tracing");
  console.log("- LangSmith: LLM-specific tracing");
  console.log("- Anonymization: PII protection enabled");
  console.log("- Sampling: 10% in production");
});

// Helper (implement your logic)
async function handleQuery(query: string): Promise<any> {
  return { confidence: 0.9, content: "Response" };
}

Pattern 5: Multi-Provider LLM Router with Unified Tracing

Combines: Multiple SDK Wrappers + Project Organization + Performance Monitoring

Use Case: Route requests to different LLM providers based on requirements while maintaining unified tracing.

Complete Implementation

import { wrapOpenAI } from "langsmith/wrappers/openai";
import { wrapAnthropic } from "langsmith/wrappers/anthropic";
import { traceable } from "langsmith/traceable";
import { Client } from "langsmith";
import OpenAI from "openai";
import Anthropic from "@anthropic-ai/sdk";

const client = new Client({
  projectName: "multi-provider-llm"
});

// Wrap different providers
const openai = wrapOpenAI(new OpenAI(), {
  projectName: "multi-provider-llm",
  runName: "openai-call",
  tags: ["openai", "gpt"],
  metadata: { provider: "openai" }
});

const anthropic = wrapAnthropic(new Anthropic(), {
  project_name: "multi-provider-llm",
  name: "anthropic-call",
  tags: ["anthropic", "claude"],
  metadata: { provider: "anthropic" }
});

// Route requests based on requirements
const routeRequest = traceable(
  async (input: {
    prompt: string;
    requirements: {
      speed?: boolean;
      reasoning?: boolean;
      cost?: "low" | "high";
    }
  }) => {
    const { prompt, requirements } = input;

    // Routing logic
    if (requirements.reasoning) {
      // Use Claude for complex reasoning
      const message = await anthropic.messages.create({
        model: "claude-sonnet-4-20250514",
        max_tokens: 2000,
        messages: [{ role: "user", content: prompt }]
      });

      return {
        provider: "anthropic",
        model: "claude-sonnet-4",
        response: message.content[0].text,
        usage: message.usage
      };
    } else if (requirements.speed || requirements.cost === "low") {
      // Use GPT-3.5 for speed/cost
      const completion = await openai.chat.completions.create({
        model: "gpt-3.5-turbo",
        messages: [{ role: "user", content: prompt }]
      });

      return {
        provider: "openai",
        model: "gpt-3.5-turbo",
        response: completion.choices[0].message.content,
        usage: completion.usage
      };
    } else {
      // Default to GPT-4
      const completion = await openai.chat.completions.create({
        model: "gpt-4",
        messages: [{ role: "user", content: prompt }]
      });

      return {
        provider: "openai",
        model: "gpt-4",
        response: completion.choices[0].message.content,
        usage: completion.usage
      };
    }
  },
  {
    name: "llm-router",
    run_type: "chain",
    client: client,
    tags: ["router", "multi-provider"]
  }
);

// Usage examples
const response1 = await routeRequest({
  prompt: "Explain quantum computing",
  requirements: { reasoning: true } // Routes to Claude
});

const response2 = await routeRequest({
  prompt: "What is 2+2?",
  requirements: { speed: true } // Routes to GPT-3.5
});

const response3 = await routeRequest({
  prompt: "Write a poem",
  requirements: { cost: "low" } // Routes to GPT-3.5
});

// Analyze provider performance
async function analyzeProviderPerformance(projectName: string) {
  const stats = {
    openai: { count: 0, totalLatency: 0, totalCost: 0 },
    anthropic: { count: 0, totalLatency: 0, totalCost: 0 }
  };

  for await (const run of client.listRuns({
    projectName,
    filter: 'has(tags, "router")',
    limit: 1000
  })) {
    const provider = run.metadata?.provider;
    if (provider && stats[provider]) {
      stats[provider].count++;
      stats[provider].totalLatency += (run.end_time - run.start_time);
      stats[provider].totalCost += run.total_cost || 0;
    }
  }

  console.log("=== Provider Performance ===");
  for (const [provider, data] of Object.entries(stats)) {
    console.log(`${provider}:`);
    console.log(`  Requests: ${data.count}`);
    console.log(`  Avg Latency: ${data.totalLatency / data.count}ms`);
    console.log(`  Total Cost: $${data.totalCost.toFixed(4)}`);
  }
}
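
Note that analyzeProviderPerformance filters on the router runs (has(tags, "router")) but reads run.metadata.provider, which the router run does not currently set. A small sketch of recording the chosen provider from inside routeRequest with getCurrentRunTree (where to store it is an assumption):

import { getCurrentRunTree } from "langsmith/traceable";

// Call this from inside routeRequest, right after the routing decision, so the
// listRuns-based analysis above can group router runs by provider
function recordProvider(provider: "openai" | "anthropic"): void {
  const runTree = getCurrentRunTree();
  runTree.metadata = { ...runTree.metadata, provider };
}

// e.g. recordProvider("anthropic") in the reasoning branch,
// recordProvider("openai") in the speed/cost and default branches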

Pattern 6: Dataset Version Control with Evaluation Pipeline

Combines: Dataset Versioning + Evaluation + Comparative Analysis

Use Case: Maintain dataset versions, run evaluations, and compare performance across versions.

Complete Implementation

import { Client } from "langsmith";
import { evaluate } from "langsmith/evaluation";

const client = new Client();

// Create initial dataset
const dataset = await client.createDataset({
  datasetName: "qa-dataset",
  description: "QA dataset with versioning",
  dataType: "kv"
});

// Version 1: Initial examples
await client.createExamples({
  datasetId: dataset.id,
  inputs: [
    { question: "What is AI?" },
    { question: "What is ML?" }
  ],
  outputs: [
    { answer: "Artificial Intelligence is..." },
    { answer: "Machine Learning is..." }
  ]
});

// Tag version 1
const v1Date = new Date();
await client.updateDatasetTag({
  datasetId: dataset.id,
  tag: "v1",
  asOf: v1Date
});

// Run baseline evaluation on v1
const baselineResults = await evaluate(myModel, {
  data: "qa-dataset",
  experimentPrefix: "baseline-v1",
  metadata: { dataset_version: "v1" }
});

console.log("Baseline accuracy:", baselineResults.results.filter(r => r.score === 1).length / baselineResults.results.length);

// Version 2: Add more examples (dataset auto-versions)
await client.createExamples({
  datasetId: dataset.id,
  inputs: [
    { question: "What is Deep Learning?" },
    { question: "What is NLP?" }
  ],
  outputs: [
    { answer: "Deep Learning is..." },
    { answer: "Natural Language Processing is..." }
  ]
});

// Tag version 2
const v2Date = new Date();
await client.updateDatasetTag({
  datasetId: dataset.id,
  tag: "v2",
  asOf: v2Date
});

// Compare versions
const diff = await client.diffDatasetVersions({
  datasetName: "qa-dataset",
  fromVersion: v1Date.toISOString(),
  toVersion: v2Date.toISOString()
});

console.log("Dataset diff:");
console.log("  Added:", diff.examples_added.length);
console.log("  Modified:", diff.examples_modified.length);
console.log("  Removed:", diff.examples_removed.length);

// Run evaluation on v2
const v2Results = await evaluate(myModel, {
  data: "qa-dataset",
  experimentPrefix: "evaluation-v2",
  metadata: { dataset_version: "v2" }
});

// Create comparative experiment between v1 and v2 results
const versionComparison = await client.createComparativeExperiment({
  name: "Dataset V1 vs V2 Performance",
  experimentIds: [
    baselineResults.experimentName,
    v2Results.experimentName
  ],
  description: "Compare model performance on different dataset versions"
});

console.log("Version comparison:", versionComparison.url);

// Read specific version later
const v1Snapshot = await client.readDatasetVersion({
  datasetName: "qa-dataset",
  asOf: v1Date
});

console.log("V1 had", v1Snapshot.example_count, "examples");

// Helper
async function myModel(input: any) {
  return { answer: "Generated answer" };
}
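
To turn this into a regression gate, compare the aggregate scores of the two experiments and fail the pipeline when v2 performs worse. A sketch that assumes the same simplified results shape used in the logging above:

// Compare aggregate accuracy across dataset versions (assumes r.score as above)
function experimentAccuracy(experiment: any): number {
  const rows = experiment.results;
  return rows.filter((r: any) => r.score === 1).length / rows.length;
}

const v1Accuracy = experimentAccuracy(baselineResults);
const v2Accuracy = experimentAccuracy(v2Results);

console.log(`v1 accuracy: ${v1Accuracy.toFixed(2)}, v2 accuracy: ${v2Accuracy.toFixed(2)}`);

// Fail if v2 is meaningfully worse (the 5-point tolerance is an assumption)
if (v2Accuracy < v1Accuracy - 0.05) {
  throw new Error("Regression: model performs worse on dataset v2 than on v1");
}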

Pattern 7: Distributed Tracing Across Microservices

Combines: RunTree + Headers Propagation + Manual Tracing

Use Case: Trace LLM operations across multiple services while maintaining parent-child relationships.

Complete Implementation

import { RunTree } from "langsmith";
import { Client } from "langsmith";
import express from "express";
import axios from "axios";

const client = new Client();

// ========== Service A: API Gateway ==========
const serviceA = express();
serviceA.use(express.json());

serviceA.post("/api/process", async (req, res) => {
  // Create root run in Service A
  const rootRun = new RunTree({
    name: "api-gateway",
    run_type: "chain",
    inputs: { request: req.body },
    client: client,
    project_name: "microservices-trace"
  });

  try {
    // Export trace context as headers
    const traceHeaders = rootRun.toHeaders();

    // Call Service B with trace context
    const response = await axios.post(
      "http://service-b:3001/process",
      req.body,
      { headers: traceHeaders }
    );

    // End root run
    await rootRun.end({
      statusCode: 200,
      response: response.data
    });
    await rootRun.postRun();

    res.json(response.data);
  } catch (error) {
    await rootRun.end(undefined, error.message);
    await rootRun.postRun();

    res.status(500).json({ error: error.message });
  }
});

// ========== Service B: Processing Service ==========
const serviceB = express();
serviceB.use(express.json());

serviceB.post("/process", async (req, res) => {
  // Reconstruct run tree from headers
  const parentRun = RunTree.fromHeaders(req.headers, {
    name: "processing-service",
    run_type: "chain",
    client: client
  });

  if (!parentRun) {
    // No trace context - create new root
    console.warn("No trace context in headers");
  }

  const serviceRun = parentRun || new RunTree({
    name: "processing-service",
    run_type: "chain",
    inputs: { data: req.body },
    client: client
  });

  try {
    // Create child run for LLM call
    const llmRun = serviceRun.createChild({
      name: "llm-generation",
      run_type: "llm",
      inputs: { prompt: req.body.prompt }
    });

    // Simulate LLM call
    const llmResponse = await callLLM(req.body.prompt);

    await llmRun.end({ response: llmResponse });
    await llmRun.postRun();

    // End service run
    await serviceRun.end({ result: llmResponse });
    await serviceRun.postRun();

    res.json({ result: llmResponse });
  } catch (error) {
    await serviceRun.end(undefined, error.message);
    await serviceRun.postRun();

    res.status(500).json({ error: error.message });
  }
});

// Helper
async function callLLM(prompt: string) {
  return "LLM response";
}

// Start services
serviceA.listen(3000, () => console.log("Service A on :3000"));
serviceB.listen(3001, () => console.log("Service B on :3001"));

// When viewing traces in LangSmith:
// api-gateway (Service A)
//   └─ processing-service (Service B)
//       └─ llm-generation (LLM call in Service B)
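
A client call to the gateway then produces the nested trace shown above. A quick local exercise (the localhost URL is an assumption for local testing):

// Exercise the gateway locally; the downstream call to Service B carries the
// trace headers, so both services land under a single trace in LangSmith
const reply = await axios.post("http://localhost:3000/api/process", {
  prompt: "Summarize our Q3 results"
});

console.log(reply.data);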

Pattern 8: Continuous Evaluation Pipeline

Combines: Dataset Management + Automated Evaluation + Feedback + Alerts

Use Case: Continuously evaluate the model on production data and alert on regressions.

Complete Implementation

import { Client } from "langsmith";
import { evaluate } from "langsmith/evaluation";
import { traceable } from "langsmith/traceable";

const client = new Client();

// Step 1: Collect production data into dataset
const productionDataset = await client.createDataset({
  datasetName: "production-golden-set",
  description: "Curated examples from production",
  dataType: "kv"
});

// Step 2: Automated production data collection
const productionBot = traceable(
  async (query: string) => {
    const response = await generateResponse(query);
    return response;
  },
  {
    name: "production-bot",
    tags: ["production"],
    metadata: { version: "2.0" }
  }
);

// Collect high-quality production runs as examples
async function collectGoldenExamples(minFeedbackScore = 0.9) {
  const goldenRuns = [];

  // Find runs with high user feedback
  for await (const feedback of client.listFeedback({
    feedbackKeys: ["user_rating"],
    has_score: true,
    limit: 100
  })) {
    if (feedback.score >= minFeedbackScore) {
      const run = await client.readRun(feedback.run_id);

      goldenRuns.push({
        inputs: run.inputs,
        outputs: run.outputs,
        metadata: {
          source_run_id: run.id,
          user_score: feedback.score,
          collected_at: new Date().toISOString()
        }
      });
    }
  }

  // Add to dataset
  if (goldenRuns.length > 0) {
    await client.createExamples({
      datasetId: productionDataset.id,
      examples: goldenRuns
    });

    console.log(`Added ${goldenRuns.length} golden examples from production`);
  }
}

// Step 3: Scheduled evaluation (run daily)
async function runDailyEvaluation() {
  const results = await evaluate(productionBot, {
    data: "production-golden-set",
    evaluators: [
      ({ run, example }) => ({
        key: "correctness",
        score: calculateSimilarity(run.outputs, example?.outputs)
      }),
      ({ run }) => ({
        key: "latency",
        score: (run.end_time - run.start_time) < 2000 ? 1 : 0
      })
    ],
    experimentPrefix: `daily-eval-${new Date().toISOString().split('T')[0]}`,
    metadata: {
      type: "daily-regression-test",
      dataset_size: (await client.readDataset({ datasetName: "production-golden-set" })).example_count
    }
  });

  // Calculate metrics
  const correctnessScores = results.results
    .map(r => r.evaluation_results.find(e => e.key === "correctness")?.score || 0);

  const averageCorrectness = correctnessScores.reduce((a, b) => a + b, 0) / correctnessScores.length;

  console.log("=== Daily Evaluation Results ===");
  console.log("Average Correctness:", averageCorrectness);

  // Alert on regression
  const regressionThreshold = 0.8;
  if (averageCorrectness < regressionThreshold) {
    await sendAlert({
      severity: "high",
      message: `Model regression detected: correctness ${averageCorrectness} < threshold ${regressionThreshold}`,
      experimentUrl: `https://smith.langchain.com/experiments/${results.experimentName}`
    });
  }

  return results;
}

// Step 4: Continuous improvement loop
async function continuousImprovementLoop() {
  while (true) {
    // Collect new golden examples weekly
    await collectGoldenExamples(0.9);

    // Run evaluation daily
    const results = await runDailyEvaluation();

    // Wait 24 hours
    await new Promise(resolve => setTimeout(resolve, 24 * 60 * 60 * 1000));
  }
}

// Helpers
function calculateSimilarity(output: any, expected: any): number {
  return 0.85; // Your similarity logic
}

async function sendAlert(alert: any) {
  console.error("🚨 ALERT:", alert.message);
  // Send to Slack, PagerDuty, etc.
}

async function generateResponse(query: string) {
  return { answer: "Generated response" };
}
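
Instead of the while (true) loop, a scheduler keeps the process simpler to operate. A sketch using node-cron (an external dependency, not part of the LangSmith SDK; the schedules are assumptions):

import cron from "node-cron";

// Run the daily evaluation at 06:00 UTC
cron.schedule("0 6 * * *", () => {
  runDailyEvaluation().catch(err => console.error("Daily eval failed:", err));
});

// Refresh golden examples weekly (Mondays at 06:00 UTC)
cron.schedule("0 6 * * 1", () => {
  collectGoldenExamples(0.9).catch(err => console.error("Collection failed:", err));
});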

Pattern 9: LangChain + LangSmith Full Integration

Combines: LangChain Callbacks + Traceable + Custom Chains

Use Case: Build a LangChain application with full LangSmith observability.

Complete Implementation

import { traceable } from "langsmith/traceable";
import { getLangchainCallbacks, RunnableTraceable } from "langsmith/langchain";
import { ChatOpenAI } from "@langchain/openai";
import { PromptTemplate } from "@langchain/core/prompts";
import { StringOutputParser } from "@langchain/core/output_parsers";
import { RunnableSequence } from "@langchain/core/runnables";

// Define traceable custom logic
const preprocessInput = traceable(
  async (input: string) => {
    return input.trim().toLowerCase();
  },
  { name: "preprocess-input", run_type: "tool" }
);

const postprocessOutput = traceable(
  async (output: string) => {
    return output.toUpperCase();
  },
  { name: "postprocess-output", run_type: "tool" }
);

// Build LangChain pipeline with tracing
const buildTracedPipeline = traceable(
  async (query: string) => {
    // Get callbacks for LangChain integration
    const callbacks = await getLangchainCallbacks();

    // Step 1: Preprocess (traced as child)
    const processed = await preprocessInput(query);

    // Step 2: LangChain pipeline (traced via callbacks)
    const prompt = PromptTemplate.fromTemplate(
      "Answer this question concisely: {question}"
    );
    const model = new ChatOpenAI({ temperature: 0 });
    const parser = new StringOutputParser();

    const chain = prompt.pipe(model).pipe(parser);

    const llmResponse = await chain.invoke(
      { question: processed },
      { callbacks }
    );

    // Step 3: Postprocess (traced as child)
    const final = await postprocessOutput(llmResponse);

    return final;
  },
  {
    name: "langchain-pipeline",
    run_type: "chain",
    tags: ["langchain", "production"]
  }
);

// Alternative: Wrap traceable as Runnable
const traceableAsRunnable = RunnableTraceable.from(
  traceable(async (input: string) => {
    return `Processed: ${input}`;
  }, { name: "custom-step" })
);

// Use in LangChain sequences
const fullChain = RunnableSequence.from([
  preprocessInput,
  traceableAsRunnable,
  // Can mix traceable and LangChain runnables
]);

// Execute
const result = await buildTracedPipeline("What is LangSmith?");
console.log("Result:", result);

// View complete trace hierarchy:
// langchain-pipeline
//   ├─ preprocess-input
//   ├─ PromptTemplate (via callbacks)
//   ├─ ChatOpenAI (via callbacks)
//   ├─ StringOutputParser (via callbacks)
//   └─ postprocess-output
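
Both the traceable steps and the callback-driven LangChain runs only reach LangSmith when tracing is configured in the environment. A small startup check (the LANGSMITH_* variable names also work):

// Fail fast if tracing is not configured
if (process.env.LANGCHAIN_TRACING_V2 !== "true" || !process.env.LANGCHAIN_API_KEY) {
  console.warn(
    "LangSmith tracing is disabled: set LANGCHAIN_TRACING_V2=true and LANGCHAIN_API_KEY"
  );
}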

Pattern 10: Cost-Aware Evaluation with Budget Limits

Combines: Evaluation + Run Stats + Token Tracking

Use Case: Run evaluations while tracking and limiting costs.

Complete Implementation

import { Client } from "langsmith";
import { evaluate } from "langsmith/evaluation";
import OpenAI from "openai";

const client = new Client();
const openai = new OpenAI();

// Configuration
const COST_PER_1K_INPUT_TOKENS = 0.03;
const COST_PER_1K_OUTPUT_TOKENS = 0.06;
const MAX_EVALUATION_COST = 10.00; // $10 budget

let totalCost = 0;

// Cost-tracking evaluator
async function costAwareModel(input: { question: string }) {
  // Check budget before call
  if (totalCost >= MAX_EVALUATION_COST) {
    throw new Error(`Budget exceeded: $${totalCost.toFixed(2)} >= $${MAX_EVALUATION_COST}`);
  }

  const completion = await openai.chat.completions.create({
    model: "gpt-4",
    messages: [{ role: "user", content: input.question }]
  });

  // Track cost
  const usage = completion.usage;
  if (usage) {
    const callCost =
      (usage.prompt_tokens / 1000) * COST_PER_1K_INPUT_TOKENS +
      (usage.completion_tokens / 1000) * COST_PER_1K_OUTPUT_TOKENS;

    totalCost += callCost;
    console.log(`Call cost: $${callCost.toFixed(4)}, Total: $${totalCost.toFixed(4)}`);
  }

  return { answer: completion.choices[0].message.content };
}

// Run evaluation with cost tracking
try {
  const results = await evaluate(costAwareModel, {
    data: "qa-dataset",
    evaluators: [
      ({ run, example }) => ({
        key: "correctness",
        score: run.outputs?.answer === example?.outputs?.answer ? 1 : 0
      })
    ],
    experimentPrefix: "cost-aware-eval",
    metadata: {
      max_budget: MAX_EVALUATION_COST,
      cost_per_1k_input: COST_PER_1K_INPUT_TOKENS,
      cost_per_1k_output: COST_PER_1K_OUTPUT_TOKENS
    },
    max_concurrency: 1 // Sequential to track cost accurately
  });

  console.log("=== Evaluation Complete ===");
  console.log("Total cost:", `$${totalCost.toFixed(2)}`);
  console.log("Budget remaining:", `$${(MAX_EVALUATION_COST - totalCost).toFixed(2)}`);

  // Get detailed cost breakdown from LangSmith
  const stats = await client.getRunStats({
    projectName: "default",
    filter: `eq(name, "${results.experimentName}")`
  });

  console.log("Total tokens:", stats.total_tokens);
  console.log("Total cost (from LangSmith):", `$${stats.total_cost?.toFixed(4) || 0}`);
} catch (error) {
  console.error("Evaluation stopped:", error.message);
  console.log("Spent before stopping:", `$${totalCost.toFixed(2)}`);
}
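
If the evaluation mixes models, a per-model price map keeps the accounting in one place. The prices below are illustrative assumptions, not current OpenAI rates:

// Illustrative per-model pricing (USD per 1K tokens); update to current rates
const PRICING: Record<string, { input: number; output: number }> = {
  "gpt-4": { input: 0.03, output: 0.06 },
  "gpt-3.5-turbo": { input: 0.0005, output: 0.0015 }
};

// Estimate the cost of one completion from its token usage
function estimateCost(
  model: string,
  usage: { prompt_tokens: number; completion_tokens: number }
): number {
  const rates = PRICING[model];
  if (!rates) return 0; // Unknown model: skip rather than guess
  return (
    (usage.prompt_tokens / 1000) * rates.input +
    (usage.completion_tokens / 1000) * rates.output
  );
}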

Pattern 11: Multi-Turn Conversation with Feedback

Combines: Tracing + Conversation Threading + Feedback

Use Case: Track multi-turn conversations with session management and per-turn feedback.

Complete Implementation

import { traceable, getCurrentRunTree } from "langsmith/traceable";
import { Client } from "langsmith";
import OpenAI from "openai";

const client = new Client();
const openai = new OpenAI();

interface ConversationTurn {
  role: "user" | "assistant";
  content: string;
}

// Traced conversation manager
const conversationSession = traceable(
  async (sessionId: string, userMessage: string, history: ConversationTurn[]) => {
    const runTree = getCurrentRunTree();

    // Add conversation metadata
    runTree.metadata = {
      ...runTree.metadata,
      conversation_id: sessionId,
      turn_number: Math.floor(history.length / 2) + 1
    };

    // Build conversation context
    const messages = [
      ...history,
      { role: "user" as const, content: userMessage }
    ];

    const completion = await openai.chat.completions.create({
      model: "gpt-4",
      messages: messages
    });

    const assistantMessage = completion.choices[0].message.content;

    return {
      message: assistantMessage,
      conversationId: sessionId,
      turnNumber: runTree.metadata.turn_number,
      runId: runTree.id // Expose the run ID so callers can attach per-turn feedback
    };
  },
  {
    name: "conversation-turn",
    run_type: "chain",
    tags: ["conversation", "multi-turn"]
  }
);

// Conversation manager class
class ConversationManager {
  private sessions = new Map<string, ConversationTurn[]>();

  async sendMessage(sessionId: string, userMessage: string) {
    // Get or create conversation history
    const history = this.sessions.get(sessionId) || [];

    // Process turn with tracing
    const result = await conversationSession(sessionId, userMessage, history);

    // Update history
    history.push(
      { role: "user", content: userMessage },
      { role: "assistant", content: result.message }
    );
    this.sessions.set(sessionId, history);

    return result;
  }

  async collectTurnFeedback(sessionId: string, runId: string, rating: number) {
    await client.createFeedback(runId, "turn_quality", {
      score: rating / 5,
      value: rating,
      feedbackSourceType: "app",
      source_info: {
        conversation_id: sessionId,
        feedback_type: "per-turn"
      }
    });
  }

  async getConversationHistory(sessionId: string) {
    return this.sessions.get(sessionId) || [];
  }

  // Query all turns in a conversation
  async getConversationRuns(sessionId: string) {
    const runs = [];
    for await (const run of client.listRuns({
      filter: `eq(metadata.conversation_id, "${sessionId}")`,
      order: "asc"
    })) {
      runs.push(run);
    }
    return runs;
  }
}

// Usage
const manager = new ConversationManager();
const sessionId = "session-123";

// Turn 1
const turn1 = await manager.sendMessage(sessionId, "Hello!");
console.log("Bot:", turn1.message);
await manager.collectTurnFeedback(sessionId, turn1.runId, 5);

// Turn 2 (has context from turn 1)
const turn2 = await manager.sendMessage(sessionId, "What did I just say?");
console.log("Bot:", turn2.message);

// Get all conversation runs
const conversationRuns = await manager.getConversationRuns(sessionId);
console.log(`Conversation has ${conversationRuns.length} turns`);
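
In a long-running service the in-memory session map grows without bound. A minimal expiry sketch (the 30-minute TTL is an assumption); call touchSession from sendMessage and run evictStaleSessions on an interval:

// Session expiry: track last activity per session and evict stale ones
const SESSION_TTL_MS = 30 * 60 * 1000; // 30 minutes (assumption)
const lastActivity = new Map<string, number>();

function touchSession(sessionId: string): void {
  lastActivity.set(sessionId, Date.now());
}

function evictStaleSessions(sessions: Map<string, ConversationTurn[]>): void {
  const now = Date.now();
  for (const [sessionId, last] of lastActivity) {
    if (now - last > SESSION_TTL_MS) {
      sessions.delete(sessionId);
      lastActivity.delete(sessionId);
    }
  }
}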


Related Documentation

  • Tracing Guide - Tracing fundamentals
  • Evaluation Guide - Evaluation strategies
  • Workflows - Common workflows
  • Advanced Features - Advanced feature overview
  • Decision Trees - Choosing the right approach
  • Anti-Patterns - What to avoid
  • Error Handling - Error recovery patterns