tessl install tessl/npm-langsmith@0.4.3

TypeScript client SDK for the LangSmith LLM tracing, evaluation, and monitoring platform.
Advanced patterns combining multiple LangSmith features for real-world use cases.
This guide demonstrates how to combine LangSmith features (tracing, evaluation, feedback, anonymization, annotation queues, etc.) to build robust production systems. Each pattern is extracted from production use cases and shows complete working implementations.
Combines: Tracing + Feedback Collection + Annotation Queues
Use Case: Monitor a production LLM application, collect user feedback, and queue low-confidence outputs for human review.
import { traceable, getCurrentRunTree } from "langsmith/traceable";
import { Client } from "langsmith";
import OpenAI from "openai";
const client = new Client({
projectName: "production-chatbot",
tracingSamplingRate: 0.1, // Sample 10% in production
hideInputs: false,
hideOutputs: false
});
const openai = new OpenAI();
// Create annotation queue for low-confidence outputs
const qaQueue = await client.createAnnotationQueue({
name: "Low Confidence Review",
description: "Human review queue for outputs with confidence < 0.7",
rubricInstructions: "Rate: 1=Poor, 2=Fair, 3=Good, 4=Great, 5=Excellent"
});
// Traced chatbot function
const chatbot = traceable(
async (userMessage: string) => {
const completion = await openai.chat.completions.create({
model: "gpt-4",
messages: [
{ role: "system", content: "You are a helpful assistant." },
{ role: "user", content: userMessage }
],
temperature: 0.7
});
const response = completion.choices[0].message.content;
// Calculate confidence (example logic)
const confidence = calculateConfidence(response);
// Capture the run ID so the caller can attach feedback to this trace
const runId = getCurrentRunTree().id;
return { response, confidence, runId };
},
{
name: "production-chatbot",
run_type: "chain",
client: client,
metadata: {
environment: "production",
version: "2.1.0"
},
tags: ["production", "customer-facing"]
}
);
// Production workflow with feedback loop
async function handleUserQuery(userId: string, message: string) {
// Execute chatbot with tracing
const result = await chatbot(message);
const runId = result.runId; // Run ID captured inside the traced function via getCurrentRunTree()
// Queue low-confidence outputs for human review
if (result.confidence < 0.7) {
await client.addRunsToAnnotationQueue({
queueId: qaQueue.id,
runIds: [runId]
});
console.log(`Low confidence (${result.confidence}) - queued for review`);
}
// Create presigned token for user feedback
const feedbackToken = await client.createPresignedFeedbackToken(
runId,
"user_satisfaction",
{
expiration: new Date(Date.now() + 24 * 60 * 60 * 1000), // 24 hours
feedbackConfig: {
type: "continuous",
min: 1,
max: 5
}
}
);
return {
response: result.response,
feedbackUrl: feedbackToken.url, // Return to frontend
confidence: result.confidence,
queuedForReview: result.confidence < 0.7
};
}
// Helper: Calculate confidence score
function calculateConfidence(response: string): number {
// Example: based on response length, hedging words, etc.
if (response.includes("I'm not sure") || response.includes("maybe")) {
return 0.5;
}
return response.length > 50 ? 0.9 : 0.6;
}
// Example usage
const result = await handleUserQuery("user-123", "What is LangSmith?");
console.log("Response:", result.response);
console.log("User can provide feedback at:", result.feedbackUrl);// Reviewer processes annotation queue
// Reviewer processes annotation queue
async function processReviewQueue(queueId: string, reviewerId: string) {
// Get queue size
const size = await client.getSizeFromAnnotationQueue(queueId);
console.log(`Queue has ${size} items to review`);
// Process items one by one
for (let i = 0; i < size; i++) {
const queueItem = await client.getRunFromAnnotationQueue(queueId, i);
const run = queueItem.run;
console.log("\n=== Review Item ===");
console.log("Input:", run.inputs);
console.log("Output:", run.outputs);
// Human reviewer provides feedback (your UI logic)
const humanRating = await getHumanRating(); // Your UI
const humanComment = await getHumanComment(); // Your UI
// Log feedback
await client.createFeedback(run.id, "human_quality_review", {
score: humanRating / 5, // Normalize to 0-1
comment: humanComment,
feedbackSourceType: "app",
source_info: {
reviewer_id: reviewerId,
review_timestamp: new Date().toISOString(),
},
});
// Remove from queue after review
await client.deleteRunFromAnnotationQueue(queueId, run.id);
}
console.log("Review queue processing complete");
}

Combines: Anonymization + Client Configuration + Sampling + Privacy Controls
Use Case: Deploy an LLM application with comprehensive PII protection and optimal performance.
import { traceable } from "langsmith/traceable";
import { Client } from "langsmith";
import { createAnonymizer } from "langsmith/anonymizer";
import OpenAI from "openai";
// 1. Create comprehensive anonymizer
const anonymizer = createAnonymizer([
// Email addresses
{ pattern: /\b[\w\.-]+@[\w\.-]+\.\w+\b/g, replace: "[EMAIL]" },
// Phone numbers (US format)
{ pattern: /\b\d{3}[-.]?\d{3}[-.]?\d{4}\b/g, replace: "[PHONE]" },
// SSNs
{ pattern: /\b\d{3}-\d{2}-\d{4}\b/g, replace: "[SSN]" },
// Credit cards
{ pattern: /\b\d{4}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\b/g, replace: "[CARD]" },
// API keys (various formats)
{ pattern: /\bsk-[a-zA-Z0-9]{32,}\b/g, replace: "[API_KEY]" },
{ pattern: /\bapi[-_]key["\s:=]+[a-zA-Z0-9]{20,}/gi, replace: "[API_KEY]" },
// AWS keys
{ pattern: /\bAKIA[0-9A-Z]{16}\b/g, replace: "[AWS_KEY]" },
// JWTs
{ pattern: /\beyJ[a-zA-Z0-9_-]+\.eyJ[a-zA-Z0-9_-]+\.[a-zA-Z0-9_-]+/g, replace: "[JWT]" },
// IP addresses
{ pattern: /\b(?:\d{1,3}\.){3}\d{1,3}\b/g, replace: "[IP]" }
], {
// Exclude certain paths from anonymization
exclude: ["metadata.user_id", "metadata.session_id"]
});
// 2. Configure privacy-focused client
const client = new Client({
apiKey: process.env.LANGCHAIN_API_KEY,
// Performance: sample in production
tracingSamplingRate: process.env.NODE_ENV === "production" ? 0.1 : 1.0,
// Privacy: anonymize all data
anonymizer: anonymizer,
// Privacy: hide specific fields
hideInputs: (inputs) => {
const { password, secret, token, ...safe } = inputs;
return safe;
},
// Performance: optimize batching
autoBatchTracing: true,
batchSizeBytesLimit: 20_000_000,
traceBatchConcurrency: 5,
// Privacy: don't log runtime info
omitTracedRuntimeInfo: true
});
// 3. Create traced function with privacy controls
const processUserData = traceable(
async (input: {
userEmail: string;
userSSN: string;
query: string;
apiKey: string
}) => {
// Business logic
const openai = new OpenAI({ apiKey: input.apiKey });
const response = await openai.chat.completions.create({
model: "gpt-4",
messages: [{ role: "user", content: input.query }]
});
return {
response: response.choices[0].message.content,
userEmail: input.userEmail // Will be anonymized
};
},
{
name: "process-user-data",
run_type: "chain",
client: client,
// Additional per-function anonymization
processInputs: anonymizer,
processOutputs: anonymizer,
tags: ["production", "privacy-critical"]
}
);
// 4. Usage with privacy guarantees
const result = await processUserData({
userEmail: "user@example.com", // → Logged as "[EMAIL]"
userSSN: "123-45-6789", // → Logged as "[SSN]"
query: "What is my account balance?",
apiKey: "sk-abc123xyz" // → Logged as "[API_KEY]"
});
// Traces will show anonymized data:
// Input: { userEmail: "[EMAIL]", userSSN: "[SSN]", query: "...", apiKey: "[API_KEY]" }
// 5. Ensure flush before shutdown
process.on('SIGTERM', async () => {
await client.awaitPendingTraceBatches();
client.cleanup();
process.exit(0);
});

Combines: Evaluation + Comparative Experiments + Annotation Queues + Feedback
Use Case: Compare two models, run automated evaluations, then have humans review the results side-by-side to determine the winner.
import { evaluate, evaluateComparative } from "langsmith/evaluation";
import { Client } from "langsmith";
import OpenAI from "openai";
const client = new Client();
const openai = new OpenAI();
// Define two model variants
async function modelA(input: { question: string }) {
const response = await openai.chat.completions.create({
model: "gpt-4",
messages: [{ role: "user", content: input.question }],
temperature: 0.7
});
return { answer: response.choices[0].message.content };
}
async function modelB(input: { question: string }) {
const response = await openai.chat.completions.create({
model: "gpt-3.5-turbo",
messages: [{ role: "user", content: input.question }],
temperature: 0.7
});
return { answer: response.choices[0].message.content };
}
// Create test dataset
const dataset = await client.createDataset({
datasetName: "model-comparison-qa",
description: "QA dataset for A/B testing",
dataType: "kv"
});
await client.createExamples({
datasetId: dataset.id,
inputs: [
{ question: "What is machine learning?" },
{ question: "Explain neural networks" },
{ question: "What is deep learning?" }
],
outputs: [
{ answer: "Machine learning is..." },
{ answer: "Neural networks are..." },
{ answer: "Deep learning is..." }
]
});
// Step 1: Run automated evaluation on both models
const experimentA = await evaluate(modelA, {
data: "model-comparison-qa",
evaluators: [
({ run, example }) => ({
key: "correctness",
score: run.outputs?.answer?.includes(example?.outputs?.answer) ? 1 : 0
}),
({ run }) => ({
key: "length",
score: (run.outputs?.answer?.length || 0) > 50 ? 1 : 0
})
],
experimentPrefix: "gpt-4-variant",
metadata: { model: "gpt-4", temperature: 0.7 }
});
const experimentB = await evaluate(modelB, {
data: "model-comparison-qa",
evaluators: [
({ run, example }) => ({
key: "correctness",
score: run.outputs?.answer?.includes(example?.outputs?.answer) ? 1 : 0
}),
({ run }) => ({
key: "length",
score: (run.outputs?.answer?.length || 0) > 50 ? 1 : 0
})
],
experimentPrefix: "gpt-3.5-variant",
metadata: { model: "gpt-3.5-turbo", temperature: 0.7 }
});
console.log("Experiment A:", experimentA.experimentName);
console.log("Experiment B:", experimentB.experimentName);
// Step 2: Create comparative experiment for side-by-side review
const comparison = await client.createComparativeExperiment({
name: "GPT-4 vs GPT-3.5 Comparison",
experimentIds: [experimentA.experimentName, experimentB.experimentName],
description: "Comparing response quality and accuracy",
metadata: {
criteria: ["correctness", "helpfulness", "conciseness"],
reviewers: ["team-lead", "domain-expert"]
}
});
console.log("Comparison URL:", comparison.url);
// Step 3: Run comparative evaluation with automated evaluators
const comparativeResults = await evaluateComparative(
[experimentA.experimentName, experimentB.experimentName],
{
comparativeEvaluators: [
(runs, example) => {
// Compare run outputs side-by-side
const scoreA = scoreQuality(runs[0].outputs);
const scoreB = scoreQuality(runs[1].outputs);
return {
key: "quality_winner",
scores: [scoreA, scoreB],
value: scoreA > scoreB ? "A" : scoreB > scoreA ? "B" : "Tie"
};
}
]
}
);
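The scoreQuality helper below is only a placeholder; in practice the pairwise comparison is often delegated to an LLM judge. A hedged sketch of such a comparative evaluator (the prompt and verdict parsing are illustrative; it returns the same key/scores/value shape as the placeholder evaluator above):
async function llmJudgeEvaluator(runs: any[], example: any) {
  const judgement = await openai.chat.completions.create({
    model: "gpt-4",
    temperature: 0,
    messages: [{
      role: "user",
      content:
        `Question: ${example?.inputs?.question}\n` +
        `Answer A: ${runs[0]?.outputs?.answer}\n` +
        `Answer B: ${runs[1]?.outputs?.answer}\n` +
        `Which answer is better? Reply with exactly "A", "B", or "Tie".`
    }]
  });
  const verdict = judgement.choices[0].message.content?.trim() ?? "Tie";
  return {
    key: "llm_judge_preference",
    scores: [verdict === "A" ? 1 : 0, verdict === "B" ? 1 : 0],
    value: verdict
  };
}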
// Step 4: Add runs to annotation queue for human review
const runIds = comparativeResults.results.flatMap(r => r.run_ids || []);
const reviewQueue = await client.createAnnotationQueue({
  name: "Model Comparison Review",
  description: "Side-by-side human review of comparative experiment runs"
});
await client.addRunsToAnnotationQueue({
  queueId: reviewQueue.id,
  runIds: runIds
});
console.log(`Added ${runIds.length} run pairs to human review queue`);
// Step 5: Human review process
async function conductHumanReview(queueId: string) {
const size = await client.getSizeFromAnnotationQueue(queueId);
for (let i = 0; i < size; i++) {
const item = await client.getRunFromAnnotationQueue(queueId, i);
const run = item.run;
// Display to human reviewer (your UI logic)
console.log(`\n=== Review ${i + 1}/${size} ===`);
console.log("Input:", run.inputs);
console.log("Model A Output:", run.outputs?.modelA);
console.log("Model B Output:", run.outputs?.modelB);
// Collect human judgment
const winner = await askReviewer("Which model performed better? (A/B/Tie)");
const rating = await askReviewer("Overall quality (1-5)?");
const comment = await askReviewer("Comments?");
// Log feedback
await client.createFeedback(run.id, "human_preference", {
value: winner,
score: rating / 5,
comment: comment,
feedbackSourceType: "app",
source_info: {
review_type: "comparative",
comparison_id: comparison.id,
},
});
// Remove from queue
await client.deleteRunFromAnnotationQueue(queueId, run.id);
}
}
// Helper function (implement based on your UI)
async function askReviewer(question: string): Promise<any> {
// Your implementation
return "A"; // Placeholder
}
function scoreQuality(outputs: any): number {
// Your scoring logic
return 0.8; // Placeholder
}

Combines: OpenTelemetry + LangSmith Tracing + Data Anonymization
Use Case: Integrate LangSmith into an existing observability stack with full privacy protection.
import { initializeOTEL } from "langsmith/experimental/otel/setup";
import { createAnonymizer } from "langsmith/anonymizer";
import { Client } from "langsmith";
import { traceable } from "langsmith/traceable";
import { HttpInstrumentation } from "@opentelemetry/instrumentation-http";
import { ExpressInstrumentation } from "@opentelemetry/instrumentation-express";
// Step 1: Initialize OpenTelemetry for distributed tracing
initializeOTEL({
projectName: "production-app",
instrumentations: [
new HttpInstrumentation(),
new ExpressInstrumentation(),
],
// Optional: export to multiple backends
exporters: ["langsmith", "jaeger"] // Multi-backend support
});
// Step 2: Create anonymizer for PII protection
const anonymizer = createAnonymizer([
{ pattern: /\b[\w\.-]+@[\w\.-]+\.\w+\b/g, replace: "[EMAIL]" },
{ pattern: /\bsk-[a-zA-Z0-9]+\b/g, replace: "[API_KEY]" },
{ pattern: /\b\d{3}-\d{2}-\d{4}\b/g, replace: "[SSN]" },
{ pattern: /\b\d{16}\b/g, replace: "[CARD]" }
]);
// Step 3: Configure client with privacy and performance
const client = new Client({
apiKey: process.env.LANGSMITH_API_KEY,
anonymizer: anonymizer,
// Performance optimization
autoBatchTracing: true,
batchSizeBytesLimit: 20_000_000,
traceBatchConcurrency: 10,
tracingSamplingRate: 0.1,
// Privacy controls
hideInputs: (inputs) => {
const { password, apiKey, ...safe } = inputs;
return safe;
},
omitTracedRuntimeInfo: true
});
// Step 4: Create traced functions with full observability
const processUserQuery = traceable(
async (input: { email: string; query: string; ssn?: string }) => {
// Both OTEL and LangSmith will trace this
const response = await handleQuery(input.query);
return {
response: response,
userEmail: input.email, // Will be anonymized to "[EMAIL]"
confidence: response.confidence
};
},
{
name: "process_user_query",
run_type: "chain",
client: client,
processInputs: anonymizer, // Double anonymization for safety
processOutputs: anonymizer,
metadata: {
environment: process.env.NODE_ENV,
version: process.env.APP_VERSION
},
tags: ["production", "privacy-critical"]
}
);
// Step 5: Express.js integration with full observability
import express from "express";
const app = express();
app.use(express.json());
app.post("/api/query", async (req, res) => {
try {
const result = await processUserQuery({
email: req.body.email, // → "[EMAIL]" in traces
query: req.body.query,
ssn: req.body.ssn // → "[SSN]" in traces
});
res.json({ response: result.response });
} catch (error) {
// Error traced automatically
res.status(500).json({ error: "Processing failed" });
}
});
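The handleQuery helper below is a stub. To get LLM spans nested under the process_user_query trace, the provider SDK can be wrapped, mirroring the wrapper pattern used elsewhere in this guide; a hedged sketch, assuming the wrapper accepts the same client option as traceable:
import { wrapOpenAI } from "langsmith/wrappers/openai";
import OpenAI from "openai";
// Wrapped client: chat.completions.create calls are traced as child LLM runs
const tracedOpenAI = wrapOpenAI(new OpenAI(), { client });
async function handleQueryWithLLM(query: string) {
  const completion = await tracedOpenAI.chat.completions.create({
    model: "gpt-4",
    messages: [{ role: "user", content: query }]
  });
  return { content: completion.choices[0].message.content, confidence: 0.9 };
}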
// Step 6: Graceful shutdown with trace upload
process.on('SIGTERM', async () => {
console.log('SIGTERM received, flushing traces...');
await client.awaitPendingTraceBatches();
client.cleanup();
process.exit(0);
});
app.listen(3000, () => {
console.log("Server running with full observability");
console.log("- OpenTelemetry: Distributed tracing");
console.log("- LangSmith: LLM-specific tracing");
console.log("- Anonymization: PII protection enabled");
console.log("- Sampling: 10% in production");
});
// Helper (implement your logic)
async function handleQuery(query: string): Promise<any> {
return { confidence: 0.9, content: "Response" };
}

Combines: Multiple SDK Wrappers + Project Organization + Performance Monitoring
Use Case: Route requests to different LLM providers based on requirements while maintaining unified tracing.
import { wrapOpenAI } from "langsmith/wrappers/openai";
import { wrapAnthropic } from "langsmith/wrappers/anthropic";
import { traceable } from "langsmith/traceable";
import { Client } from "langsmith";
import OpenAI from "openai";
import Anthropic from "@anthropic-ai/sdk";
const client = new Client({
projectName: "multi-provider-llm"
});
// Wrap different providers
const openai = wrapOpenAI(new OpenAI(), {
projectName: "multi-provider-llm",
runName: "openai-call",
tags: ["openai", "gpt"],
metadata: { provider: "openai" }
});
const anthropic = wrapAnthropic(new Anthropic(), {
project_name: "multi-provider-llm",
name: "anthropic-call",
tags: ["anthropic", "claude"],
metadata: { provider: "anthropic" }
});
// Route requests based on requirements
const routeRequest = traceable(
async (input: {
prompt: string;
requirements: {
speed?: boolean;
reasoning?: boolean;
cost?: "low" | "high";
}
}) => {
const { prompt, requirements } = input;
// Routing logic
if (requirements.reasoning) {
// Use Claude for complex reasoning
const message = await anthropic.messages.create({
model: "claude-sonnet-4-20250514",
max_tokens: 2000,
messages: [{ role: "user", content: prompt }]
});
return {
provider: "anthropic",
model: "claude-sonnet-4",
response: message.content[0].text,
usage: message.usage
};
} else if (requirements.speed || requirements.cost === "low") {
// Use GPT-3.5 for speed/cost
const completion = await openai.chat.completions.create({
model: "gpt-3.5-turbo",
messages: [{ role: "user", content: prompt }]
});
return {
provider: "openai",
model: "gpt-3.5-turbo",
response: completion.choices[0].message.content,
usage: completion.usage
};
} else {
// Default to GPT-4
const completion = await openai.chat.completions.create({
model: "gpt-4",
messages: [{ role: "user", content: prompt }]
});
return {
provider: "openai",
model: "gpt-4",
response: completion.choices[0].message.content,
usage: completion.usage
};
}
},
{
name: "llm-router",
run_type: "chain",
client: client,
tags: ["router", "multi-provider"]
}
);
// Usage examples
const response1 = await routeRequest({
prompt: "Explain quantum computing",
requirements: { reasoning: true } // Routes to Claude
});
const response2 = await routeRequest({
prompt: "What is 2+2?",
requirements: { speed: true } // Routes to GPT-3.5
});
const response3 = await routeRequest({
prompt: "Write a poem",
requirements: { cost: "low" } // Routes to GPT-3.5
});
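Routing can also cover failure handling: if the primary provider errors, retry with the other provider inside the same trace so both attempts appear under one run. A hedged sketch (error handling and model choices are illustrative):
const routeWithFallback = traceable(
  async (prompt: string) => {
    try {
      const completion = await openai.chat.completions.create({
        model: "gpt-4",
        messages: [{ role: "user", content: prompt }]
      });
      return { provider: "openai", response: completion.choices[0].message.content };
    } catch (primaryError) {
      console.warn("Primary provider failed, falling back to Anthropic:", primaryError);
      const message = await anthropic.messages.create({
        model: "claude-sonnet-4-20250514",
        max_tokens: 2000,
        messages: [{ role: "user", content: prompt }]
      });
      return { provider: "anthropic", response: message.content[0].text };
    }
  },
  { name: "llm-router-with-fallback", run_type: "chain", client, tags: ["router", "fallback"] }
);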
// Analyze provider performance
async function analyzeProviderPerformance(projectName: string) {
const stats = {
openai: { count: 0, totalLatency: 0, totalCost: 0 },
anthropic: { count: 0, totalLatency: 0, totalCost: 0 }
};
for await (const run of client.listRuns({
projectName,
filter: 'has(tags, "router")',
limit: 1000
})) {
const provider = run.metadata?.provider;
if (provider && stats[provider]) {
stats[provider].count++;
stats[provider].totalLatency += (run.end_time - run.start_time);
stats[provider].totalCost += run.total_cost || 0;
}
}
console.log("=== Provider Performance ===");
for (const [provider, data] of Object.entries(stats)) {
console.log(`${provider}:`);
console.log(` Requests: ${data.count}`);
console.log(` Avg Latency: ${data.totalLatency / data.count}ms`);
console.log(` Total Cost: $${data.totalCost.toFixed(4)}`);
}
}

Combines: Dataset Versioning + Evaluation + Comparative Analysis
Use Case: Maintain dataset versions, run evaluations, and compare performance across versions.
import { Client } from "langsmith";
import { evaluate } from "langsmith/evaluation";
const client = new Client();
// Create initial dataset
const dataset = await client.createDataset({
datasetName: "qa-dataset",
description: "QA dataset with versioning",
dataType: "kv"
});
// Version 1: Initial examples
await client.createExamples({
datasetId: dataset.id,
inputs: [
{ question: "What is AI?" },
{ question: "What is ML?" }
],
outputs: [
{ answer: "Artificial Intelligence is..." },
{ answer: "Machine Learning is..." }
]
});
// Tag version 1
const v1Date = new Date();
await client.updateDatasetTag({
datasetId: dataset.id,
tag: "v1",
asOf: v1Date
});
// Run baseline evaluation on v1
const baselineResults = await evaluate(myModel, {
data: "qa-dataset",
experimentPrefix: "baseline-v1",
metadata: { dataset_version: "v1" }
});
console.log("Baseline accuracy:", baselineResults.results.filter(r => r.score === 1).length / baselineResults.results.length);
// Version 2: Add more examples (dataset auto-versions)
await client.createExamples({
datasetId: dataset.id,
inputs: [
{ question: "What is Deep Learning?" },
{ question: "What is NLP?" }
],
outputs: [
{ answer: "Deep Learning is..." },
{ answer: "Natural Language Processing is..." }
]
});
// Tag version 2
const v2Date = new Date();
await client.updateDatasetTag({
datasetId: dataset.id,
tag: "v2",
asOf: v2Date
});
// Compare versions
const diff = await client.diffDatasetVersions({
datasetName: "qa-dataset",
fromVersion: v1Date.toISOString(),
toVersion: v2Date.toISOString()
});
console.log("Dataset diff:");
console.log(" Added:", diff.examples_added.length);
console.log(" Modified:", diff.examples_modified.length);
console.log(" Removed:", diff.examples_removed.length);
// Run evaluation on v2
const v2Results = await evaluate(myModel, {
data: "qa-dataset",
experimentPrefix: "evaluation-v2",
metadata: { dataset_version: "v2" }
});
// Create comparative experiment between v1 and v2 results
const versionComparison = await client.createComparativeExperiment({
name: "Dataset V1 vs V2 Performance",
experimentIds: [
baselineResults.experimentName,
v2Results.experimentName
],
description: "Compare model performance on different dataset versions"
});
console.log("Version comparison:", versionComparison.url);
// Read specific version later
const v1Snapshot = await client.readDatasetVersion({
datasetName: "qa-dataset",
asOf: v1Date
});
console.log("V1 had", v1Snapshot.example_count, "examples");
// Helper
async function myModel(input: any) {
return { answer: "Generated answer" };
}

Combines: RunTree + Headers Propagation + Manual Tracing
Use Case: Trace LLM operations across multiple services while maintaining parent-child relationships.
import { RunTree } from "langsmith";
import { Client } from "langsmith";
import express from "express";
import axios from "axios";
const client = new Client();
// ========== Service A: API Gateway ==========
const serviceA = express();
serviceA.use(express.json());
serviceA.post("/api/process", async (req, res) => {
// Create root run in Service A
const rootRun = new RunTree({
name: "api-gateway",
run_type: "chain",
inputs: { request: req.body },
client: client,
project_name: "microservices-trace"
});
try {
// Export trace context as headers
const traceHeaders = rootRun.toHeaders();
// Call Service B with trace context
const response = await axios.post(
"http://service-b:3001/process",
req.body,
{ headers: traceHeaders }
);
// End root run
await rootRun.end({
statusCode: 200,
response: response.data
});
await rootRun.postRun();
res.json(response.data);
} catch (error) {
await rootRun.end(undefined, error.message);
await rootRun.postRun();
res.status(500).json({ error: error.message });
}
});
// ========== Service B: Processing Service ==========
const serviceB = express();
serviceB.use(express.json());
serviceB.post("/process", async (req, res) => {
// Reconstruct run tree from headers
const parentRun = RunTree.fromHeaders(req.headers, {
name: "processing-service",
run_type: "chain",
client: client
});
if (!parentRun) {
// No trace context - create new root
console.warn("No trace context in headers");
}
const serviceRun = parentRun || new RunTree({
name: "processing-service",
run_type: "chain",
inputs: { data: req.body },
client: client
});
try {
// Create child run for LLM call
const llmRun = serviceRun.createChild({
name: "llm-generation",
run_type: "llm",
inputs: { prompt: req.body.prompt }
});
// Simulate LLM call
const llmResponse = await callLLM(req.body.prompt);
await llmRun.end({ response: llmResponse });
await llmRun.postRun();
// End service run
await serviceRun.end({ result: llmResponse });
await serviceRun.postRun();
res.json({ result: llmResponse });
} catch (error) {
await serviceRun.end(undefined, error.message);
await serviceRun.postRun();
res.status(500).json({ error: error.message });
}
});
// Helper
async function callLLM(prompt: string) {
return "LLM response";
}
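Extending the trace to a third service follows the same pattern Service A uses for Service B: create a child run, export its context as headers, and forward them. A hedged sketch ("service-c" and its endpoint are illustrative):
async function callServiceC(serviceRun: RunTree, payload: unknown) {
  const childRun = serviceRun.createChild({
    name: "service-c-call",
    run_type: "chain",
    inputs: { payload }
  });
  // Forward trace context downstream, just like Service A → Service B
  const response = await axios.post("http://service-c:3002/process", payload, {
    headers: childRun.toHeaders()
  });
  await childRun.end({ response: response.data });
  await childRun.postRun();
  return response.data;
}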
// Start services
serviceA.listen(3000, () => console.log("Service A on :3000"));
serviceB.listen(3001, () => console.log("Service B on :3001"));
// When viewing traces in LangSmith:
// api-gateway (Service A)
// └─ processing-service (Service B)
// └─ llm-generation (LLM call in Service B)

Combines: Dataset Management + Automated Evaluation + Feedback + Alerts
Use Case: Continuously evaluate the model on production data and alert on regressions.
import { Client } from "langsmith";
import { evaluate } from "langsmith/evaluation";
import { traceable } from "langsmith/traceable";
const client = new Client();
// Step 1: Collect production data into dataset
const productionDataset = await client.createDataset({
datasetName: "production-golden-set",
description: "Curated examples from production",
dataType: "kv"
});
// Step 2: Automated production data collection
const productionBot = traceable(
async (query: string) => {
const response = await generateResponse(query);
return response;
},
{
name: "production-bot",
tags: ["production"],
metadata: { version: "2.0" }
}
);
// Collect high-quality production runs as examples
async function collectGoldenExamples(minFeedbackScore = 0.9) {
const goldenRuns = [];
// Find runs with high user feedback
for await (const feedback of client.listFeedback({
feedbackKeys: ["user_rating"],
has_score: true,
limit: 100
})) {
if (feedback.score >= minFeedbackScore) {
const run = await client.readRun(feedback.run_id);
goldenRuns.push({
inputs: run.inputs,
outputs: run.outputs,
metadata: {
source_run_id: run.id,
user_score: feedback.score,
collected_at: new Date().toISOString()
}
});
}
}
// Add to dataset
if (goldenRuns.length > 0) {
await client.createExamples({
datasetId: productionDataset.id,
examples: goldenRuns
});
console.log(`Added ${goldenRuns.length} golden examples from production`);
}
}
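Repeated collection passes can add the same production run twice. A hedged dedup sketch that checks existing examples' source_run_id metadata before inserting (call it inside collectGoldenExamples before pushing each run):
async function alreadyCollected(sourceRunId: string): Promise<boolean> {
  for await (const example of client.listExamples({ datasetId: productionDataset.id })) {
    if (example.metadata?.source_run_id === sourceRunId) {
      return true;
    }
  }
  return false;
}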
// Step 3: Scheduled evaluation (run daily)
async function runDailyEvaluation() {
const results = await evaluate(productionBot, {
data: "production-golden-set",
evaluators: [
({ run, example }) => ({
key: "correctness",
score: calculateSimilarity(run.outputs, example?.outputs)
}),
({ run }) => ({
key: "latency",
score: (run.end_time - run.start_time) < 2000 ? 1 : 0
})
],
experimentPrefix: `daily-eval-${new Date().toISOString().split('T')[0]}`,
metadata: {
type: "daily-regression-test",
dataset_size: (await client.readDataset({ datasetName: "production-golden-set" })).example_count
}
});
// Calculate metrics
const correctnessScores = results.results
.map(r => r.evaluation_results.find(e => e.key === "correctness")?.score || 0);
const averageCorrectness = correctnessScores.reduce((a, b) => a + b, 0) / correctnessScores.length;
console.log("=== Daily Evaluation Results ===");
console.log("Average Correctness:", averageCorrectness);
// Alert on regression
const regressionThreshold = 0.8;
if (averageCorrectness < regressionThreshold) {
await sendAlert({
severity: "high",
message: `Model regression detected: correctness ${averageCorrectness} < threshold ${regressionThreshold}`,
experimentUrl: `https://smith.langchain.com/experiments/${results.experimentName}`
});
}
return results;
}
// Step 4: Continuous improvement loop
async function continuousImprovementLoop() {
while (true) {
// Collect new golden examples weekly
await collectGoldenExamples(0.9);
// Run evaluation daily
const results = await runDailyEvaluation();
// Wait 24 hours
await new Promise(resolve => setTimeout(resolve, 24 * 60 * 60 * 1000));
}
}
// Helpers
function calculateSimilarity(output: any, expected: any): number {
return 0.85; // Your similarity logic
}
async function sendAlert(alert: any) {
console.error("🚨 ALERT:", alert.message);
// Send to Slack, PagerDuty, etc.
}
async function generateResponse(query: string) {
return { answer: "Generated response" };
}

Combines: LangChain Callbacks + Traceable + Custom Chains
Use Case: Build a LangChain application with full LangSmith observability.
import { traceable } from "langsmith/traceable";
import { getLangchainCallbacks, RunnableTraceable } from "langsmith/langchain";
import { ChatOpenAI } from "@langchain/openai";
import { PromptTemplate } from "@langchain/core/prompts";
import { StringOutputParser } from "@langchain/core/output_parsers";
import { RunnableSequence } from "@langchain/core/runnables";
// Define traceable custom logic
const preprocessInput = traceable(
async (input: string) => {
return input.trim().toLowerCase();
},
{ name: "preprocess-input", run_type: "tool" }
);
const postprocessOutput = traceable(
async (output: string) => {
return output.toUpperCase();
},
{ name: "postprocess-output", run_type: "tool" }
);
// Build LangChain pipeline with tracing
const buildTracedPipeline = traceable(
async (query: string) => {
// Get callbacks for LangChain integration
const callbacks = await getLangchainCallbacks();
// Step 1: Preprocess (traced as child)
const processed = await preprocessInput(query);
// Step 2: LangChain pipeline (traced via callbacks)
const prompt = PromptTemplate.fromTemplate(
"Answer this question concisely: {question}"
);
const model = new ChatOpenAI({ temperature: 0 });
const parser = new StringOutputParser();
const chain = prompt.pipe(model).pipe(parser);
const llmResponse = await chain.invoke(
{ question: processed },
{ callbacks }
);
// Step 3: Postprocess (traced as child)
const final = await postprocessOutput(llmResponse);
return final;
},
{
name: "langchain-pipeline",
run_type: "chain",
tags: ["langchain", "production"]
}
);
// Alternative: Wrap traceable as Runnable
const traceableAsRunnable = RunnableTraceable.from(
traceable(async (input: string) => {
return `Processed: ${input}`;
}, { name: "custom-step" })
);
// Use in LangChain sequences
const fullChain = RunnableSequence.from([
preprocessInput,
traceableAsRunnable,
// Can mix traceable and LangChain runnables
]);
// Execute
const result = await buildTracedPipeline("What is LangSmith?");
console.log("Result:", result);
// View complete trace hierarchy:
// langchain-pipeline
// ├─ preprocess-input
// ├─ PromptTemplate (via callbacks)
// ├─ ChatOpenAI (via callbacks)
// ├─ StringOutputParser (via callbacks)
// └─ postprocess-output

Combines: Evaluation + Run Stats + Token Tracking
Use Case: Run evaluations while tracking and limiting costs.
import { Client } from "langsmith";
import { evaluate } from "langsmith/evaluation";
import OpenAI from "openai";
const client = new Client();
const openai = new OpenAI();
// Configuration
const COST_PER_1K_INPUT_TOKENS = 0.03;
const COST_PER_1K_OUTPUT_TOKENS = 0.06;
const MAX_EVALUATION_COST = 10.00; // $10 budget
let totalCost = 0;
// Cost-tracking evaluator
async function costAwareModel(input: { question: string }) {
// Check budget before call
if (totalCost >= MAX_EVALUATION_COST) {
throw new Error(`Budget exceeded: $${totalCost.toFixed(2)} >= $${MAX_EVALUATION_COST}`);
}
const completion = await openai.chat.completions.create({
model: "gpt-4",
messages: [{ role: "user", content: input.question }]
});
// Track cost
const usage = completion.usage;
if (usage) {
const callCost =
(usage.prompt_tokens / 1000) * COST_PER_1K_INPUT_TOKENS +
(usage.completion_tokens / 1000) * COST_PER_1K_OUTPUT_TOKENS;
totalCost += callCost;
console.log(`Call cost: $${callCost.toFixed(4)}, Total: $${totalCost.toFixed(4)}`);
}
return { answer: completion.choices[0].message.content };
}
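Manual counters can drift from what actually ran. As a cross-check, token usage can also be summed from the traced runs after the experiment, assuming the Run records expose prompt_tokens/completion_tokens for LLM runs in your project (the project name is a parameter here for illustration):
async function summarizeExperimentTokens(projectName: string) {
  let promptTokens = 0;
  let completionTokens = 0;
  for await (const run of client.listRuns({ projectName, runType: "llm" })) {
    promptTokens += run.prompt_tokens ?? 0;
    completionTokens += run.completion_tokens ?? 0;
  }
  const estimatedCost =
    (promptTokens / 1000) * COST_PER_1K_INPUT_TOKENS +
    (completionTokens / 1000) * COST_PER_1K_OUTPUT_TOKENS;
  console.log(`Tokens: ${promptTokens} in / ${completionTokens} out (~$${estimatedCost.toFixed(4)})`);
}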
// Run evaluation with cost tracking
try {
const results = await evaluate(costAwareModel, {
data: "qa-dataset",
evaluators: [
({ run, example }) => ({
key: "correctness",
score: run.outputs?.answer === example?.outputs?.answer ? 1 : 0
})
],
experimentPrefix: "cost-aware-eval",
metadata: {
max_budget: MAX_EVALUATION_COST,
cost_per_1k_input: COST_PER_1K_INPUT_TOKENS,
cost_per_1k_output: COST_PER_1K_OUTPUT_TOKENS
},
maxConcurrency: 1 // Sequential to track cost accurately
});
console.log("=== Evaluation Complete ===");
console.log("Total cost:", `$${totalCost.toFixed(2)}`);
console.log("Budget remaining:", `$${(MAX_EVALUATION_COST - totalCost).toFixed(2)}`);
// Get detailed cost breakdown from LangSmith
const stats = await client.getRunStats({
projectName: "default",
filter: `eq(name, "${results.experimentName}")`
});
console.log("Total tokens:", stats.total_tokens);
console.log("Total cost (from LangSmith):", `$${stats.total_cost?.toFixed(4) || 0}`);
} catch (error) {
console.error("Evaluation stopped:", error.message);
console.log("Spent before stopping:", `$${totalCost.toFixed(2)}`);
}

Combines: Tracing + Conversation Threading + Feedback
Use Case: Track multi-turn conversations with session management and per-turn feedback.
import { traceable, getCurrentRunTree } from "langsmith/traceable";
import { Client } from "langsmith";
import OpenAI from "openai";
const client = new Client();
const openai = new OpenAI();
interface ConversationTurn {
role: "user" | "assistant";
content: string;
}
// Traced conversation manager
const conversationSession = traceable(
async (sessionId: string, userMessage: string, history: ConversationTurn[]) => {
const runTree = getCurrentRunTree();
// Add conversation metadata
runTree.metadata = {
...runTree.metadata,
conversation_id: sessionId,
turn_number: Math.floor(history.length / 2) + 1
};
// Build conversation context
const messages = [
...history,
{ role: "user" as const, content: userMessage }
];
const completion = await openai.chat.completions.create({
model: "gpt-4",
messages: messages
});
const assistantMessage = completion.choices[0].message.content;
return {
message: assistantMessage,
conversationId: sessionId,
turnNumber: runTree.metadata.turn_number,
runId: runTree.id // Needed for per-turn feedback below
};
},
{
name: "conversation-turn",
run_type: "chain",
tags: ["conversation", "multi-turn"]
}
);
// Conversation manager class
class ConversationManager {
private sessions = new Map<string, ConversationTurn[]>();
async sendMessage(sessionId: string, userMessage: string) {
// Get or create conversation history
const history = this.sessions.get(sessionId) || [];
// Process turn with tracing
const result = await conversationSession(sessionId, userMessage, history);
// Update history
history.push(
{ role: "user", content: userMessage },
{ role: "assistant", content: result.message }
);
this.sessions.set(sessionId, history);
return result;
}
async collectTurnFeedback(sessionId: string, runId: string, rating: number) {
await client.createFeedback(runId, "turn_quality", {
score: rating / 5,
value: rating,
feedbackSourceType: "app",
source_info: {
conversation_id: sessionId,
feedback_type: "per-turn",
},
});
}
async getConversationHistory(sessionId: string) {
return this.sessions.get(sessionId) || [];
}
// Query all turns in a conversation
async getConversationRuns(sessionId: string) {
const runs = [];
for await (const run of client.listRuns({
filter: `eq(metadata.conversation_id, "${sessionId}")`,
order: "asc"
})) {
runs.push(run);
}
return runs;
}
}
// Usage
const manager = new ConversationManager();
const sessionId = "session-123";
// Turn 1
const turn1 = await manager.sendMessage(sessionId, "Hello!");
console.log("Bot:", turn1.message);
await manager.collectTurnFeedback(sessionId, turn1.runId, 5);
// Turn 2 (has context from turn 1)
const turn2 = await manager.sendMessage(sessionId, "What did I just say?");
console.log("Bot:", turn2.message);
// Get all conversation runs
const conversationRuns = await manager.getConversationRuns(sessionId);
console.log(`Conversation has ${conversationRuns.length} turns`);
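Beyond per-turn ratings, feedback can also be attached at the conversation level. A hedged sketch that tags the most recent turn with a whole-conversation score (the "conversation_quality" key is illustrative):
async function collectConversationFeedback(sessionId: string, rating: number) {
  const runs = await manager.getConversationRuns(sessionId);
  const lastRun = runs[runs.length - 1];
  if (!lastRun) return;
  await client.createFeedback(lastRun.id, "conversation_quality", {
    score: rating / 5,
    feedbackSourceType: "app",
    source_info: {
      conversation_id: sessionId,
      feedback_type: "whole-conversation"
    }
  });
}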
// Helper
async function generateResponse(query: string) {
return "Response";
}