tessl install tessl/npm-langsmith@0.4.3
TypeScript client SDK for the LangSmith LLM tracing, evaluation, and monitoring platform.
Methods for creating, managing, and analyzing feedback on runs.
Feedback represents evaluative information about a run's performance. It can come from multiple sources: human annotations, model judgments, automated systems, and end-user ratings.
Feedback supports both quantitative scores (numeric ratings, booleans) and qualitative values (text comments, corrections, categorical labels). All feedback is associated with a specific run and can be queried, updated, or deleted.
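For example, a single createFeedback call (documented below) can record a quantitative score alongside a qualitative comment; the run ID and feedback key here are placeholders.
import { Client } from "langsmith";
const client = new Client();
// Quantitative score plus qualitative comment on one run
await client.createFeedback("550e8400-e29b-41d4-a716-446655440000", "helpfulness", {
  score: 1,
  comment: "Answered the question directly"
});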
/**
* Create feedback for a run
* @param runId - Run ID (null for feedback not associated with a run)
* @param key - Feedback key/name
* @param options - Feedback options
* @returns Promise resolving to created feedback
*/
createFeedback(
runId: string | null,
key: string,
options: FeedbackOptions
): Promise<Feedback>;
interface FeedbackOptions {
/** Numeric or boolean score */
score?: number | boolean | null;
/** Freeform value */
value?: number | boolean | string | object | null;
/** Comment */
comment?: string;
/** Correction data */
correction?: object;
/** Feedback source type */
feedbackSourceType?: "api" | "model" | "app";
/** Source run ID (for model feedback) */
sourceRunId?: string;
/** Feedback ID */
feedbackId?: string;
/** Feedback configuration */
feedbackConfig?: FeedbackConfig;
/** Project ID */
projectId?: string;
/** Comparative experiment ID */
comparativeExperimentId?: string;
/** Source information */
sourceInfo?: object;
}
interface Feedback {
/** Feedback ID */
id: string;
/** Run ID */
run_id: string;
/** Feedback key */
key: string;
/** Score */
score?: number | boolean | null;
/** Value */
value?: any;
/** Comment */
comment?: string;
/** Correction data */
correction?: object;
/** Created timestamp */
created_at: string;
/** Modified timestamp */
modified_at: string;
}
import { Client } from "langsmith";
const client = new Client();
// Thumbs up/down
await client.createFeedback(runId, "user_rating", {
score: 1,
comment: "Great response!"
});
// Numeric score
await client.createFeedback(runId, "accuracy", {
score: 0.95,
comment: "Highly accurate"
});
// With correction
await client.createFeedback(runId, "correctness", {
score: 0,
correction: {
outputs: { answer: "Correct answer" }
}
});
// Model feedback
await client.createFeedback(runId, "coherence", {
score: 0.88,
feedbackSourceType: "model",
sourceRunId: judgeRunId
});
// Categorical value
await client.createFeedback(runId, "category", {
value: "factual_question",
score: 1
});
// Feedback with custom configuration
await client.createFeedback(runId, "safety", {
score: 1,
feedbackConfig: {
type: "continuous",
min: 0,
max: 1
},
sourceInfo: {
reviewer: "safety_team",
review_date: "2024-01-15"
}
});
/**
* Update feedback
* @param feedbackId - Feedback ID to update
* @param params - Update parameters
* @returns Promise resolving to updated feedback
*/
updateFeedback(
feedbackId: string,
params: FeedbackUpdate
): Promise<Feedback>;
interface FeedbackUpdate {
/** Updated score */
score?: number | boolean | null;
/** Updated value */
value?: any;
/** Updated comment */
comment?: string;
/** Updated correction */
correction?: object;
}
await client.updateFeedback(feedbackId, {
score: 0.98,
comment: "Updated after review"
});
/**
* Read feedback details
* @param feedbackId - Feedback ID
* @returns Promise resolving to feedback
*/
readFeedback(feedbackId: string): Promise<Feedback>;
const feedback = await client.readFeedback(feedbackId);
console.log(`Score: ${feedback.score}`);
/**
* List feedback with filtering
* @param params - List parameters
* @returns Async iterable of feedback
*/
listFeedback(params?: ListFeedbackParams): AsyncIterable<Feedback>;
interface ListFeedbackParams {
/** Filter by run IDs */
runIds?: string[];
/** Filter by feedback keys */
feedbackKeys?: string[];
/** Filter by source types */
feedbackSourceTypes?: string[];
/** Limit results */
limit?: number;
/** Offset for pagination */
offset?: number;
}
// List feedback for specific runs
for await (const feedback of client.listFeedback({
runIds: [runId],
feedbackKeys: ["accuracy", "correctness"]
})) {
console.log(feedback.key, feedback.score);
}
// List human feedback
for await (const feedback of client.listFeedback({
feedbackSourceTypes: ["app"]
})) {
console.log("User feedback:", feedback.comment);
}
// List model-generated feedback
for await (const feedback of client.listFeedback({
feedbackSourceTypes: ["model"],
limit: 50
})) {
console.log(`Model evaluation: ${feedback.key} = ${feedback.score}`);
}
// Paginated feedback retrieval
const feedbackBatch = [];
for await (const feedback of client.listFeedback({
limit: 20,
offset: 40
})) {
feedbackBatch.push(feedback);
}
// List all feedback for multiple runs
for await (const feedback of client.listFeedback({
runIds: [
"550e8400-e29b-41d4-a716-446655440000",
"660e8400-e29b-41d4-a716-446655440001"
]
})) {
console.log(`${feedback.key}: ${feedback.score}`);
}
/**
* Delete feedback
* @param feedbackId - Feedback ID to delete
* @returns Promise resolving when deletion completes
*/
deleteFeedback(feedbackId: string): Promise<void>;
await client.deleteFeedback(feedbackId);
/**
* Create presigned feedback token
* @param runId - Run ID
* @param feedbackKey - Feedback key
* @param options - Token options
* @returns Promise resolving to feedback ingest token
*/
createPresignedFeedbackToken(
runId: string,
feedbackKey: string,
options?: CreatePresignedFeedbackTokenParams
): Promise<FeedbackIngestToken>;
interface CreatePresignedFeedbackTokenParams {
/** Token expiration time (ISO 8601 string or TimeDelta object) */
expiration?: string | TimeDelta;
/** Feedback configuration */
feedbackConfig?: FeedbackConfig;
/** Source information */
sourceInfo?: Record<string, any>;
}
interface TimeDelta {
/** Number of days */
days?: number;
/** Number of hours */
hours?: number;
/** Number of minutes */
minutes?: number;
}
interface FeedbackIngestToken {
/** Token ID */
id: string;
/** Presigned URL */
url: string;
/** Token string */
token: string;
/** Run ID */
run_id: string;
/** Feedback key */
feedback_key: string;
/** Expiration */
expires_at?: string;
}
// Create token for user feedback with TimeDelta
const token = await client.createPresignedFeedbackToken(
runId,
"user_rating",
{ expiration: { days: 7 } }
);
console.log("Feedback URL:", token.url);
// Users can POST to this URL without auth
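// A minimal sketch of such a POST (the JSON body mirrors FeedbackOptions fields;
// the full workflow appears in the presigned feedback section below):
await fetch(token.url, {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({ score: 1, comment: "Helpful answer" })
});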
// Create token with custom configuration
const configuredToken = await client.createPresignedFeedbackToken(
runId,
"quality_rating",
{
expiration: { days: 7 },
feedbackConfig: {
type: "continuous",
min: 1,
max: 5
}
}
);
// Multiple tokens for different feedback types
const correctnessToken = await client.createPresignedFeedbackToken(
runId,
"correctness",
{ expiration: { hours: 1 } }
);
const helpfulnessToken = await client.createPresignedFeedbackToken(
runId,
"helpfulness",
{ expiration: { hours: 1 } }
);
// Using ISO 8601 string expiration
const tokenWithString = await client.createPresignedFeedbackToken(
runId,
"rating",
{ expiration: "2024-12-31T23:59:59Z" }
);
/**
* List presigned feedback tokens
* @param params - Optional filter parameters
* @returns Async iterable of feedback tokens
*/
listPresignedFeedbackTokens(params?: {
runId?: string;
limit?: number;
}): AsyncIterable<FeedbackIngestToken>;
import { Client } from "langsmith";
const client = new Client();
// List all tokens for a run
for await (const token of client.listPresignedFeedbackTokens({
runId: "550e8400-e29b-41d4-a716-446655440000"
})) {
console.log(`Token: ${token.id}`);
console.log(`Feedback key: ${token.feedback_key}`);
console.log(`Expires: ${token.expires_at}`);
}
// List recent tokens
for await (const token of client.listPresignedFeedbackTokens({
limit: 100
})) {
console.log(`${token.feedback_key}: ${token.url}`);
}
/**
* Evaluate a run and log feedback
* @param runId - Run ID to evaluate
* @param params - Evaluation parameters
* @returns Promise resolving to evaluation results
*/
evaluateRun(
runId: string,
params: {
evaluators: EvaluatorT[];
sourceInfo?: Record<string, any>;
}
): Promise<EvaluationResults>;
import { Client } from "langsmith";
const client = new Client();
// Create evaluators
const accuracyEvaluator = ({ run, example }) => ({
key: "accuracy",
score: run.outputs?.correct ? 1 : 0
});
const latencyEvaluator = ({ run }) => ({
key: "latency",
score: (run.end_time || 0) - (run.start_time || 0),
value: `${(run.end_time || 0) - (run.start_time || 0)}ms`
});
// Evaluate a run
const results = await client.evaluateRun("run-123", {
evaluators: [accuracyEvaluator, latencyEvaluator],
sourceInfo: { evaluator_version: "1.0" }
});
console.log("Evaluation results:", results);/**
* Log evaluation feedback for a run
* @param params - Feedback logging parameters
* @returns Promise resolving when logging completes
*/
logEvaluationFeedback(params: {
runId: string;
key: string;
score?: number | boolean;
value?: any;
comment?: string;
sourceInfo?: Record<string, any>;
feedbackSourceType?: "model" | "api" | "app";
}): Promise<void>;
import { Client } from "langsmith";
const client = new Client();
// Log simple score
await client.logEvaluationFeedback({
runId: "run-123",
key: "quality",
score: 0.85,
feedbackSourceType: "model"
});
// Log detailed feedback
await client.logEvaluationFeedback({
runId: "run-456",
key: "coherence",
score: 0.92,
value: {
subscores: {
grammar: 0.95,
relevance: 0.90,
completeness: 0.91
}
},
comment: "High coherence with minor relevance issues",
sourceInfo: {
evaluator: "gpt-4",
version: "2024-01",
temperature: 0.1
},
feedbackSourceType: "model"
});
/**
* Create a comparative experiment
* @param params - Comparative experiment parameters
* @returns Promise resolving to created experiment
*/
createComparativeExperiment(params: {
name: string;
experimentIds: string[];
referenceDatasetId?: string;
description?: string;
metadata?: Record<string, any>;
}): Promise<ComparativeExperiment>;
interface ComparativeExperiment {
/** Experiment ID */
id: string;
/** Experiment name */
name: string;
/** Description */
description?: string;
/** Experiment IDs being compared */
experimentIds: string[];
/** Reference dataset ID */
referenceDatasetId?: string;
/** Comparison URL */
url: string;
/** Created timestamp */
created_at: string;
/** Metadata */
metadata?: Record<string, any>;
}
import { Client } from "langsmith";
const client = new Client();
// Create comparative experiment
const comparison = await client.createComparativeExperiment({
name: "GPT-4 vs Claude Comparison",
experimentIds: ["exp-gpt4-baseline", "exp-claude-v1"],
description: "Comparing response quality across models",
metadata: {
evaluator: "human",
criteria: ["accuracy", "helpfulness", "safety"]
}
});
console.log(`Created comparison: ${comparison.id}`);
console.log(`Compare at: ${comparison.url}`);
// Create three-way comparison
const threeWayComparison = await client.createComparativeExperiment({
name: "Model Selection",
experimentIds: [
"exp-gpt4",
"exp-claude-opus",
"exp-gemini-pro"
],
referenceDatasetId: "dataset-123",
description: "Selecting best model for production",
metadata: {
stage: "production-candidate",
priority: "high"
}
});
Detailed interfaces for different feedback sources.
/**
* Feedback source type enumeration
*/
type FeedbackSourceType = "model" | "api" | "app";
interface FeedbackSourceBase {
/** Source type */
type?: string;
/** Source metadata */
metadata?: Record<string, any>;
}
interface APIFeedbackSource extends FeedbackSourceBase {
type: "api";
/** API metadata (e.g., endpoint, version) */
metadata?: Record<string, any>;
}
interface ModelFeedbackSource extends FeedbackSourceBase {
type: "model";
/** Model identifier */
metadata?: {
model?: string;
model_version?: string;
prompt_version?: string;
[key: string]: any;
};
}
// API feedback with source metadata
await client.createFeedback(runId, "automated_check", {
  score: 1,
  feedbackSourceType: "api",
  sourceInfo: {
    endpoint: "/api/validate",
    version: "v2",
    service: "validator"
  }
});
// Model feedback with detailed metadata
await client.createFeedback(runId, "quality", {
  score: 0.92,
  feedbackSourceType: "model",
  sourceRunId: judgeRunId,
  sourceInfo: {
    model: "gpt-4",
    model_version: "2024-01-01",
    prompt_version: "v3.2",
    temperature: 0.1
  }
});
Configure feedback types and constraints.
interface FeedbackConfig {
/** Feedback type */
type?: "continuous" | "categorical" | "freeform";
/** Minimum value (for continuous) */
min?: number;
/** Maximum value (for continuous) */
max?: number;
/** Valid categories (for categorical) */
categories?: FeedbackCategory[];
}
interface FeedbackCategory {
/** Category value */
value: string | number;
/** Category label */
label?: string;
}
// Continuous feedback (0-1 scale)
await client.createFeedback(runId, "quality_score", {
  score: 0.87,
  feedbackConfig: {
    type: "continuous",
    min: 0,
    max: 1
  }
});
// Categorical feedback
await client.createFeedback(runId, "response_type", {
  value: "factual",
  feedbackConfig: {
    type: "categorical",
    categories: [
      { value: "factual", label: "Factual Answer" },
      { value: "opinion", label: "Opinion-Based" },
      { value: "clarification", label: "Needs Clarification" }
    ]
  }
});
// Star rating (1-5 scale)
await client.createFeedback(runId, "user_rating", {
  score: 4,
  feedbackConfig: {
    type: "continuous",
    min: 1,
    max: 5
  }
});
Collect human feedback for model improvement.
import { Client } from "langsmith";
const client = new Client();
// Retrieve runs for annotation
for await (const run of client.listRuns({
project_name: "my-chatbot",
filter: 'has(tags, "needs_review")',
limit: 10
})) {
// Display run to human annotator
console.log(`Run ${run.id}: ${run.outputs}`);
// Collect human feedback (your UI logic)
const humanScore = await getHumanRating();
const humanComment = await getHumanComment();
// Save feedback
await client.createFeedback(run.id, "human_rating", {
score: humanScore,
comment: humanComment,
feedbackSourceType: "app",
});
}
Use LLMs to evaluate other LLM outputs.
import { Client } from "langsmith";
// traceable and getCurrentRunTree are exported from the traceable subpath
// (exact import path may vary by SDK version)
import { traceable, getCurrentRunTree } from "langsmith/traceable";
import OpenAI from "openai";
const client = new Client();
const openai = new OpenAI();
const judgeResponse = traceable(
async (runId: string, input: string, output: string) => {
const judgment = await openai.chat.completions.create({
model: "gpt-4",
messages: [
{
role: "system",
content: "Rate the response quality from 0 to 1."
},
{
role: "user",
content: `Input: ${input}\nOutput: ${output}`
}
]
});
const score = parseFloat(judgment.choices[0].message.content ?? "0");
// Capture the judge's own run ID so it can be linked via sourceRunId
const judgeRunTree = getCurrentRunTree();
return { score, judgeRunId: judgeRunTree.id };
},
{ name: "judge_response" }
);
// Evaluate runs
for await (const run of client.listRuns({
project_name: "my-chatbot",
limit: 50
})) {
const { score, judgeRunId } = await judgeResponse(
run.id,
JSON.stringify(run.inputs),
JSON.stringify(run.outputs)
);
await client.createFeedback(run.id, "quality", {
  score: score,
  feedbackSourceType: "model",
  sourceRunId: judgeRunId,
  sourceInfo: {
    model: "gpt-4",
    evaluation_prompt: "v1.0"
  }
});
}
Generate tokens for secure public feedback collection.
import { Client } from "langsmith";
const client = new Client();
// Server-side: Generate token after run completion
async function handleChatRequest(userMessage: string) {
// Execute your LLM call with tracing
const response = await tracedChat(userMessage);
const runId = response.runId;
// Generate feedback token
const feedbackToken = await client.createPresignedFeedbackToken(
runId,
"user_satisfaction",
{ expiration: { days: 1 } } // 24 hours
);
return {
message: response.message,
feedbackUrl: feedbackToken.url
};
}
// Client-side: Submit feedback
async function submitFeedback(feedbackUrl: string, rating: number, comment: string) {
await fetch(feedbackUrl, {
method: "POST",
headers: {
"Content-Type": "application/json"
},
body: JSON.stringify({
score: rating,
comment: comment
})
});
}
Analyze feedback trends and patterns.
import { Client } from "langsmith";
const client = new Client();
// Analyze feedback trends
const feedbackStats = {
total: 0,
positive: 0,
negative: 0,
scoreSum: 0
};
for await (const feedback of client.listFeedback({
  feedbackKeys: ["user_rating"],
  limit: 1000
})) {
  // Skip feedback entries that have no numeric score
  if (typeof feedback.score !== "number") continue;
  feedbackStats.total++;
  feedbackStats.scoreSum += feedback.score;
  if (feedback.score >= 0.7) {
    feedbackStats.positive++;
  } else if (feedback.score <= 0.3) {
    feedbackStats.negative++;
  }
}
console.log(`Average Score: ${feedbackStats.scoreSum / feedbackStats.total}`);
console.log(`Positive Rate: ${feedbackStats.positive / feedbackStats.total}`);
console.log(`Negative Rate: ${feedbackStats.negative / feedbackStats.total}`);
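// A minimal sketch (assumes numeric scores): group average scores by feedback key
// to compare metrics side by side
const scoresByKey = new Map<string, { sum: number; count: number }>();
for await (const feedback of client.listFeedback({ limit: 1000 })) {
  if (typeof feedback.score !== "number") continue;
  const entry = scoresByKey.get(feedback.key) ?? { sum: 0, count: 0 };
  entry.sum += feedback.score;
  entry.count++;
  scoresByKey.set(feedback.key, entry);
}
for (const [key, { sum, count }] of scoresByKey) {
  console.log(`${key}: average ${(sum / count).toFixed(2)} over ${count} entries`);
}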
// Find runs with negative feedback
const problematicRuns = new Set<string>();
for await (const feedback of client.listFeedback({
  feedbackKeys: ["correctness", "helpfulness"]
})) {
  if (typeof feedback.score === "number" && feedback.score < 0.5) {
    problematicRuns.add(feedback.run_id);
  }
}
console.log(`Runs needing attention: ${problematicRuns.size}`);
Collect corrections for model fine-tuning.
import { Client } from "langsmith";
const client = new Client();
// Collect corrections for fine-tuning
const corrections: Array<{ input: any; incorrect: any; correct: any }> = [];
for await (const feedback of client.listFeedback({
feedbackKeys: ["correctness"]
})) {
if (feedback.score === 0 && feedback.correction) {
// Fetch the original run
const run = await client.readRun(feedback.run_id);
corrections.push({
input: run.inputs,
incorrect: run.outputs,
correct: feedback.correction
});
}
}
console.log(`Collected ${corrections.length} corrections for training`);
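// A hedged sketch (not part of the SDK): persist the collected corrections as JSONL
// for a downstream fine-tuning pipeline; file name and record shape are illustrative
const { writeFileSync } = await import("node:fs");
writeFileSync(
  "corrections.jsonl",
  corrections
    .map((c) => JSON.stringify({ input: c.input, expected_output: c.correct }))
    .join("\n")
);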
// Use corrections for model fine-tuning or prompt improvement
Feedback integrates with LangSmith's evaluation system: evaluators can log their results as feedback on the runs they score.
import { evaluate } from "langsmith/evaluation";
import { Client } from "langsmith";
const client = new Client();
// Create evaluator that logs feedback
const feedbackEvaluator = async ({ run, example }) => {
  // calculateScore stands in for your own scoring logic
  const score = calculateScore(run.outputs, example?.outputs);
  // Log as feedback
  await client.createFeedback(run.id, "automated_score", {
    score: score,
    feedbackSourceType: "model",
    sourceInfo: {
      evaluator: "custom_scorer_v1"
    }
  });
  return {
    key: "automated_score",
    score: score
  };
};
// Run evaluation with feedback logging
await evaluate(
(input) => yourModel(input),
{
data: "my-dataset",
evaluators: [feedbackEvaluator],
experimentPrefix: "my-evaluation"
}
);
Use descriptive, consistent feedback keys.
// Good
"correctness"
"helpfulness"
"response_quality"
"safety_compliance"
"user_satisfaction"
// Avoid
"feedback1"
"rating"
"score"Maintain consistent score ranges.
// Boolean: 0 or 1
score: 1 // thumbs up
score: 0 // thumbs down
// Normalized: 0.0 to 1.0
score: 0.85 // 85% quality
// Convert star ratings
const starRating = 4; // out of 5
const normalizedScore = starRating / 5;
await client.createFeedback(runId, "user_rating", {
score: normalizedScore,
value: starRating,
comment: `${starRating} stars`,
});
Include relevant context in sourceInfo.
await client.createFeedback(runId, "quality", {
  score: 0.9,
  sourceInfo: {
    // Who/what provided feedback
    annotator_id: "user123",
    annotation_tool: "internal_ui_v2",
    // When
    timestamp: new Date().toISOString(),
    // Context
    session_id: "session456",
    user_context: "mobile_app",
    // Model feedback specifics
    model: "gpt-4",
    model_version: "2024-01-01",
    prompt_template: "quality_judge_v3"
  }
});
Protect presigned feedback tokens.
// Good: Short expiration for public tokens
const publicToken = await client.createPresignedFeedbackToken(
runId,
"user_rating",
{ expiration: { hours: 1 } } // 1 hour
);
// Good: Longer expiration for email links
const emailToken = await client.createPresignedFeedbackToken(
runId,
"quality_review",
{ expiration: { days: 7 } } // 7 days
);
// Consider: Rate limiting and validation on feedback submission
Using different key names for the same concept makes analysis difficult.
// BAD: Inconsistent naming
await client.createFeedback(run_id, "rating1", { score: 1 });
await client.createFeedback(run_id, "user-rating", { score: 1 });
await client.createFeedback(run_id, "UserRating", { score: 1 });
// GOOD: Consistent snake_case keys
const FEEDBACK_KEYS = {
USER_RATING: "user_rating",
CORRECTNESS: "correctness",
HELPFULNESS: "helpfulness"
} as const;
await client.createFeedback(run_id, FEEDBACK_KEYS.USER_RATING, { score: 1 });
Mixing score ranges complicates aggregation and comparison.
// BAD: Different scales
await client.createFeedback(run_id, "quality", { score: 4 }); // Out of 5
await client.createFeedback(run_id, "accuracy", { score: 0.8 }); // Out of 1.0
await client.createFeedback(run_id, "speed", { score: 85 }); // Out of 100
// GOOD: Normalize to 0.0-1.0
await client.createFeedback(run_id, "quality", {
  score: 4 / 5, // 0.8
  value: 4, // Original rating preserved in value
  comment: "4/5 stars"
});
await client.createFeedback(run_id, "accuracy", { score: 0.8 }); // Already normalized
await client.createFeedback(run_id, "speed", {
  score: 85 / 100, // 0.85
  value: 85
});
Security risk if tokens leak or are shared publicly.
// BAD: 1 year expiration for a public token
const longLivedToken = await client.createPresignedFeedbackToken(runId, "rating", {
  expiration: { days: 365 }
});
// GOOD: Short expiration for public collection
const shortLivedToken = await client.createPresignedFeedbackToken(runId, "rating", {
  expiration: { days: 1 } // 24 hours
});
// ACCEPTABLE: Longer for email links with specific recipients
const emailToken = await client.createPresignedFeedbackToken(runId, "review", {
  expiration: { days: 7 } // 7 days
});
Unclear where feedback came from.
// BAD: Ambiguous source
await client.createFeedback(run_id, "quality", {
  score: 0.9
  // Missing feedbackSourceType
});
// GOOD: Explicit source
await client.createFeedback(run_id, "quality", {
  score: 0.9,
  feedbackSourceType: "model",
  sourceRunId: judgeRunId,
  sourceInfo: {
    model: "gpt-4",
    judge_version: "v1.0"
  }
});
Feedback should reference the run it evaluates.
// BAD: Missing run ID
// Note: the run ID is the first positional parameter of createFeedback
// await client.createFeedback("rating", { score: 1 }); // TypeScript error: Expected 3 arguments
// GOOD: Always include run_id
await client.createFeedback(runId, "rating", {
score: 1,
});
See Anti-Patterns for more feedback pitfalls.