Comprehensive feedback management system for collecting, storing, and analyzing feedback on LangSmith runs. Enables human-in-the-loop workflows, model evaluation, and quality monitoring through structured feedback collection.
Feedback represents evaluative information about a run's performance. It can come from multiple sources: API clients, automated model-based evaluators, and end users of an application (see FeedbackSourceType below).
Feedback supports both quantitative scores (numeric ratings, booleans) and qualitative values (text comments, corrections, categorical labels). All feedback is associated with a specific run and can be queried, updated, or deleted.
Create feedback on a run with scores, comments, and metadata.
/**
* Create feedback on a run
* @param params - Feedback creation parameters
* @returns Promise resolving to created feedback object
*/
async createFeedback(params: FeedbackCreate): Promise<Feedback>;
interface FeedbackCreate {
/** Run ID to provide feedback for */
run_id: string;
/** Feedback key/name (e.g., "correctness", "helpfulness") */
key: string;
/** Numeric or boolean score */
score?: ScoreType;
/** Arbitrary feedback value (string, number, boolean, object) */
value?: ValueType;
/** Optional correction data */
correction?: object | null;
/** Optional comment */
comment?: string | null;
/** Source information (type and metadata) */
source_info?: object | null;
/** Feedback configuration */
feedback_config?: FeedbackConfig | null;
/** Feedback source type */
feedback_source_type?: FeedbackSourceType;
/** Source run ID (if feedback came from a model) */
source_run_id?: string | null;
/** ID of the feedback (optional, auto-generated if not provided) */
id?: string;
}
interface Feedback {
/** Feedback ID */
id: string;
/** Run ID */
run_id: string;
/** Feedback key */
key: string;
/** Score value */
score?: number | boolean | null;
/** Feedback value */
value?: number | boolean | string | object | null;
/** Correction data */
correction?: object | null;
/** Comment */
comment?: string | null;
/** Source information */
source_info?: object | null;
/** Feedback configuration */
feedback_config?: FeedbackConfig | null;
/** Created timestamp */
created_at: string;
/** Modified timestamp */
modified_at: string;
/** Feedback source */
feedback_source?: FeedbackSourceBase | APIFeedbackSource | ModelFeedbackSource | null;
}
type ScoreType = number | boolean | null;
type ValueType = number | boolean | string | object | null;
enum FeedbackSourceType {
API = "api",
MODEL = "model",
APP = "app"
}
Usage Examples:
import { Client } from "langsmith";
const client = new Client();
// Simple thumbs up/down feedback
await client.createFeedback({
run_id: "550e8400-e29b-41d4-a716-446655440000",
key: "user_rating",
score: 1, // 1 for thumbs up, 0 for thumbs down
comment: "Great response!",
});
// Numeric score feedback
await client.createFeedback({
run_id: "550e8400-e29b-41d4-a716-446655440000",
key: "accuracy",
score: 0.95,
comment: "Highly accurate response with minor detail missing",
});
// Categorical feedback with value
await client.createFeedback({
run_id: "550e8400-e29b-41d4-a716-446655440000",
key: "category",
value: "factual_question",
score: 1,
});
// Feedback with correction
await client.createFeedback({
run_id: "550e8400-e29b-41d4-a716-446655440000",
key: "correctness",
score: 0,
comment: "Incorrect calculation",
correction: {
corrected_output: "The correct answer is 42",
},
});
// Model-generated feedback
await client.createFeedback({
run_id: "550e8400-e29b-41d4-a716-446655440000",
key: "coherence",
score: 0.88,
feedback_source_type: "model",
source_run_id: "660e8400-e29b-41d4-a716-446655440001",
source_info: {
model: "gpt-4",
prompt_version: "v1.2",
},
});
// Feedback with custom configuration
await client.createFeedback({
run_id: "550e8400-e29b-41d4-a716-446655440000",
key: "safety",
score: 1,
feedback_config: {
type: "continuous",
min: 0,
max: 1,
},
source_info: {
reviewer: "safety_team",
review_date: "2024-01-15",
},
});
Update existing feedback to revise scores, comments, or other attributes.
/**
* Update existing feedback
* @param feedbackId - ID of feedback to update
* @param params - Fields to update
* @returns Promise resolving when update is complete
*/
async updateFeedback(
feedbackId: string,
params: FeedbackUpdate
): Promise<void>;
interface FeedbackUpdate {
/** Updated score */
score?: ScoreType;
/** Updated value */
value?: ValueType;
/** Updated correction */
correction?: object | null;
/** Updated comment */
comment?: string | null;
/** Updated feedback configuration */
feedback_config?: FeedbackConfig | null;
}
Usage Examples:
import { Client } from "langsmith";
const client = new Client();
// Update score and comment
await client.updateFeedback("feedback-uuid", {
score: 0.8,
comment: "Updated rating after review",
});
// Add correction to existing feedback
await client.updateFeedback("feedback-uuid", {
correction: {
corrected_output: "Revised answer based on new information",
},
});
// Update value only
await client.updateFeedback("feedback-uuid", {
value: "high_quality",
});
Retrieve feedback details by ID.
/**
* Read feedback by ID
* @param feedbackId - Feedback ID
* @returns Promise resolving to feedback object
*/
async readFeedback(feedbackId: string): Promise<Feedback>;
Usage Examples:
import { Client } from "langsmith";
const client = new Client();
// Get feedback details
const feedback = await client.readFeedback("feedback-uuid");
console.log(`Score: ${feedback.score}`);
console.log(`Comment: ${feedback.comment}`);
console.log(`Created: ${feedback.created_at}`);
Remove feedback from a run.
/**
* Delete feedback
* @param feedbackId - ID of feedback to delete
* @returns Promise resolving when deletion is complete
*/
async deleteFeedback(feedbackId: string): Promise<void>;
Usage Examples:
import { Client } from "langsmith";
const client = new Client();
// Delete feedback
await client.deleteFeedback("feedback-uuid");
Query feedback with flexible filtering options.
/**
* List feedback with filtering
* @param params - Query parameters for filtering feedback
* @returns AsyncIterable of feedback objects
*/
async *listFeedback(params?: ListFeedbackParams): AsyncIterable<Feedback>;
interface ListFeedbackParams {
/** Filter by run IDs */
run_ids?: string[];
/** Filter by feedback keys */
feedback_keys?: string[];
/** Filter by feedback source type */
feedback_source_types?: FeedbackSourceType[];
/** Maximum number of results */
limit?: number;
/** Offset for pagination */
offset?: number;
/** Filter by user ID (for app feedback) */
user_ids?: string[];
/** Filter by feedback with scores */
has_score?: boolean;
/** Filter by feedback with comments */
has_comment?: boolean;
}
Usage Examples:
import { Client } from "langsmith";
const client = new Client();
// List all feedback for specific runs
for await (const feedback of client.listFeedback({
run_ids: [
"550e8400-e29b-41d4-a716-446655440000",
"660e8400-e29b-41d4-a716-446655440001",
],
})) {
console.log(`${feedback.key}: ${feedback.score}`);
}
// List feedback by key
for await (const feedback of client.listFeedback({
feedback_keys: ["correctness", "helpfulness"],
limit: 100,
})) {
console.log(`Run ${feedback.run_id}: ${feedback.key} = ${feedback.score}`);
}
// List human feedback only
for await (const feedback of client.listFeedback({
feedback_source_types: ["app"],
has_score: true,
})) {
console.log(`User feedback: ${feedback.comment}`);
}
// List model-generated feedback
for await (const feedback of client.listFeedback({
feedback_source_types: ["model"],
limit: 50,
})) {
console.log(`Model evaluation: ${feedback.key} = ${feedback.score}`);
}
// Paginated feedback retrieval
const feedbackBatch = [];
for await (const feedback of client.listFeedback({
limit: 20,
offset: 40,
has_comment: true,
})) {
feedbackBatch.push(feedback);
}
Generate secure tokens for collecting feedback without API keys, ideal for embedding feedback forms in public applications.
/**
* Create presigned feedback token for secure feedback collection
* @param params - Token creation parameters
* @returns Promise resolving to feedback ingest token
*/
async createPresignedFeedbackToken(
params: CreatePresignedFeedbackTokenParams
): Promise<FeedbackIngestToken>;
interface CreatePresignedFeedbackTokenParams {
/** Run ID to collect feedback for */
run_id: string;
/** Feedback key */
feedback_key: string;
/** Token expiration time in seconds (optional) */
expires_in?: number;
/** Additional token metadata (optional) */
feedback_config?: FeedbackConfig;
}
interface FeedbackIngestToken {
/** Public token for feedback submission */
token: string;
/** Token expiration timestamp */
expires_at: string;
/** Run ID associated with token */
run_id: string;
/** Feedback key for this token */
feedback_key: string;
/** Public URL for feedback submission */
url: string;
}
Usage Examples:
import { Client } from "langsmith";
const client = new Client();
// Create token for user feedback
const token = await client.createPresignedFeedbackToken({
run_id: "550e8400-e29b-41d4-a716-446655440000",
feedback_key: "user_rating",
expires_in: 86400, // 24 hours
});
console.log(`Feedback URL: ${token.url}`);
console.log(`Token: ${token.token}`);
console.log(`Expires: ${token.expires_at}`);
// Embed feedback form in application
const feedbackUrl = token.url;
// Users can submit feedback via POST to this URL with:
// { score: 1, comment: "Great response!" }
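// A minimal sketch of that submission from a browser or server (illustrative;
// the accepted body fields follow the comment above and depend on your feedback_config):
await fetch(feedbackUrl, {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({ score: 1, comment: "Great response!" }),
});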
// Create token with custom configuration
const configuredToken = await client.createPresignedFeedbackToken({
run_id: "550e8400-e29b-41d4-a716-446655440000",
feedback_key: "quality_rating",
expires_in: 604800, // 7 days
feedback_config: {
type: "continuous",
min: 1,
max: 5,
},
});
// Multiple tokens for different feedback types
const correctnessToken = await client.createPresignedFeedbackToken({
run_id: "550e8400-e29b-41d4-a716-446655440000",
feedback_key: "correctness",
expires_in: 3600, // 1 hour
});
const helpfulnessToken = await client.createPresignedFeedbackToken({
run_id: "550e8400-e29b-41d4-a716-446655440000",
feedback_key: "helpfulness",
expires_in: 3600,
});
List all presigned feedback tokens, useful for managing and tracking distributed tokens.
/**
* List presigned feedback tokens
* @param params - Optional filter parameters
* @returns Async iterable of feedback tokens
*/
listPresignedFeedbackTokens(params?: {
runId?: string;
limit?: number;
}): AsyncIterable<FeedbackIngestToken>;
Usage Examples:
import { Client } from "langsmith";
const client = new Client();
// List all tokens for a run
for await (const token of client.listPresignedFeedbackTokens({
runId: "550e8400-e29b-41d4-a716-446655440000"
})) {
console.log(`Token: ${token.token}`);
console.log(`Feedback key: ${token.feedback_key}`);
console.log(`Expires: ${token.expires_at}`);
}
// List recent tokens
for await (const token of client.listPresignedFeedbackTokens({
limit: 100
})) {
console.log(`${token.feedback_key}: ${token.url}`);
}
Evaluate a run using evaluators and automatically log the results as feedback.
/**
* Evaluate a run and log feedback
* @param runId - Run ID to evaluate
* @param params - Evaluation parameters
* @returns Promise resolving to evaluation results
*/
evaluateRun(
runId: string,
params: {
evaluators: EvaluatorT[];
sourceInfo?: Record<string, any>;
}
): Promise<EvaluationResults>;
Usage Examples:
import { Client } from "langsmith";
const client = new Client();
// Create evaluators
const accuracyEvaluator = ({ run, example }) => ({
key: "accuracy",
score: run.outputs?.correct ? 1 : 0
});
const latencyEvaluator = ({ run }) => ({
key: "latency",
score: (run.end_time || 0) - (run.start_time || 0),
value: `${(run.end_time || 0) - (run.start_time || 0)}ms`
});
// Evaluate a run
const results = await client.evaluateRun("run-123", {
evaluators: [accuracyEvaluator, latencyEvaluator],
sourceInfo: { evaluator_version: "1.0" }
});
console.log("Evaluation results:", results);Log evaluation feedback directly, typically used after running custom evaluations.
/**
* Log evaluation feedback for a run
* @param params - Feedback logging parameters
* @returns Promise resolving when logging completes
*/
logEvaluationFeedback(params: {
runId: string;
key: string;
score?: number | boolean;
value?: any;
comment?: string;
sourceInfo?: Record<string, any>;
feedbackSourceType?: "model" | "api" | "app";
}): Promise<void>;
Usage Examples:
import { Client } from "langsmith";
const client = new Client();
// Log simple score
await client.logEvaluationFeedback({
runId: "run-123",
key: "quality",
score: 0.85,
feedbackSourceType: "model"
});
// Log detailed feedback
await client.logEvaluationFeedback({
runId: "run-456",
key: "coherence",
score: 0.92,
value: {
subscores: {
grammar: 0.95,
relevance: 0.90,
completeness: 0.91
}
},
comment: "High coherence with minor relevance issues",
sourceInfo: {
evaluator: "gpt-4",
version: "2024-01",
temperature: 0.1
},
feedbackSourceType: "model"
});
Create a comparative experiment for evaluating multiple runs side-by-side.
/**
* Create a comparative experiment
* @param params - Comparative experiment parameters
* @returns Promise resolving to created experiment
*/
createComparativeExperiment(params: {
name: string;
experimentIds: string[];
referenceDatasetId?: string;
description?: string;
metadata?: Record<string, any>;
}): Promise<ComparativeExperiment>;
Usage Examples:
import { Client } from "langsmith";
const client = new Client();
// Create comparative experiment
const comparison = await client.createComparativeExperiment({
name: "GPT-4 vs Claude Comparison",
experimentIds: ["exp-gpt4-baseline", "exp-claude-v1"],
description: "Comparing response quality across models",
metadata: {
evaluator: "human",
criteria: ["accuracy", "helpfulness", "safety"]
}
});
console.log(`Created comparison: ${comparison.id}`);
console.log(`Compare at: ${comparison.url}`);
// Create three-way comparison
const threeWayComparison = await client.createComparativeExperiment({
name: "Model Selection",
experimentIds: [
"exp-gpt4",
"exp-claude-opus",
"exp-gemini-pro"
],
referenceDatasetId: "dataset-123",
description: "Selecting best model for production",
metadata: {
stage: "production-candidate",
priority: "high"
}
});
interface FeedbackConfig {
/** Feedback type */
type?: "continuous" | "categorical" | "freeform";
/** Minimum value (for continuous) */
min?: number;
/** Maximum value (for continuous) */
max?: number;
/** Valid categories (for categorical) */
categories?: FeedbackCategory[];
}
interface FeedbackCategory {
/** Category value */
value: string | number;
/** Category label */
label?: string;
}
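// Example (illustrative): configuring a categorical feedback key via
// feedback_config on createFeedback. The key name and category values
// below are assumptions for demonstration, not defined by the SDK.
import { Client } from "langsmith";
const client = new Client();
await client.createFeedback({
  run_id: "550e8400-e29b-41d4-a716-446655440000",
  key: "response_tone",
  value: "formal",
  feedback_config: {
    type: "categorical",
    categories: [
      { value: "formal", label: "Formal" },
      { value: "casual", label: "Casual" },
      { value: "mixed", label: "Mixed" },
    ],
  },
});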
interface FeedbackSourceBase {
/** Source type */
type?: string;
/** Source metadata */
metadata?: Record<string, any>;
}
interface APIFeedbackSource extends FeedbackSourceBase {
type: "api";
/** API metadata (e.g., endpoint, version) */
metadata?: Record<string, any>;
}
interface ModelFeedbackSource extends FeedbackSourceBase {
type: "model";
/** Model identifier */
metadata?: {
model?: string;
model_version?: string;
prompt_version?: string;
[key: string]: any;
};
}
import { Client } from "langsmith";
const client = new Client();
// Retrieve runs for annotation
for await (const run of client.listRuns({
project_name: "my-chatbot",
filter: 'has(tags, "needs_review")',
limit: 10,
})) {
// Display run to human annotator
console.log(`Run ${run.id}: ${JSON.stringify(run.outputs)}`);
// Collect human feedback
const humanScore = await getHumanRating(); // Your UI logic
const humanComment = await getHumanComment();
// Save feedback
await client.createFeedback({
run_id: run.id,
key: "human_rating",
score: humanScore,
comment: humanComment,
feedback_source_type: "app",
});
}
import { Client } from "langsmith";
import { traceable, getCurrentRunTree } from "langsmith/traceable";
import OpenAI from "openai";
const client = new Client();
const openai = new OpenAI();
const judgeResponse = traceable(
async (runId: string, input: string, output: string) => {
const judgment = await openai.chat.completions.create({
model: "gpt-4",
messages: [
{
role: "system",
content: "Rate the response quality from 0 to 1.",
},
{
role: "user",
content: `Input: ${input}\nOutput: ${output}`,
},
],
});
const score = parseFloat(judgment.choices[0].message.content ?? "0");
// Get the judge run's ID from the current tracing context
const judgeRunTree = getCurrentRunTree();
return { score, judgeRunId: judgeRunTree.id };
},
{ name: "judge_response" }
);
// Evaluate runs
for await (const run of client.listRuns({
project_name: "my-chatbot",
limit: 50,
})) {
const { score, judgeRunId } = await judgeResponse(
run.id,
JSON.stringify(run.inputs),
JSON.stringify(run.outputs)
);
await client.createFeedback({
run_id: run.id,
key: "quality",
score: score,
feedback_source_type: "model",
source_run_id: judgeRunId,
source_info: {
model: "gpt-4",
evaluation_prompt: "v1.0",
},
});
}
import { Client } from "langsmith";
const client = new Client();
// Server-side: Generate token after run completion
async function handleChatRequest(userMessage: string) {
// Execute your LLM call with tracing
const response = await tracedChat(userMessage);
const runId = response.runId;
// Generate feedback token
const feedbackToken = await client.createPresignedFeedbackToken({
run_id: runId,
feedback_key: "user_satisfaction",
expires_in: 86400, // 24 hours
});
return {
message: response.message,
feedbackUrl: feedbackToken.url,
};
}
// Client-side: Submit feedback
async function submitFeedback(feedbackUrl: string, rating: number, comment: string) {
await fetch(feedbackUrl, {
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify({
score: rating,
comment: comment,
}),
});
}
import { Client } from "langsmith";
const client = new Client();
// Analyze feedback trends
const feedbackStats = {
total: 0,
positive: 0,
negative: 0,
scoreSum: 0,
};
for await (const feedback of client.listFeedback({
feedback_keys: ["user_rating"],
has_score: true,
limit: 1000,
})) {
feedbackStats.total++;
const score = Number(feedback.score);
feedbackStats.scoreSum += score;
if (score >= 0.7) {
feedbackStats.positive++;
} else if (score <= 0.3) {
feedbackStats.negative++;
}
}
console.log(`Average Score: ${feedbackStats.scoreSum / feedbackStats.total}`);
console.log(`Positive Rate: ${feedbackStats.positive / feedbackStats.total}`);
console.log(`Negative Rate: ${feedbackStats.negative / feedbackStats.total}`);
// Find runs with negative feedback
const problematicRuns = new Set<string>();
for await (const feedback of client.listFeedback({
feedback_keys: ["correctness", "helpfulness"],
has_score: true,
})) {
if (Number(feedback.score) < 0.5) {
problematicRuns.add(feedback.run_id);
}
}
console.log(`Runs needing attention: ${problematicRuns.size}`);
import { Client } from "langsmith";
const client = new Client();
// Collect corrections for fine-tuning
const corrections: Array<{ input: any; incorrect: any; correct: any }> = [];
for await (const feedback of client.listFeedback({
feedback_keys: ["correctness"],
has_score: true,
})) {
if (feedback.score === 0 && feedback.correction) {
// Fetch the original run
const run = await client.readRun(feedback.run_id);
corrections.push({
input: run.inputs,
incorrect: run.outputs,
correct: feedback.correction,
});
}
}
console.log(`Collected ${corrections.length} corrections for training`);
// Use corrections for model fine-tuning or prompt improvement
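// One possible next step (a sketch; the JSONL field names are illustrative,
// not a required format): serialize the collected corrections for fine-tuning.
import { writeFileSync } from "node:fs";
const jsonl = corrections
  .map((c) =>
    JSON.stringify({
      input: c.input,
      rejected_output: c.incorrect,
      preferred_output: c.correct,
    })
  )
  .join("\n");
writeFileSync("corrections.jsonl", jsonl);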
Feedback integrates seamlessly with LangSmith's evaluation system:
import { evaluate } from "langsmith/evaluation";
import { Client } from "langsmith";
const client = new Client();
// Create evaluator that logs feedback
const feedbackEvaluator = async ({ run, example }) => {
const score = calculateScore(run.outputs, example?.outputs); // your scoring logic
// Log as feedback
await client.createFeedback({
run_id: run.id,
key: "automated_score",
score: score,
feedback_source_type: "model",
source_info: {
evaluator: "custom_scorer_v1",
},
});
return {
key: "automated_score",
score: score,
};
};
// Run evaluation with feedback logging
await evaluate(
(input) => yourModel(input),
{
data: "my-dataset",
evaluators: [feedbackEvaluator],
project_name: "my-evaluation",
}
);
Use consistent, descriptive feedback keys:
// Good
"correctness"
"helpfulness"
"response_quality"
"safety_compliance"
"user_satisfaction"
// Avoid
"feedback1"
"rating"
"score"Maintain consistent score ranges:
// Boolean: 0 or 1
score: 1 // thumbs up
score: 0 // thumbs down
// Normalized: 0.0 to 1.0
score: 0.85 // 85% quality
// Star rating: Convert to normalized
const starRating = 4; // out of 5
const normalizedScore = starRating / 5;
await client.createFeedback({
run_id: runId,
key: "user_rating",
score: normalizedScore,
value: starRating, // Keep original in value
comment: `${starRating} stars`,
});
Include relevant context in source_info:
await client.createFeedback({
run_id: runId,
key: "quality",
score: 0.9,
source_info: {
// Who/what provided feedback
annotator_id: "user123",
annotation_tool: "internal_ui_v2",
// When
timestamp: new Date().toISOString(),
// Context
session_id: "session456",
user_context: "mobile_app",
// Model feedback specifics
model: "gpt-4",
model_version: "2024-01-01",
prompt_template: "quality_judge_v3",
},
});
Protect presigned feedback tokens:
// Good: Short expiration for public tokens
const publicToken = await client.createPresignedFeedbackToken({
run_id: runId,
feedback_key: "user_rating",
expires_in: 3600, // 1 hour
});
// Good: Longer expiration for email links
const emailToken = await client.createPresignedFeedbackToken({
run_id: runId,
feedback_key: "quality_review",
expires_in: 604800, // 7 days
});
// Consider: Rate limiting and validation on feedback submission
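// A minimal sketch of one way to do that (illustrative only — the limiter,
// window, and validation rules below are assumptions, not part of the SDK):
const submissionLog = new Map<string, number[]>();
const WINDOW_MS = 60_000;
const MAX_PER_WINDOW = 5;
function canSubmit(clientId: string): boolean {
  const now = Date.now();
  // Keep only submissions from this client within the current window
  const recent = (submissionLog.get(clientId) ?? []).filter(
    (t) => now - t < WINDOW_MS
  );
  submissionLog.set(clientId, [...recent, now]);
  return recent.length < MAX_PER_WINDOW;
}
async function forwardFeedback(
  clientId: string,
  feedbackUrl: string,
  payload: { score: number; comment?: string }
) {
  if (!canSubmit(clientId)) throw new Error("Rate limit exceeded");
  if (payload.score < 0 || payload.score > 1) throw new Error("Invalid score");
  await fetch(feedbackUrl, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  });
}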