Annotation Queues in LangSmith provide a structured system for collecting human feedback on runs. They enable workflows where runs are added to queues for review, annotation, and quality assessment by human reviewers.
Annotation Queues are designed for human-in-the-loop workflows where you need to collect quality ratings on production outputs, gather corrected examples for fine-tuning or evaluation datasets, compare outputs from different models or prompts, and triage edge cases and failure modes.
import { Client } from "langsmith";
const client = new Client();
// Create an annotation queue for feedback collection
const queue = await client.createAnnotationQueue({
name: "Model Output Review",
description: "Queue for reviewing customer-facing model outputs",
});
// Add runs to the queue for annotation
await client.addRunsToAnnotationQueue({
queueId: queue.id,
runIds: ["run-1", "run-2", "run-3"],
});
// Retrieve first run from queue for annotation
const firstRun = await client.getRunFromAnnotationQueue(queue.id, 0);
console.log(`Run ${firstRun.run.id} needs review`);
// Display to reviewer and collect feedback
Creates a new annotation queue for organizing runs that need human review.
/**
* Create a new annotation queue
* @param options - Queue configuration options
* @returns Promise resolving to the created annotation queue with details
*/
createAnnotationQueue(options: {
/** Unique name for the annotation queue */
name: string;
/** Optional description of the queue's purpose */
description?: string;
/** Optional custom queue ID (auto-generated if not provided) */
queueId?: string;
/** Optional rubric instructions for annotators */
rubricInstructions?: string;
}): Promise<AnnotationQueueWithDetails>;
interface AnnotationQueueWithDetails {
/** Unique identifier for the queue */
id: string;
/** Name of the annotation queue */
name: string;
/** Description of the queue's purpose */
description?: string;
/** Rubric instructions for annotators */
rubric_instructions?: string;
/** Timestamp when queue was created */
created_at: string;
/** Timestamp when queue was last updated */
updated_at: string;
}
Usage Examples:
import { Client } from "langsmith";
const client = new Client();
// Create a queue for model output review
const reviewQueue = await client.createAnnotationQueue({
name: "Production Output Review",
description: "Review outputs from production chatbot for quality assurance",
});
// Create a queue for training data collection
const trainingQueue = await client.createAnnotationQueue({
name: "Training Data Collection",
description: "Collect human-annotated examples for model fine-tuning",
});
// Create a queue for comparative evaluation
const comparisonQueue = await client.createAnnotationQueue({
name: "Model A vs Model B",
description: "Side-by-side comparison of two model versions",
});
// Create queue with rubric instructions
const guidedQueue = await client.createAnnotationQueue({
name: "Quality Review with Rubric",
description: "Structured quality review process",
rubricInstructions: `
Rate on scale of 1-5:
- Accuracy: How factually correct is the response?
- Helpfulness: How useful is the response to the user?
- Safety: Is the response free from harmful content?
`,
});
Retrieves details about a specific annotation queue.
/**
* Read annotation queue details
* @param queueId - The unique identifier of the queue
* @returns Promise resolving to the annotation queue details
*/
readAnnotationQueue(queueId: string): Promise<AnnotationQueueWithDetails>;
Usage Examples:
import { Client } from "langsmith";
const client = new Client();
// Read queue details by ID
const queue = await client.readAnnotationQueue("queue-uuid-123");
console.log(`Queue: ${queue.name}`);
console.log(`Description: ${queue.description}`);
console.log(`Created: ${queue.created_at}`);
console.log(`Updated: ${queue.updated_at}`);
if (queue.rubric_instructions) {
console.log(`Rubric:\n${queue.rubric_instructions}`);
}
// Use queue details to display in UI
async function displayQueueInfo(queueId: string) {
const queue = await client.readAnnotationQueue(queueId);
return {
title: queue.name,
subtitle: queue.description,
lastModified: queue.updated_at,
hasRubric: !!queue.rubric_instructions,
};
}
Lists all annotation queues with optional filtering.
/**
* List annotation queues
* @param options - Optional filtering options
* @returns Async iterable of annotation queues
*/
listAnnotationQueues(
options?: {
/** Filter by queue IDs */
queueIds?: string[];
/** Filter by exact name */
name?: string;
/** Filter by name substring */
nameContains?: string;
/** Limit number of results */
limit?: number;
}
): AsyncIterableIterator<AnnotationQueue>;
interface AnnotationQueue {
/** Queue ID */
id: string;
/** Queue name */
name: string;
/** Queue description */
description?: string;
/** Creation timestamp */
created_at: string;
/** Update timestamp */
updated_at: string;
/** Rubric instructions */
rubric_instructions?: string;
}
Usage Examples:
import { Client } from "langsmith";
const client = new Client();
// List all annotation queues (async iterable)
for await (const queue of client.listAnnotationQueues()) {
console.log(`[${queue.id}] ${queue.name}`);
}
// Search for queues by exact name
for await (const queue of client.listAnnotationQueues({ name: "Production QA" })) {
console.log(`Found queue: ${queue.name}`);
}
// Search queues by name substring
for await (const queue of client.listAnnotationQueues({ nameContains: "review" })) {
console.log(`Review queue: ${queue.name}`);
}
// Get specific queues by ID
for await (const queue of client.listAnnotationQueues({
queueIds: ["queue-id-1", "queue-id-2"],
})) {
console.log(`Queue: ${queue.name}`);
}
// Display queues in a dashboard with limit
const queues: AnnotationQueue[] = [];
for await (const queue of client.listAnnotationQueues({ limit: 20 })) {
queues.push(queue);
console.log(`[${queue.id}] ${queue.name}`);
console.log(` ${queue.description || 'No description'}`);
console.log(` Last updated: ${queue.updated_at}`);
}
console.log(`Total queues: ${queues.length}`);
Updates an existing annotation queue's properties.
/**
* Update annotation queue
* @param queueId - The unique identifier of the queue to update
* @param options - Properties to update
* @returns Promise resolving to void
*/
updateAnnotationQueue(
queueId: string,
options: {
/** New name for the queue */
name: string;
/** New description for the queue */
description?: string;
/** Optional rubric instructions for annotators */
rubricInstructions?: string;
}
): Promise<void>;
Usage Examples:
import { Client } from "langsmith";
const client = new Client();
// Update queue name
await client.updateAnnotationQueue("queue-id-123", {
name: "Updated Review Queue",
});
// Update queue description
await client.updateAnnotationQueue("queue-id-123", {
name: "Production QA",
description: "Now includes edge case reviews",
});
// Update both name and description
await client.updateAnnotationQueue("queue-id-123", {
name: "Production QA Queue",
description: "Quality assurance for all production outputs",
});
// Update rubric instructions
await client.updateAnnotationQueue("queue-id-123", {
name: "Quality Review",
rubricInstructions: `
Updated rubric:
1. Accuracy (1-5): Factual correctness
2. Helpfulness (1-5): User value
3. Safety (1-5): Harmfulness check
4. Clarity (1-5): Communication quality
`,
});
// Rename queue based on status
async function markQueueAsArchived(queueId: string) {
const queue = await client.readAnnotationQueue(queueId);
await client.updateAnnotationQueue(queueId, {
name: `[ARCHIVED] ${queue.name}`,
description: `${queue.description || ''} - Archived on ${new Date().toLocaleDateString()}`,
});
}
Deletes an annotation queue permanently.
/**
* Delete annotation queue
* @param queueId - The unique identifier of the queue to delete
* @returns Promise resolving when deletion is complete
*/
deleteAnnotationQueue(queueId: string): Promise<void>;
Usage Examples:
import { Client } from "langsmith";
const client = new Client();
// Delete a queue
await client.deleteAnnotationQueue("queue-id-123");
// Delete queue with confirmation
async function deleteQueueWithConfirmation(queueId: string) {
const queue = await client.readAnnotationQueue(queueId);
const confirmed = confirm(`Delete queue "${queue.name}"?`);
if (confirmed) {
await client.deleteAnnotationQueue(queueId);
console.log("Queue deleted successfully");
}
}
// Clean up old queues
async function archiveOldQueues(daysOld: number) {
const cutoffDate = new Date();
cutoffDate.setDate(cutoffDate.getDate() - daysOld);
for await (const queue of client.listAnnotationQueues()) {
const updatedAt = new Date(queue.updated_at);
if (updatedAt < cutoffDate) {
console.log(`Deleting old queue: ${queue.name}`);
await client.deleteAnnotationQueue(queue.id);
}
}
}
Adds one or more runs to an annotation queue for human review.
/**
* Add runs to an annotation queue
* @param params - Parameters specifying queue and runs to add
* @returns Promise resolving when runs are added
*/
addRunsToAnnotationQueue(
params: AddRunsToAnnotationQueueParams
): Promise<void>;
interface AddRunsToAnnotationQueueParams {
/** ID of the annotation queue */
queueId: string;
/** Array of run IDs to add to the queue */
runIds: string[];
}
Usage Examples:
import { Client } from "langsmith";
const client = new Client();
// Add specific runs to a queue
await client.addRunsToAnnotationQueue({
queueId: "queue-id-123",
runIds: ["run-1", "run-2", "run-3"],
});
// Add runs based on criteria
async function addFailedRunsToQueue(projectName: string, queueId: string) {
const runs = [];
// List runs with errors
for await (const run of client.listRuns({
projectName,
isRoot: true,
error: true,
})) {
runs.push(run);
}
const runIds = runs.map(run => run.id);
if (runIds.length > 0) {
await client.addRunsToAnnotationQueue({
queueId,
runIds,
});
console.log(`Added ${runIds.length} failed runs to queue`);
}
}
// Add runs with low confidence scores
async function queueLowConfidenceRuns(projectName: string, queueId: string, threshold: number = 0.7) {
const lowConfidenceRuns = [];
for await (const run of client.listRuns({ projectName, isRoot: true })) {
const confidence = run.outputs?.confidence;
if (confidence !== undefined && confidence < threshold) {
lowConfidenceRuns.push(run);
}
}
const runIds = lowConfidenceRuns.map(run => run.id);
if (runIds.length > 0) {
await client.addRunsToAnnotationQueue({
queueId,
runIds,
});
console.log(`Added ${runIds.length} low confidence runs`);
}
}
// Add runs in batches
async function addRunsInBatches(queueId: string, runIds: string[], batchSize: number = 50) {
for (let i = 0; i < runIds.length; i += batchSize) {
const batch = runIds.slice(i, i + batchSize);
await client.addRunsToAnnotationQueue({
queueId,
runIds: batch,
});
console.log(`Added batch ${Math.floor(i / batchSize) + 1} (${batch.length} runs)`);
}
}
// Add runs matching complex filter
async function addFilteredRuns(projectName: string, queueId: string) {
const filteredRuns = [];
for await (const run of client.listRuns({
projectName,
isRoot: true,
})) {
// Complex filtering logic
const needsReview = (
run.error !== undefined ||
(run.outputs?.confidence && run.outputs.confidence < 0.7) ||
hasUnusualPattern(run)
);
if (needsReview) {
filteredRuns.push(run);
}
}
if (filteredRuns.length > 0) {
await client.addRunsToAnnotationQueue({
queueId,
runIds: filteredRuns.map(r => r.id),
});
}
}
function hasUnusualPattern(run: any): boolean {
// Implement pattern detection logic
const inputStr = JSON.stringify(run.inputs);
return inputStr.length > 10000 || inputStr.includes("�");
}
Retrieves a single run from an annotation queue by index, useful for iterating through queue items one at a time.
/**
* Get a single run from an annotation queue
* @param queueId - The unique identifier of the queue
* @param index - The index of the run to retrieve (0-based)
* @returns Promise resolving to run with queue information
*/
getRunFromAnnotationQueue(
queueId: string,
index: number
): Promise<RunWithAnnotationQueueInfo>;
interface RunWithAnnotationQueueInfo {
/** The run object containing trace data */
run: Run;
/** Annotation queue metadata */
queue_info?: {
/** When the run was added to the queue */
added_at: Date;
/** Current status in the queue */
status?: string;
/** Assigned reviewer if any */
assigned_to?: string;
};
}
Usage Examples:
import { Client } from "langsmith";
const client = new Client();
// Get first run from queue
const firstRun = await client.getRunFromAnnotationQueue("queue-id-123", 0);
console.log(`Reviewing run: ${firstRun.run.id}`);
console.log(`Added to queue at: ${firstRun.queue_info?.added_at}`);
console.log(`Inputs: ${JSON.stringify(firstRun.run.inputs)}`);
console.log(`Outputs: ${JSON.stringify(firstRun.run.outputs)}`);
// Get specific run by index
const fifthRun = await client.getRunFromAnnotationQueue("queue-id-123", 4);
// Process runs sequentially
async function reviewQueueSequentially(queueId: string, maxToReview: number = 10) {
for (let i = 0; i < maxToReview; i++) {
try {
const runInfo = await client.getRunFromAnnotationQueue(queueId, i);
// Present to human reviewer
const feedback = await collectHumanFeedback(runInfo.run);
// Save feedback
await client.createFeedback({
run_id: runInfo.run.id,
key: "human_review",
score: feedback.score,
comment: feedback.comment,
});
console.log(`Reviewed run ${i + 1}/${maxToReview}`);
} catch (error) {
// End of queue or error
console.log(`Completed ${i} reviews`);
break;
}
}
}
// Build reviewer interface
async function getNextRunForReview(queueId: string, currentIndex: number) {
const runInfo = await client.getRunFromAnnotationQueue(queueId, currentIndex);
return {
runId: runInfo.run.id,
inputs: runInfo.run.inputs,
outputs: runInfo.run.outputs,
metadata: {
addedAt: runInfo.queue_info?.added_at,
queuePosition: currentIndex,
},
};
};
Removes a run from an annotation queue after it has been reviewed or if it was added by mistake.
/**
* Delete a run from an annotation queue
* @param queueId - Queue identifier
* @param queueRunId - Run identifier to remove from queue
* @returns Promise resolving when deletion completes
*/
deleteRunFromAnnotationQueue(
queueId: string,
queueRunId: string
): Promise<void>;
Usage Examples:
import { Client } from "langsmith";
const client = new Client();
// Remove run after annotation complete
await client.deleteRunFromAnnotationQueue("queue-id-123", "run-456");
// Batch remove reviewed runs
const reviewedRunIds = ["run-1", "run-2", "run-3"];
for (const runId of reviewedRunIds) {
await client.deleteRunFromAnnotationQueue("queue-id-123", runId);
}
console.log(`Removed ${reviewedRunIds.length} reviewed runs from queue`);
// Remove run and log action
async function removeAndLog(queueId: string, runId: string, reason: string) {
await client.deleteRunFromAnnotationQueue(queueId, runId);
console.log(`Removed run ${runId} from queue. Reason: ${reason}`);
}
// Clean up queue after processing
async function cleanupProcessedRuns(queueId: string) {
const { size } = await client.getSizeFromAnnotationQueue(queueId);
// Snapshot the queued runs first so removals below don't shift the index positions being iterated
const queuedRuns = [];
for (let i = 0; i < size; i++) {
queuedRuns.push(await client.getRunFromAnnotationQueue(queueId, i));
}
for (const runInfo of queuedRuns) {
// Check if run has been reviewed
const feedbackList = [];
for await (const feedback of client.listFeedback({
runIds: [runInfo.run.id],
feedbackKeys: ["human_review"],
})) {
feedbackList.push(feedback);
}
if (feedbackList.length > 0) {
// Remove reviewed run
await client.deleteRunFromAnnotationQueue(queueId, runInfo.run.id);
console.log(`Removed reviewed run ${runInfo.run.id}`);
}
}
}
Gets the current number of runs in an annotation queue.
/**
* Get the size of an annotation queue
* @param queueId - Queue identifier
* @returns Promise resolving to an object with the queue size
*/
getSizeFromAnnotationQueue(queueId: string): Promise<{ size: number }>;
Usage Examples:
import { Client } from "langsmith";
const client = new Client();
// Check queue size
const { size } = await client.getSizeFromAnnotationQueue("queue-id-123");
console.log(`Queue has ${size} runs pending review`);
// Monitor queue size
async function monitorQueue(queueId: string, checkIntervalMs: number = 60000) {
setInterval(async () => {
const { size } = await client.getSizeFromAnnotationQueue(queueId);
console.log(`[${new Date().toISOString()}] Current queue size: ${size}`);
if (size > 1000) {
console.warn("⚠️ Queue backlog is high!");
} else if (size === 0) {
console.log("✓ Queue is empty");
}
}, checkIntervalMs);
}
// Calculate progress percentage
async function getQueueProgress(queueId: string, totalAdded: number) {
const { size } = await client.getSizeFromAnnotationQueue(queueId);
const reviewed = totalAdded - size;
const percentComplete = (reviewed / totalAdded) * 100;
return {
total: totalAdded,
remaining: size,
reviewed,
percentComplete: percentComplete.toFixed(1),
};
}
// Wait for queue to be empty
async function waitForQueueCompletion(queueId: string, pollIntervalMs: number = 5000) {
while (true) {
const { size } = await client.getSizeFromAnnotationQueue(queueId);
if (size === 0) {
console.log("Queue processing complete!");
break;
}
console.log(`Waiting... ${size} runs remaining`);
await new Promise(resolve => setTimeout(resolve, pollIntervalMs));
}
}
Note: The workflow examples in this section are illustrative and may reference helper patterns for working with annotation queues. To retrieve runs from a queue, use getRunFromAnnotationQueue(queueId, index) to get individual runs by their index position, combined with getSizeFromAnnotationQueue(queueId) to determine the total number of runs in the queue.
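For instance, a minimal sketch of that iteration pattern (the queue ID below is a placeholder) might look like this:
import { Client } from "langsmith";
const client = new Client();
// Walk every run currently in a queue by index position ("queue-id-123" is a placeholder ID)
const queueId = "queue-id-123";
const { size } = await client.getSizeFromAnnotationQueue(queueId);
for (let i = 0; i < size; i++) {
const { run } = await client.getRunFromAnnotationQueue(queueId, i);
console.log(`Position ${i}: run ${run.id}`);
}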
Collect human feedback on production outputs for quality monitoring.
import { Client } from "langsmith";
const client = new Client();
// Setup: Create QA queue
const qaQueue = await client.createAnnotationQueue({
name: "Production QA",
description: "Quality assurance for customer-facing outputs",
rubricInstructions: `
Rate each response on:
1. Accuracy (0-1): Is the information correct?
2. Helpfulness (0-1): Does it answer the question?
3. Safety (0-1): Is it free from harmful content?
`,
});
// Step 1: Sample production runs
async function sampleProductionRuns(projectName: string, queueId: string, sampleRate: number = 0.1) {
const sampledRuns = [];
for await (const run of client.listRuns({
projectName,
isRoot: true,
})) {
// Random sampling
if (Math.random() < sampleRate) {
sampledRuns.push(run);
}
}
const runIds = sampledRuns.map(run => run.id);
// Add to QA queue
if (runIds.length > 0) {
await client.addRunsToAnnotationQueue({
queueId,
runIds,
});
console.log(`Added ${runIds.length} runs to QA queue`);
}
return sampledRuns;
}
// Step 2: Review and annotate
async function conductQAReview(queueId: string, batchSize: number = 50) {
const { size } = await client.getSizeFromAnnotationQueue(queueId);
const reviewCount = Math.min(size, batchSize);
for (let i = 0; i < reviewCount; i++) {
// Always fetch index 0: each reviewed run is removed from the queue below, shifting later runs forward
const runInfo = await client.getRunFromAnnotationQueue(queueId, 0);
const { run } = runInfo;
// Present to human reviewer
console.log(`\n--- Reviewing Run ${i + 1}/${reviewCount} ---`);
console.log(`Input: ${JSON.stringify(run.inputs)}`);
console.log(`Output: ${JSON.stringify(run.outputs)}`);
// Human reviewer provides scores
const humanReview = await getHumanReview(run);
// Create feedback with detailed scores
await client.createFeedback({
run_id: run.id,
key: "qa_review",
score: humanReview.overallScore,
comment: humanReview.comments,
value: humanReview.detailedScores,
feedbackSourceType: "app",
});
// Tag issues
if (humanReview.hasIssues) {
await client.createFeedback({
run_id: run.id,
key: "issue_flag",
score: 0,
comment: humanReview.issueDescription,
value: {
issueType: humanReview.issueType,
severity: humanReview.severity,
},
});
}
// Remove from queue after review
await client.deleteRunFromAnnotationQueue(queueId, run.id);
}
console.log(`\nCompleted ${reviewCount} reviews`);
}
// Step 3: Analyze QA results
async function analyzeQAResults(queueId: string) {
const stats = {
totalReviewed: 0,
totalScore: 0,
issueCount: 0,
byScoreRange: {
excellent: 0, // 0.9-1.0
good: 0, // 0.7-0.9
fair: 0, // 0.5-0.7
poor: 0, // 0.0-0.5
},
};
const { size } = await client.getSizeFromAnnotationQueue(queueId);
for (let i = 0; i < size; i++) {
const runInfo = await client.getRunFromAnnotationQueue(queueId, i);
const feedbackList = [];
for await (const feedback of client.listFeedback({
runIds: [runInfo.run.id],
feedbackKeys: ["qa_review"],
})) {
feedbackList.push(feedback);
}
if (feedbackList.length > 0) {
const feedback = feedbackList[0];
const score = typeof feedback.score === 'number' ? feedback.score : 0;
stats.totalReviewed++;
stats.totalScore += score;
// Categorize by score range
if (score >= 0.9) stats.byScoreRange.excellent++;
else if (score >= 0.7) stats.byScoreRange.good++;
else if (score >= 0.5) stats.byScoreRange.fair++;
else stats.byScoreRange.poor++;
if (score < 0.7) {
stats.issueCount++;
}
}
}
const avgScore = stats.totalReviewed > 0 ? stats.totalScore / stats.totalReviewed : 0;
const issueRate = stats.totalReviewed > 0 ? stats.issueCount / stats.totalReviewed : 0;
console.log(`\n=== QA Results ===`);
console.log(`Total Reviewed: ${stats.totalReviewed}`);
console.log(`Average Score: ${avgScore.toFixed(2)}`);
console.log(`Issue Rate: ${(issueRate * 100).toFixed(1)}%`);
console.log(`\nScore Distribution:`);
console.log(` Excellent (0.9-1.0): ${stats.byScoreRange.excellent}`);
console.log(` Good (0.7-0.9): ${stats.byScoreRange.good}`);
console.log(` Fair (0.5-0.7): ${stats.byScoreRange.fair}`);
console.log(` Poor (0.0-0.5): ${stats.byScoreRange.poor}`);
return { avgScore, issueRate, stats };
}
// Helper function for collecting human review (implement as needed)
async function getHumanReview(run: any): Promise<any> {
// In production, this would present UI to reviewer
// For example purposes, return mock data
return {
overallScore: 0.85,
detailedScores: {
accuracy: 0.9,
helpfulness: 0.8,
safety: 1.0,
},
comments: "Good response overall",
hasIssues: false,
issueDescription: "",
issueType: "",
severity: 0,
};
}
Collect human-annotated examples for model fine-tuning or evaluation datasets.
import { Client } from "langsmith";
const client = new Client();
// Setup: Create training data queue
const trainingQueue = await client.createAnnotationQueue({
name: "Training Data Collection",
description: "Collect high-quality examples for model fine-tuning",
rubricInstructions: `
For each example:
1. Verify the input is representative
2. Provide the ideal output
3. Mark any edge cases or special handling needed
`,
});
// Step 1: Add diverse examples to queue
async function collectDiverseExamples(projectName: string, queueId: string, targetCount: number = 100) {
const allRuns = [];
for await (const run of client.listRuns({
projectName,
isRoot: true,
})) {
allRuns.push(run);
}
// Select diverse examples using stratified sampling
const diverseRuns = selectDiverseExamples(allRuns, targetCount);
const runIds = diverseRuns.map(run => run.id);
await client.addRunsToAnnotationQueue({
queueId,
runIds,
});
console.log(`Added ${runIds.length} diverse examples to training queue`);
return diverseRuns;
}
// Step 2: Human annotation with corrections
async function annotateTrainingExamples(queueId: string) {
const annotatedExamples = [];
const { size } = await client.getSizeFromAnnotationQueue(queueId);
for (let i = 0; i < size; i++) {
const runInfo = await client.getRunFromAnnotationQueue(queueId, i);
const { run } = runInfo;
// Human provides ideal output
const annotation = await getHumanAnnotation(run);
// Create feedback with correction
await client.createFeedback({
run_id: run.id,
key: "human_annotation",
score: annotation.isCorrect ? 1 : 0,
comment: annotation.reasoning,
correction: {
outputs: annotation.idealOutput,
},
feedbackSourceType: "app",
});
// Store for training dataset
annotatedExamples.push({
input: run.inputs,
output: annotation.idealOutput,
metadata: {
originalRunId: run.id,
annotatedAt: new Date().toISOString(),
annotator: annotation.annotator,
quality: annotation.quality,
},
});
console.log(`Annotated ${i + 1}/${size} examples`);
}
return annotatedExamples;
}
// Step 3: Create dataset from annotations
async function createTrainingDataset(queueId: string, datasetName: string) {
// Create dataset
const dataset = await client.createDataset({
datasetName,
description: "Human-annotated training examples",
dataType: "kv",
});
// Get queue size
const { size } = await client.getSizeFromAnnotationQueue(queueId);
let addedCount = 0;
// Process all runs in queue
for (let i = 0; i < size; i++) {
const runInfo = await client.getRunFromAnnotationQueue(queueId, i);
// Get feedback with corrections
const feedbackList = [];
for await (const feedback of client.listFeedback({
runIds: [runInfo.run.id],
feedbackKeys: ["human_annotation"],
})) {
feedbackList.push(feedback);
}
for (const feedback of feedbackList) {
if (feedback.correction) {
// Add to dataset
await client.createExample({
dataset_id: dataset.id,
inputs: runInfo.run.inputs,
outputs: feedback.correction.outputs,
metadata: {
sourceRunId: runInfo.run.id,
annotationScore: feedback.score,
annotationComment: feedback.comment,
annotatedAt: feedback.created_at,
},
});
addedCount++;
}
}
}
console.log(`Created training dataset: ${dataset.name}`);
console.log(`Added ${addedCount} annotated examples`);
return dataset;
}
// Helper: Select diverse examples using various strategies
function selectDiverseExamples(runs: any[], targetCount: number): any[] {
// Strategy 1: Group by input type/category
const categories = new Map<string, any[]>();
for (const run of runs) {
const category = categorizeRun(run);
if (!categories.has(category)) {
categories.set(category, []);
}
categories.get(category)!.push(run);
}
// Strategy 2: Sample proportionally from each category
const samplesPerCategory = Math.ceil(targetCount / categories.size);
const selected: any[] = [];
for (const categoryRuns of categories.values()) {
// Random shuffle
const shuffled = categoryRuns.sort(() => Math.random() - 0.5);
// Take samples
selected.push(...shuffled.slice(0, samplesPerCategory));
}
// Return exactly targetCount runs
return selected.slice(0, targetCount);
}
function categorizeRun(run: any): string {
// Categorize based on input characteristics
const inputStr = JSON.stringify(run.inputs);
if (inputStr.includes("question")) return "question";
if (inputStr.includes("summary")) return "summary";
if (inputStr.includes("translate")) return "translation";
return "other";
}
// Helper function for collecting human annotation (implement as needed)
async function getHumanAnnotation(run: any): Promise<any> {
// In production, this would present UI to annotator
// For example purposes, return mock data
return {
isCorrect: false,
idealOutput: { answer: "Corrected answer based on human judgment" },
reasoning: "The original output was incomplete",
annotator: "reviewer-1",
quality: "high",
};
}
Compare outputs from different models or prompts side-by-side.
import { Client } from "langsmith";
const client = new Client();
// Setup: Create comparison queue
const comparisonQueue = await client.createAnnotationQueue({
name: "Model A vs Model B Comparison",
description: "Side-by-side comparison of two model versions",
rubricInstructions: `
Compare both outputs and select the better one based on:
- Accuracy: Which is more factually correct?
- Helpfulness: Which better addresses the user's need?
- Clarity: Which is easier to understand?
- Overall: Which would you prefer to show to users?
`,
});
// Step 1: Identify runs to compare
async function setupComparison(projectA: string, projectB: string, queueId: string) {
const runsAList = [];
const runsBList = [];
// Collect runs from both projects
for await (const run of client.listRuns({ projectName: projectA, isRoot: true })) {
runsAList.push(run);
}
for await (const run of client.listRuns({ projectName: projectB, isRoot: true })) {
runsBList.push(run);
}
// Match runs with same inputs
const pairs = matchRunsByInput(runsAList, runsBList);
// Add both runs from each pair to queue
const allRunIds = pairs.flatMap(pair => [pair.runA.id, pair.runB.id]);
await client.addRunsToAnnotationQueue({
queueId,
runIds: allRunIds,
});
console.log(`Added ${pairs.length} run pairs (${allRunIds.length} total runs) for comparison`);
return pairs;
}
// Step 2: Collect comparative judgments
async function conductComparison(queueId: string, pairs: Array<{runA: any, runB: any}>) {
for (const pair of pairs) {
// Present both outputs to human
console.log(`\n=== Comparison ===`);
console.log(`Model A Output: ${JSON.stringify(pair.runA.outputs)}`);
console.log(`Model B Output: ${JSON.stringify(pair.runB.outputs)}`);
const judgment = await getComparativeJudgment(pair.runA, pair.runB);
// Record preference
await client.createFeedback({
run_id: judgment.preferred === 'A' ? pair.runA.id : pair.runB.id,
key: "comparison_winner",
score: 1,
comment: judgment.reasoning,
value: {
comparison_pair: {
runA: pair.runA.id,
runB: pair.runB.id,
},
criteria: judgment.criteria,
preferredModel: judgment.preferred,
},
});
// Record specific criteria scores for both models
for (const [criterion, scores] of Object.entries(judgment.criteriaScores)) {
await client.createFeedback({
run_id: pair.runA.id,
key: `criteria_${criterion}_modelA`,
score: (scores as any).modelA,
comment: `Model A ${criterion} score`,
});
await client.createFeedback({
run_id: pair.runB.id,
key: `criteria_${criterion}_modelB`,
score: (scores as any).modelB,
comment: `Model B ${criterion} score`,
});
}
}
console.log(`Completed ${pairs.length} comparative judgments`);
}
// Step 3: Analyze comparison results
async function analyzeComparison(pairs: Array<{runA: any, runB: any}>) {
let modelAWins = 0;
let modelBWins = 0;
const criteriaScores: Record<string, { A: number[], B: number[] }> = {};
for (const pair of pairs) {
// Check winner feedback
const feedbackAList = [];
const feedbackBList = [];
for await (const feedback of client.listFeedback({
runIds: [pair.runA.id],
feedbackKeys: ["comparison_winner"],
})) {
feedbackAList.push(feedback);
}
for await (const feedback of client.listFeedback({
runIds: [pair.runB.id],
feedbackKeys: ["comparison_winner"],
})) {
feedbackBList.push(feedback);
}
if (feedbackAList.length > 0) modelAWins++;
if (feedbackBList.length > 0) modelBWins++;
// Collect criteria scores
for await (const feedback of client.listFeedback({
runIds: [pair.runA.id],
})) {
if (feedback.key.startsWith("criteria_")) {
const criterion = feedback.key.replace("criteria_", "").replace("_modelA", "");
if (!criteriaScores[criterion]) {
criteriaScores[criterion] = { A: [], B: [] };
}
if (typeof feedback.score === 'number') {
criteriaScores[criterion].A.push(feedback.score);
}
}
}
for await (const feedback of client.listFeedback({
runIds: [pair.runB.id],
})) {
if (feedback.key.startsWith("criteria_")) {
const criterion = feedback.key.replace("criteria_", "").replace("_modelB", "");
if (!criteriaScores[criterion]) {
criteriaScores[criterion] = { A: [], B: [] };
}
if (typeof feedback.score === 'number') {
criteriaScores[criterion].B.push(feedback.score);
}
}
}
}
console.log(`\n=== Comparison Results ===`);
console.log(`Total Comparisons: ${pairs.length}`);
console.log(`Model A wins: ${modelAWins} (${(modelAWins / pairs.length * 100).toFixed(1)}%)`);
console.log(`Model B wins: ${modelBWins} (${(modelBWins / pairs.length * 100).toFixed(1)}%)`);
console.log(`\nCriteria Breakdown:`);
for (const [criterion, scores] of Object.entries(criteriaScores)) {
const avgA = scores.A.reduce((a, b) => a + b, 0) / scores.A.length;
const avgB = scores.B.reduce((a, b) => a + b, 0) / scores.B.length;
console.log(` ${criterion}:`);
console.log(` Model A: ${avgA.toFixed(2)}`);
console.log(` Model B: ${avgB.toFixed(2)}`);
}
return { modelAWins, modelBWins, totalComparisons: pairs.length, criteriaScores };
}
// Helper function to match runs
function matchRunsByInput(runsA: any[], runsB: any[]) {
const pairs = [];
for (const runA of runsA) {
const matchingRunB = runsB.find(
runB => JSON.stringify(runB.inputs) === JSON.stringify(runA.inputs)
);
if (matchingRunB) {
pairs.push({ runA, runB: matchingRunB });
}
}
return pairs;
}
// Helper function for collecting comparative judgment (implement as needed)
async function getComparativeJudgment(runA: any, runB: any): Promise<any> {
// In production, this would present UI with both outputs
// For example purposes, return mock data
return {
preferred: 'A',
reasoning: "Model A provides more accurate and comprehensive response",
criteria: ["accuracy", "helpfulness", "clarity"],
criteriaScores: {
accuracy: { modelA: 0.9, modelB: 0.7 },
helpfulness: { modelA: 0.8, modelB: 0.6 },
clarity: { modelA: 0.85, modelB: 0.75 },
},
};
}
Identify and review edge cases and failure modes.
import { Client } from "langsmith";
const client = new Client();
// Setup: Create edge case queue
const edgeCaseQueue = await client.createAnnotationQueue({
name: "Edge Case Review",
description: "Review unusual inputs and failure modes",
rubricInstructions: `
For each edge case:
1. Categorize the type of edge case
2. Rate severity (1-5)
3. Mark if reproducible
4. Indicate if fix is needed
5. Suggest improvement if applicable
`,
});
// Step 1: Detect edge cases automatically
async function detectEdgeCases(projectName: string, queueId: string) {
const edgeCases = [];
for await (const run of client.listRuns({
projectName,
isRoot: true,
})) {
// Criteria for edge cases
const hasError = run.error !== undefined && run.error !== null;
const unusualLatency = run.end_time && run.start_time &&
(run.end_time - run.start_time) > 30000; // >30s
const unusualInput = isUnusualInput(run.inputs);
const lowConfidence = run.outputs?.confidence && run.outputs.confidence < 0.5;
const unexpectedOutput = isUnexpectedOutput(run.outputs);
if (hasError || unusualLatency || unusualInput || lowConfidence || unexpectedOutput) {
edgeCases.push(run);
}
}
const runIds = edgeCases.map(run => run.id);
if (runIds.length > 0) {
await client.addRunsToAnnotationQueue({
queueId,
runIds,
});
console.log(`Added ${runIds.length} edge cases to queue`);
}
return edgeCases;
}
// Step 2: Categorize and analyze edge cases
async function categorizeEdgeCases(queueId: string) {
const categories: Record<string, number> = {};
const { size } = await client.getSizeFromAnnotationQueue(queueId);
for (let i = 0; i < size; i++) {
const runInfo = await client.getRunFromAnnotationQueue(queueId, i);
const { run } = runInfo;
// Human categorizes the edge case
const category = await categorizeEdgeCase(run);
categories[category.type] = (categories[category.type] || 0) + 1;
// Record categorization
await client.createFeedback({
run_id: run.id,
key: "edge_case_category",
score: category.severity,
comment: category.description,
value: {
category: category.type,
reproducible: category.reproducible,
needsFix: category.needsFix,
suggestedFix: category.suggestedFix,
},
});
}
console.log("\n=== Edge Case Categories ===");
for (const [type, count] of Object.entries(categories)) {
console.log(` ${type}: ${count}`);
}
return categories;
}
// Helper functions
function isUnusualInput(inputs: any): boolean {
const inputStr = JSON.stringify(inputs);
// Check for unusual patterns
return (
inputStr.length > 10000 || // Very long input
inputStr.includes("�") || // Encoding issues
/[^\x00-\x7F]{20,}/.test(inputStr) || // Many non-ASCII chars
inputStr.length < 5 // Very short input
);
}
function isUnexpectedOutput(outputs: any): boolean {
if (!outputs) return true;
const outputStr = JSON.stringify(outputs);
// Check for unexpected patterns
return (
outputStr.length === 0 || // Empty output
outputStr.includes("error") || // Error in output
outputStr.includes("undefined") // Undefined values
);
}
async function categorizeEdgeCase(run: any): Promise<any> {
// In production, this would present UI to reviewer
// For example purposes, return mock data
return {
type: "unusual_input_encoding",
severity: 0.7,
description: "Input contains unusual character encoding",
reproducible: true,
needsFix: true,
suggestedFix: "Add input validation and normalization",
};
}
Annotation queues work seamlessly with LangSmith's feedback system to collect and store human annotations.
import { Client } from "langsmith";
const client = new Client();
// Complete annotation workflow
async function completeAnnotationWorkflow(queueId: string, batchSize: number = 10) {
// Get queue size
const { size } = await client.getSizeFromAnnotationQueue(queueId);
const processingCount = Math.min(size, batchSize);
console.log(`Processing ${processingCount} runs from queue (${size} total)`);
for (let i = 0; i < processingCount; i++) {
const runInfo = await client.getRunFromAnnotationQueue(queueId, i);
const { run } = runInfo;
// Collect human feedback on multiple dimensions
const annotation = {
accuracy: await rateAccuracy(run),
helpfulness: await rateHelpfulness(run),
safety: await rateSafety(run),
overallScore: 0,
comments: "",
corrections: null,
};
annotation.overallScore =
(annotation.accuracy + annotation.helpfulness + annotation.safety) / 3;
// Submit multiple feedback entries
await client.createFeedback({
run_id: run.id,
key: "accuracy",
score: annotation.accuracy,
comment: "Factual correctness rating",
});
await client.createFeedback({
run_id: run.id,
key: "helpfulness",
score: annotation.helpfulness,
comment: "Usefulness rating",
});
await client.createFeedback({
run_id: run.id,
key: "safety",
score: annotation.safety,
comment: "Safety rating",
});
await client.createFeedback({
run_id: run.id,
key: "overall_human_rating",
score: annotation.overallScore,
comment: annotation.comments,
correction: annotation.corrections,
feedbackSourceType: "app",
});
console.log(`Annotated run ${i + 1}/${processingCount}`);
}
console.log("Annotation workflow complete");
}
// Query feedback from annotated runs
async function analyzeAnnotations(queueId: string) {
const { size } = await client.getSizeFromAnnotationQueue(queueId);
const feedbackByKey: Record<string, number[]> = {};
const runIds: string[] = [];
// Collect all run IDs from queue
for (let i = 0; i < size; i++) {
const runInfo = await client.getRunFromAnnotationQueue(queueId, i);
runIds.push(runInfo.run.id);
}
// Get all feedback for these runs
for await (const feedback of client.listFeedback({
runIds,
})) {
if (!feedbackByKey[feedback.key]) {
feedbackByKey[feedback.key] = [];
}
if (typeof feedback.score === 'number') {
feedbackByKey[feedback.key].push(feedback.score);
}
}
// Calculate averages
console.log("\n=== Annotation Summary ===");
for (const [key, scores] of Object.entries(feedbackByKey)) {
const avg = scores.reduce((a, b) => a + b, 0) / scores.length;
console.log(` ${key}: ${avg.toFixed(2)} (n=${scores.length})`);
}
return feedbackByKey;
}
// Helper functions for rating dimensions (implement as needed)
async function rateAccuracy(run: any): Promise<number> {
// Present to reviewer for accuracy rating
return 0.85; // Mock score
}
async function rateHelpfulness(run: any): Promise<number> {
// Present to reviewer for helpfulness rating
return 0.90; // Mock score
}
async function rateSafety(run: any): Promise<number> {
// Present to reviewer for safety rating
return 1.0; // Mock score
}
Create specialized queues for different purposes to maintain clarity and organization.
import { Client } from "langsmith";
const client = new Client();
// Create specialized queues for different purposes
async function setupQueueStructure() {
// High-priority immediate review
const urgentQueue = await client.createAnnotationQueue({
name: "Urgent Review",
description: "High-priority runs requiring immediate attention",
rubricInstructions: "Focus on critical issues: safety, factual errors, harmful content",
});
// Regular quality checks
const regularQAQueue = await client.createAnnotationQueue({
name: "Regular QA",
description: "Routine quality assurance sampling",
rubricInstructions: "Standard quality metrics: accuracy, helpfulness, clarity",
});
// Training data collection
const trainingQueue = await client.createAnnotationQueue({
name: "Training Examples",
description: "High-quality examples for model training",
rubricInstructions: "Provide ideal outputs for training. Ensure examples are clear and representative.",
});
// Edge case analysis
const edgeCaseQueue = await client.createAnnotationQueue({
name: "Edge Cases",
description: "Unusual inputs and edge cases for review",
rubricInstructions: "Categorize edge case type, assess severity, determine if fix is needed",
});
// Comparative evaluation
const comparisonQueue = await client.createAnnotationQueue({
name: "Model Comparison",
description: "A/B testing between model versions",
rubricInstructions: "Compare models side-by-side on accuracy, helpfulness, and overall preference",
});
return { urgentQueue, regularQAQueue, trainingQueue, edgeCaseQueue, comparisonQueue };
}
// Route runs to appropriate queues
async function routeRunToQueue(run: any, queues: any) {
// Route based on characteristics
if (run.error || run.outputs?.safety_score < 0.5) {
await client.addRunsToAnnotationQueue({
queueId: queues.urgentQueue.id,
runIds: [run.id],
});
} else if (run.outputs?.confidence < 0.7) {
await client.addRunsToAnnotationQueue({
queueId: queues.edgeCaseQueue.id,
runIds: [run.id],
});
} else if (Math.random() < 0.1) { // 10% sampling
await client.addRunsToAnnotationQueue({
queueId: queues.regularQAQueue.id,
runIds: [run.id],
});
}
}
Use different sampling strategies to select runs for annotation efficiently.
import { Client } from "langsmith";
const client = new Client();
// Random sampling
async function randomSample(projectName: string, queueId: string, rate: number) {
const sampledRuns = [];
for await (const run of client.listRuns({ projectName, isRoot: true })) {
if (Math.random() < rate) {
sampledRuns.push(run);
}
}
const runIds = sampledRuns.map(r => r.id);
if (runIds.length > 0) {
await client.addRunsToAnnotationQueue({ queueId, runIds });
console.log(`Random sample: added ${runIds.length} runs`);
}
return sampledRuns;
}
// Stratified sampling (ensure diverse examples)
async function stratifiedSample(
projectName: string,
queueId: string,
samplesPerCategory: number
) {
const runs = [];
for await (const run of client.listRuns({ projectName, isRoot: true })) {
runs.push(run);
}
// Group by category
const categories: Record<string, any[]> = {};
for (const run of runs) {
const category = categorizeRun(run);
if (!categories[category]) categories[category] = [];
categories[category].push(run);
}
// Sample from each category
const sampledIds: string[] = [];
for (const [category, categoryRuns] of Object.entries(categories)) {
const samples = categoryRuns
.sort(() => Math.random() - 0.5)
.slice(0, samplesPerCategory);
sampledIds.push(...samples.map(r => r.id));
console.log(`Sampled ${samples.length} from category: ${category}`);
}
await client.addRunsToAnnotationQueue({ queueId, runIds: sampledIds });
console.log(`Stratified sample: added ${sampledIds.length} runs across ${Object.keys(categories).length} categories`);
}
// Uncertainty sampling (focus on low-confidence predictions)
async function uncertaintySample(projectName: string, queueId: string, threshold: number = 0.7) {
const uncertainRuns = [];
for await (const run of client.listRuns({ projectName, isRoot: true })) {
const confidence = run.outputs?.confidence;
if (confidence !== undefined && confidence < threshold) {
uncertainRuns.push(run);
}
}
const runIds = uncertainRuns.map(r => r.id);
if (runIds.length > 0) {
await client.addRunsToAnnotationQueue({ queueId, runIds });
console.log(`Uncertainty sample: added ${runIds.length} low-confidence runs`);
}
return uncertainRuns;
}
// Active learning sampling (most informative examples)
async function activeLearningSample(projectName: string, queueId: string, count: number) {
const runs = [];
for await (const run of client.listRuns({ projectName, isRoot: true })) {
runs.push(run);
}
// Score each run by informativeness
const scored = runs.map(run => ({
run,
score: calculateInformativenessScore(run),
}));
// Sort by informativeness (highest first)
scored.sort((a, b) => b.score - a.score);
// Take top N most informative
const topRuns = scored.slice(0, count);
const runIds = topRuns.map(item => item.run.id);
await client.addRunsToAnnotationQueue({ queueId, runIds });
console.log(`Active learning sample: added ${runIds.length} most informative runs`);
}
// Temporal sampling (sample across time periods)
async function temporalSample(
projectName: string,
queueId: string,
daysToSample: number,
samplesPerDay: number
) {
const now = new Date();
const sampledIds: string[] = [];
for (let day = 0; day < daysToSample; day++) {
const startTime = new Date(now);
startTime.setDate(startTime.getDate() - day);
startTime.setHours(0, 0, 0, 0);
const endTime = new Date(startTime);
endTime.setHours(23, 59, 59, 999);
const dayRuns = [];
for await (const run of client.listRuns({
projectName,
isRoot: true,
startTime,
endTime,
})) {
dayRuns.push(run);
}
// Random sample from this day
const samples = dayRuns
.sort(() => Math.random() - 0.5)
.slice(0, samplesPerDay)
.map(r => r.id);
sampledIds.push(...samples);
console.log(`Day -${day}: sampled ${samples.length} runs`);
}
await client.addRunsToAnnotationQueue({ queueId, runIds: sampledIds });
console.log(`Temporal sample: added ${sampledIds.length} runs across ${daysToSample} days`);
}
function categorizeRun(run: any): string {
// Implement categorization logic based on run characteristics
const inputStr = JSON.stringify(run.inputs);
if (inputStr.includes("question")) return "question_answering";
if (inputStr.includes("summary")) return "summarization";
if (inputStr.includes("translate")) return "translation";
if (run.run_type === "tool") return "tool_use";
if (run.run_type === "retriever") return "retrieval";
return "other";
}
function calculateInformativenessScore(run: any): number {
// Score based on how informative this example would be for training/evaluation
let score = 0;
// Higher score for low confidence (uncertain cases are informative)
const confidence = run.outputs?.confidence || 1;
score += (1 - confidence) * 0.4;
// Higher score for edge cases
if (run.error) score += 0.3;
if (isUnusualInput(run.inputs)) score += 0.2;
// Higher score for diverse inputs
const inputComplexity = JSON.stringify(run.inputs).length / 1000;
score += Math.min(inputComplexity, 0.1);
return score;
}
Process annotation queues in batches for efficient review workflows.
import { Client } from "langsmith";
const client = new Client();
// Process annotation queue in batches
async function processBatchAnnotations(
queueId: string,
batchSize: number,
annotateFunction: (runs: RunWithAnnotationQueueInfo[]) => Promise<void>
) {
const { size } = await client.getSizeFromAnnotationQueue(queueId);
let processedCount = 0;
for (let i = 0; i < size; i += batchSize) {
const batch: any[] = [];
const actualBatchSize = Math.min(batchSize, size - i);
// Collect batch
for (let j = 0; j < actualBatchSize; j++) {
try {
const runInfo = await client.getRunFromAnnotationQueue(queueId, i + j);
batch.push(runInfo);
} catch (error) {
// End of queue or error
break;
}
}
if (batch.length === 0) break;
console.log(`Processing batch starting at offset ${i} (${batch.length} runs)...`);
await annotateFunction(batch);
processedCount += batch.length;
}
console.log(`All batches processed (${processedCount} total runs)`);
return processedCount;
}
// Example usage: Parallel annotation within batches
await processBatchAnnotations("queue-id-123", 20, async (runs) => {
// Process runs in parallel within batch
await Promise.all(runs.map(async (runInfo) => {
const feedback = await collectFeedback(runInfo.run);
await client.createFeedback({
run_id: runInfo.run.id,
key: "human_review",
score: feedback.score,
comment: feedback.comment,
value: feedback.details,
});
}));
});
// Batch annotation with progress tracking
async function batchAnnotateWithProgress(queueId: string, batchSize: number) {
const { size } = await client.getSizeFromAnnotationQueue(queueId);
let annotatedCount = 0;
for (let i = 0; i < size; i += batchSize) {
const batch: any[] = [];
const actualBatchSize = Math.min(batchSize, size - i);
for (let j = 0; j < actualBatchSize; j++) {
const runInfo = await client.getRunFromAnnotationQueue(queueId, i + j);
batch.push(runInfo);
}
// Annotate batch
for (const runInfo of batch) {
const annotation = await getHumanAnnotation(runInfo.run);
await client.createFeedback({
run_id: runInfo.run.id,
key: "annotation",
score: annotation.score,
comment: annotation.comment,
correction: annotation.correction,
});
annotatedCount++;
}
// Show progress
const progress = ((annotatedCount / size) * 100).toFixed(1);
console.log(`Progress: ${annotatedCount}/${size} (${progress}%)`);
}
}
// Helper function for collecting feedback (implement as needed)
async function collectFeedback(run: any): Promise<any> {
return {
score: 0.85,
comment: "Good quality output",
details: { accuracy: 0.9, helpfulness: 0.8 },
};
};
Monitor annotation progress across queues.
import { Client } from "langsmith";
const client = new Client();
// Track annotation progress for a single queue
async function trackAnnotationProgress(queueId: string) {
const queue = await client.readAnnotationQueue(queueId);
const { size } = await client.getSizeFromAnnotationQueue(queueId);
const runIds: string[] = [];
for (let i = 0; i < size; i++) {
const runInfo = await client.getRunFromAnnotationQueue(queueId, i);
runIds.push(runInfo.run.id);
}
// Count how many have feedback
let annotatedCount = 0;
for (const runId of runIds) {
const feedbackList = [];
for await (const feedback of client.listFeedback({ runIds: [runId] })) {
feedbackList.push(feedback);
}
if (feedbackList.length > 0) {
annotatedCount++;
}
}
const progress = {
queueName: queue.name,
queueId: queue.id,
totalRuns: size,
annotated: annotatedCount,
remaining: size - annotatedCount,
percentComplete: size > 0 ? ((annotatedCount / size) * 100).toFixed(1) : "0.0",
};
console.log(`\n=== Queue Progress: ${progress.queueName} ===`);
console.log(`Progress: ${progress.annotated}/${progress.totalRuns} (${progress.percentComplete}%)`);
console.log(`Remaining: ${progress.remaining}`);
return progress;
}
// Track progress across all queues
async function trackAllQueueProgress() {
const allProgress = [];
for await (const queue of client.listAnnotationQueues()) {
const progress = await trackAnnotationProgress(queue.id);
allProgress.push(progress);
}
console.log(`\n=== Overall Progress ===`);
const totalRuns = allProgress.reduce((sum, p) => sum + p.totalRuns, 0);
const totalAnnotated = allProgress.reduce((sum, p) => sum + p.annotated, 0);
const overallPercent = totalRuns > 0 ? ((totalAnnotated / totalRuns) * 100).toFixed(1) : "0.0";
console.log(`Total Runs: ${totalRuns}`);
console.log(`Total Annotated: ${totalAnnotated}`);
console.log(`Overall Progress: ${overallPercent}%`);
return allProgress;
}
// Monitor annotation velocity
async function measureAnnotationVelocity(queueId: string, intervalHours: number = 24) {
const measurements: Array<{ timestamp: Date; annotated: number }> = [];
setInterval(async () => {
const { size } = await client.getSizeFromAnnotationQueue(queueId);
const runIds: string[] = [];
for (let i = 0; i < size; i++) {
const runInfo = await client.getRunFromAnnotationQueue(queueId, i);
runIds.push(runInfo.run.id);
}
let annotated = 0;
for (const runId of runIds) {
const feedbackList = [];
for await (const feedback of client.listFeedback({ runIds: [runId] })) {
feedbackList.push(feedback);
}
if (feedbackList.length > 0) annotated++;
}
measurements.push({
timestamp: new Date(),
annotated,
});
// Calculate velocity if we have at least 2 measurements
if (measurements.length >= 2) {
const last = measurements[measurements.length - 1];
const previous = measurements[measurements.length - 2];
const delta = last.annotated - previous.annotated;
const hours = (last.timestamp.getTime() - previous.timestamp.getTime()) / (1000 * 60 * 60);
const velocity = delta / hours;
console.log(`Annotation velocity: ${velocity.toFixed(1)} annotations/hour`);
}
}, intervalHours * 60 * 60 * 1000);
}
Automatically add runs to queues based on criteria.
import { Client } from "langsmith";
const client = new Client();
// Automatically add runs based on criteria
async function autoPopulateQueue(projectName: string, queueId: string, intervalMs: number = 3600000) {
// Run periodically (e.g., via cron job)
setInterval(async () => {
const recentRuns = [];
for await (const run of client.listRuns({
projectName,
isRoot: true,
// Get runs from last hour
startTime: new Date(Date.now() - 3600000),
})) {
recentRuns.push(run);
}
// Filter based on criteria
const runsNeedingReview = recentRuns.filter(run => {
return (
run.error !== undefined ||
(run.outputs?.confidence && run.outputs.confidence < 0.7) ||
hasUnusualPattern(run) ||
hasNegativeFeedback(run)
);
});
if (runsNeedingReview.length > 0) {
await client.addRunsToAnnotationQueue({
queueId,
runIds: runsNeedingReview.map(r => r.id),
});
console.log(`[${new Date().toISOString()}] Auto-added ${runsNeedingReview.length} runs to queue`);
}
}, intervalMs);
}
function hasNegativeFeedback(run: any): boolean {
// Check if run has existing negative feedback
return false; // Implement based on your needs
}
// Trigger-based population
async function populateOnTrigger(projectName: string, queueId: string) {
// Listen for specific events and add runs
for await (const run of client.listRuns({
projectName,
isRoot: true,
})) {
// Check trigger conditions
if (shouldTriggerReview(run)) {
await client.addRunsToAnnotationQueue({
queueId,
runIds: [run.id],
});
console.log(`Triggered review for run: ${run.id}`);
}
}
}
function shouldTriggerReview(run: any): boolean {
// Complex trigger logic
const triggers = {
hasError: run.error !== undefined,
highLatency: run.end_time && run.start_time && (run.end_time - run.start_time) > 10000,
unusualOutput: run.outputs && JSON.stringify(run.outputs).includes("error"),
flaggedBySystem: run.tags?.includes("flagged"),
};
// Trigger if any condition is met
return Object.values(triggers).some(Boolean);
}
// Smart queue population with deduplication
async function smartPopulateQueue(projectName: string, queueId: string) {
const existingRunIds = new Set<string>();
// Get existing runs in queue
const { size } = await client.getSizeFromAnnotationQueue(queueId);
for (let i = 0; i < size; i++) {
const runInfo = await client.getRunFromAnnotationQueue(queueId, i);
existingRunIds.add(runInfo.run.id);
}
// Find new runs to add
const newRunIds: string[] = [];
for await (const run of client.listRuns({
projectName,
isRoot: true,
})) {
if (!existingRunIds.has(run.id) && shouldAddToQueue(run)) {
newRunIds.push(run.id);
}
}
if (newRunIds.length > 0) {
await client.addRunsToAnnotationQueue({ queueId, runIds: newRunIds });
console.log(`Added ${newRunIds.length} new runs (skipped ${existingRunIds.size} already in queue)`);
}
}
function shouldAddToQueue(run: any): boolean {
// Implement your queue admission criteria
return run.outputs?.confidence < 0.7;
}
Implement multi-stage annotation pipelines with different review levels.
import { Client } from "langsmith";
const client = new Client();
// Multi-stage annotation pipeline
async function multiStageAnnotation() {
// Stage 1: Initial screening
const screeningQueue = await client.createAnnotationQueue({
name: "Stage 1: Initial Screening",
description: "First pass: quick review to flag issues",
rubricInstructions: "Quick pass/fail: Does this need detailed review? (30 seconds max)",
});
// Stage 2: Detailed review
const detailedQueue = await client.createAnnotationQueue({
name: "Stage 2: Detailed Review",
description: "In-depth analysis of flagged items",
rubricInstructions: "Thorough review: accuracy, helpfulness, safety, provide corrections",
});
// Stage 3: Final verification
const verificationQueue = await client.createAnnotationQueue({
name: "Stage 3: Final Verification",
description: "Verify corrections before deployment",
rubricInstructions: "Verify suggested corrections are appropriate and complete",
});
// Process pipeline
await processScreening(screeningQueue.id, detailedQueue.id);
await processDetailedReview(detailedQueue.id, verificationQueue.id);
await processFinalVerification(verificationQueue.id);
return { screeningQueue, detailedQueue, verificationQueue };
}
async function processScreening(screeningQueueId: string, detailedQueueId: string) {
const { size } = await client.getSizeFromAnnotationQueue(screeningQueueId);
let escalatedCount = 0;
for (let i = 0; i < size; i++) {
// Always fetch index 0: each screened run is removed from this queue below, shifting later runs forward
const runInfo = await client.getRunFromAnnotationQueue(screeningQueueId, 0);
const { run } = runInfo;
// Quick screening (30 seconds max)
const needsDetailedReview = await quickScreen(run);
// Record screening result
await client.createFeedback({
run_id: run.id,
key: "screening_result",
score: needsDetailedReview ? 0 : 1,
comment: needsDetailedReview ? "Needs detailed review" : "Passed screening",
value: { stage: "screening", escalated: needsDetailedReview },
});
if (needsDetailedReview) {
// Move to detailed review
await client.addRunsToAnnotationQueue({
queueId: detailedQueueId,
runIds: [run.id],
});
escalatedCount++;
}
// Remove from screening queue
await client.deleteRunFromAnnotationQueue(screeningQueueId, run.id);
}
console.log(`Screening complete: ${escalatedCount} escalated to detailed review`);
}
async function processDetailedReview(detailedQueueId: string, verificationQueueId: string) {
const { size } = await client.getSizeFromAnnotationQueue(detailedQueueId);
let correctionCount = 0;
  for (let i = 0; i < size; i++) {
    // Always read index 0: each run is removed from the queue at the end of the iteration.
    const runInfo = await client.getRunFromAnnotationQueue(detailedQueueId, 0);
const { run } = runInfo;
// Detailed review (5-10 minutes)
const review = await detailedReview(run);
// Create detailed feedback
await client.createFeedback({
run_id: run.id,
key: "detailed_review",
score: review.score,
comment: review.comment,
correction: review.correction,
value: {
stage: "detailed_review",
dimensions: review.dimensions,
needsCorrection: !!review.correction,
},
});
// If correction provided, send to verification
if (review.correction) {
await client.addRunsToAnnotationQueue({
queueId: verificationQueueId,
runIds: [run.id],
});
correctionCount++;
}
// Remove from detailed queue
await client.deleteRunFromAnnotationQueue(detailedQueueId, run.id);
}
console.log(`Detailed review complete: ${correctionCount} sent to verification`);
}
async function processFinalVerification(verificationQueueId: string) {
const { size } = await client.getSizeFromAnnotationQueue(verificationQueueId);
let approvedCount = 0;
  for (let i = 0; i < size; i++) {
    // Always read index 0: each run is removed from the queue at the end of the iteration.
    const runInfo = await client.getRunFromAnnotationQueue(verificationQueueId, 0);
const { run } = runInfo;
// Get the correction from detailed review
const correctionFeedback = [];
for await (const feedback of client.listFeedback({
runIds: [run.id],
feedbackKeys: ["detailed_review"],
})) {
correctionFeedback.push(feedback);
}
if (correctionFeedback.length > 0 && correctionFeedback[0].correction) {
// Verify correction
const verification = await verifyCorrection(run, correctionFeedback[0].correction);
// Record verification result
await client.createFeedback({
run_id: run.id,
key: "verification_result",
score: verification.approved ? 1 : 0,
comment: verification.comment,
value: {
stage: "verification",
approved: verification.approved,
verifier: verification.verifier,
},
});
if (verification.approved) {
approvedCount++;
// Could update production model or dataset here
console.log(`✓ Approved correction for run ${run.id}`);
}
}
// Remove from verification queue
await client.deleteRunFromAnnotationQueue(verificationQueueId, run.id);
}
console.log(`Verification complete: ${approvedCount} corrections approved`);
}
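// Optional follow-up sketch (assumptions: the dataset name is a placeholder and the client is
// assumed to expose createExample(inputs, outputs, { datasetName })): persist an approved
// correction as a dataset example for fine-tuning or regression testing.
async function saveApprovedCorrection(run: any, correction: any) {
  await client.createExample(run.inputs, correction.outputs, {
    datasetName: "approved-corrections",
  });
}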
// Helper functions (implement as needed)
async function quickScreen(run: any): Promise<boolean> {
// Quick screening logic - is detailed review needed?
return run.error !== undefined || run.outputs?.confidence < 0.7;
}
async function detailedReview(run: any): Promise<any> {
// Detailed review logic
return {
score: 0.75,
comment: "Needs minor correction",
correction: { outputs: { answer: "Corrected answer" } },
dimensions: { accuracy: 0.7, helpfulness: 0.8, safety: 1.0 },
};
}
async function verifyCorrection(run: any, correction: any): Promise<any> {
// Verification logic
return {
approved: true,
comment: "Correction looks good",
verifier: "senior-reviewer",
};
}
Move runs between queues as they progress through annotation stages.
import { Client } from "langsmith";
const client = new Client();
// Define workflow stages
const workflow = {
intake: await client.createAnnotationQueue({
name: "Intake",
description: "Initial triage and categorization",
}),
technical: await client.createAnnotationQueue({
name: "Technical Review",
description: "Technical accuracy review",
}),
editorial: await client.createAnnotationQueue({
name: "Editorial Review",
description: "Clarity and communication review",
}),
approved: await client.createAnnotationQueue({
name: "Approved",
description: "Final approved examples",
}),
};
// Process intake queue and route to appropriate next stage
async function processIntake(intakeQueueId: string, workflowQueues: any) {
const { size } = await client.getSizeFromAnnotationQueue(intakeQueueId);
  for (let i = 0; i < size; i++) {
    // Always read index 0: each run is removed from the intake queue at the end of the iteration.
    const runInfo = await client.getRunFromAnnotationQueue(intakeQueueId, 0);
const { run } = runInfo;
// Categorize and route
const category = await categorizeForWorkflow(run);
let nextQueueId;
if (category.needsTechnicalReview) {
nextQueueId = workflowQueues.technical.id;
} else if (category.needsEditorialReview) {
nextQueueId = workflowQueues.editorial.id;
} else {
nextQueueId = workflowQueues.approved.id;
}
// Add to next queue
await client.addRunsToAnnotationQueue({
queueId: nextQueueId,
runIds: [run.id],
});
// Record routing decision
await client.createFeedback({
run_id: run.id,
key: "workflow_routing",
value: {
from: "intake",
      to: category.needsTechnicalReview ? "technical" : category.needsEditorialReview ? "editorial" : "approved",
reason: category.reason,
},
});
// Remove from intake
await client.deleteRunFromAnnotationQueue(intakeQueueId, run.id);
}
}
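// Illustrative promotion helper (stage names and feedback reuse are assumptions): once a stage
// signs off on a run, move it to the next queue and drop it from the current one, so the same
// pattern carries runs from technical review to editorial review to approved.
async function promoteRun(
  runId: string,
  fromQueueId: string,
  toQueueId: string,
  fromStage: string,
  toStage: string
) {
  await client.addRunsToAnnotationQueue({ queueId: toQueueId, runIds: [runId] });
  await client.createFeedback({
    run_id: runId,
    key: "workflow_routing",
    value: { from: fromStage, to: toStage, reason: "stage sign-off" },
  });
  await client.deleteRunFromAnnotationQueue(fromQueueId, runId);
}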
async function categorizeForWorkflow(run: any): Promise<any> {
// Categorization logic
return {
needsTechnicalReview: run.outputs?.technical_terms_count > 5,
needsEditorialReview: run.outputs?.readability_score < 0.7,
reason: "Contains technical content",
};
}
Implement priority-based processing for annotation queues.
import { Client } from "langsmith";
const client = new Client();
// Create priority-based queue system
async function setupPriorityQueues() {
return {
critical: await client.createAnnotationQueue({
name: "P0 - Critical",
description: "Critical issues requiring immediate review",
}),
high: await client.createAnnotationQueue({
name: "P1 - High Priority",
description: "High-priority items for review within 24 hours",
}),
medium: await client.createAnnotationQueue({
name: "P2 - Medium Priority",
description: "Medium-priority items for review within 1 week",
}),
low: await client.createAnnotationQueue({
name: "P3 - Low Priority",
description: "Low-priority items for review when capacity allows",
}),
};
}
// Route runs to priority queues
async function routeToPriorityQueue(run: any, priorityQueues: any) {
const priority = calculatePriority(run);
let targetQueue;
if (priority >= 0.9) targetQueue = priorityQueues.critical;
else if (priority >= 0.7) targetQueue = priorityQueues.high;
else if (priority >= 0.4) targetQueue = priorityQueues.medium;
else targetQueue = priorityQueues.low;
await client.addRunsToAnnotationQueue({
queueId: targetQueue.id,
runIds: [run.id],
});
// Tag with priority
await client.createFeedback({
run_id: run.id,
key: "priority_score",
score: priority,
value: { priority_level: targetQueue.name },
});
}
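// Illustrative wiring (the project name is an assumption): score and route recent root runs;
// calculatePriority below supplies the score used by routeToPriorityQueue.
async function routeRecentRuns(projectName: string, priorityQueues: any) {
  for await (const run of client.listRuns({ projectName, isRoot: true })) {
    await routeToPriorityQueue(run, priorityQueues);
  }
}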
function calculatePriority(run: any): number {
let priority = 0;
// Critical: errors or safety concerns
if (run.error) priority += 0.5;
if (run.outputs?.safety_score < 0.5) priority += 0.5;
// High: low confidence or user complaints
if (run.outputs?.confidence < 0.5) priority += 0.3;
if (run.tags?.includes("user_complaint")) priority += 0.3;
// Medium: moderate confidence
if (run.outputs?.confidence >= 0.5 && run.outputs?.confidence < 0.7) priority += 0.2;
return Math.min(priority, 1.0);
}
Build a dashboard to manage annotation queues.
import { Client } from "langsmith";
const client = new Client();
// Get comprehensive queue dashboard data
async function getQueueDashboard() {
const dashboard = {
queues: [] as any[],
summary: {
totalQueues: 0,
totalPendingRuns: 0,
totalAnnotated: 0,
estimatedHoursRemaining: 0,
},
};
// Get all queues
for await (const queue of client.listAnnotationQueues()) {
const { size } = await client.getSizeFromAnnotationQueue(queue.id);
// Sample first few runs to estimate annotation status
let annotatedInQueue = 0;
const sampleSize = Math.min(size, 20);
for (let i = 0; i < sampleSize; i++) {
const runInfo = await client.getRunFromAnnotationQueue(queue.id, i);
const feedbackList = [];
for await (const feedback of client.listFeedback({
runIds: [runInfo.run.id],
})) {
feedbackList.push(feedback);
}
if (feedbackList.length > 0) annotatedInQueue++;
}
const annotationRate = sampleSize > 0 ? annotatedInQueue / sampleSize : 0;
const estimatedAnnotated = Math.round(size * annotationRate);
dashboard.queues.push({
id: queue.id,
name: queue.name,
description: queue.description,
totalRuns: size,
estimatedAnnotated,
estimatedPending: size - estimatedAnnotated,
percentComplete: size > 0 ? ((estimatedAnnotated / size) * 100).toFixed(1) : "0",
lastUpdated: queue.updated_at,
});
dashboard.summary.totalPendingRuns += size - estimatedAnnotated;
dashboard.summary.totalAnnotated += estimatedAnnotated;
}
dashboard.summary.totalQueues = dashboard.queues.length;
// Estimate 5 minutes per annotation
dashboard.summary.estimatedHoursRemaining =
(dashboard.summary.totalPendingRuns * 5) / 60;
return dashboard;
}
// Display dashboard
async function displayDashboard() {
const dashboard = await getQueueDashboard();
console.log("\n" + "=".repeat(60));
console.log("ANNOTATION QUEUE DASHBOARD");
console.log("=".repeat(60));
console.log(`\nTotal Queues: ${dashboard.summary.totalQueues}`);
console.log(`Total Pending: ${dashboard.summary.totalPendingRuns}`);
console.log(`Total Annotated: ${dashboard.summary.totalAnnotated}`);
console.log(`Estimated Hours Remaining: ${dashboard.summary.estimatedHoursRemaining.toFixed(1)}`);
console.log(`\n${"Queue Name".padEnd(30)} | ${"Runs".padEnd(6)} | ${"Done".padEnd(6)} | Progress`);
console.log("-".repeat(60));
for (const queue of dashboard.queues) {
const name = queue.name.padEnd(30).substring(0, 30);
const total = queue.totalRuns.toString().padEnd(6);
const annotated = queue.estimatedAnnotated.toString().padEnd(6);
const progress = `${queue.percentComplete}%`;
console.log(`${name} | ${total} | ${annotated} | ${progress}`);
}
}
Assign specific reviewers to annotation queue items.
import { Client } from "langsmith";
const client = new Client();
// Assign reviewers to queue items using metadata
async function assignReviewers(queueId: string, reviewers: string[]) {
const { size } = await client.getSizeFromAnnotationQueue(queueId);
let assignmentCount = 0;
for (let i = 0; i < size; i++) {
const runInfo = await client.getRunFromAnnotationQueue(queueId, i);
// Round-robin assignment
const assignedReviewer = reviewers[i % reviewers.length];
// Record assignment using feedback
await client.createFeedback({
run_id: runInfo.run.id,
key: "assigned_reviewer",
value: assignedReviewer,
comment: `Assigned to ${assignedReviewer} for review`,
});
assignmentCount++;
}
console.log(`Assigned ${assignmentCount} runs to ${reviewers.length} reviewers`);
}
// Get runs assigned to specific reviewer
async function getReviewerWorkload(queueId: string, reviewerId: string) {
const assignedRuns = [];
const { size } = await client.getSizeFromAnnotationQueue(queueId);
for (let i = 0; i < size; i++) {
const runInfo = await client.getRunFromAnnotationQueue(queueId, i);
// Check assignment
const assignmentList = [];
for await (const feedback of client.listFeedback({
runIds: [runInfo.run.id],
feedbackKeys: ["assigned_reviewer"],
})) {
assignmentList.push(feedback);
}
if (assignmentList.length > 0 && assignmentList[0].value === reviewerId) {
assignedRuns.push(runInfo.run);
}
}
console.log(`Reviewer ${reviewerId} has ${assignedRuns.length} runs assigned`);
return assignedRuns;
}
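Usage sketch (the queue ID and reviewer handles below are placeholders, not values returned by the API): assign reviewers round-robin across a queue, then pull one reviewer's workload.
const reviewers = ["alice@example.com", "bob@example.com", "carol@example.com"];
await assignReviewers("queue-uuid-123", reviewers);
const aliceRuns = await getReviewerWorkload("queue-uuid-123", "alice@example.com");
console.log(`First assigned run for alice: ${aliceRuns[0]?.id ?? "none"}`);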