Annotation Queues in LangSmith provide a structured system for collecting human feedback on runs. They enable workflows where runs are added to queues for review, annotation, and quality assessment by human reviewers.
Annotation Queues are designed for human-in-the-loop workflows where you need to collect quality ratings on production outputs, gather corrected examples for fine-tuning or evaluation datasets, compare outputs from different models or prompts, and triage edge cases and failure modes.
import { Client } from "langsmith";
const client = new Client();
// Create an annotation queue for feedback collection
const queue = await client.createAnnotationQueue({
name: "Model Output Review",
description: "Queue for reviewing customer-facing model outputs",
});
// Add runs to the queue for annotation
await client.addRunsToAnnotationQueue({
queueId: queue.id,
runIds: ["run-1", "run-2", "run-3"],
});
// Retrieve first run from queue for annotation
const firstRun = await client.getRunFromAnnotationQueue(queue.id, 0);
console.log(`Run ${firstRun.run.id} needs review`);
// Display to reviewer and collect feedback
Creates a new annotation queue for organizing runs that need human review.
/**
* Create a new annotation queue
* @param options - Queue configuration options
* @returns Promise resolving to the created annotation queue with details
*/
createAnnotationQueue(options: {
/** Unique name for the annotation queue */
name: string;
/** Optional description of the queue's purpose */
description?: string;
/** Optional custom queue ID (auto-generated if not provided) */
queueId?: string;
/** Optional rubric instructions for annotators */
rubricInstructions?: string;
}): Promise<AnnotationQueueWithDetails>;
interface AnnotationQueueWithDetails {
/** Unique identifier for the queue */
id: string;
/** Name of the annotation queue */
name: string;
/** Description of the queue's purpose */
description?: string;
/** Rubric instructions for annotators */
rubric_instructions?: string;
/** Timestamp when queue was created */
created_at: string;
/** Timestamp when queue was last updated */
updated_at: string;
}
Usage Examples:
import { Client } from "langsmith";
const client = new Client();
// Create a queue for model output review
const reviewQueue = await client.createAnnotationQueue({
name: "Production Output Review",
description: "Review outputs from production chatbot for quality assurance",
});
// Create a queue for training data collection
const trainingQueue = await client.createAnnotationQueue({
name: "Training Data Collection",
description: "Collect human-annotated examples for model fine-tuning",
});
// Create a queue for comparative evaluation
const comparisonQueue = await client.createAnnotationQueue({
name: "Model A vs Model B",
description: "Side-by-side comparison of two model versions",
});
// Create queue with rubric instructions
const guidedQueue = await client.createAnnotationQueue({
name: "Quality Review with Rubric",
description: "Structured quality review process",
rubricInstructions: `
Rate on scale of 1-5:
- Accuracy: How factually correct is the response?
- Helpfulness: How useful is the response to the user?
- Safety: Is the response free from harmful content?
`,
});
Retrieves details about a specific annotation queue.
/**
* Read annotation queue details
* @param queueId - The unique identifier of the queue
* @returns Promise resolving to the annotation queue details
*/
readAnnotationQueue(queueId: string): Promise<AnnotationQueueWithDetails>;
Usage Examples:
import { Client } from "langsmith";
const client = new Client();
// Read queue details by ID
const queue = await client.readAnnotationQueue("queue-uuid-123");
console.log(`Queue: ${queue.name}`);
console.log(`Description: ${queue.description}`);
console.log(`Created: ${queue.created_at}`);
console.log(`Updated: ${queue.updated_at}`);
if (queue.rubric_instructions) {
console.log(`Rubric:\n${queue.rubric_instructions}`);
}
// Use queue details to display in UI
async function displayQueueInfo(queueId: string) {
const queue = await client.readAnnotationQueue(queueId);
return {
title: queue.name,
subtitle: queue.description,
lastModified: queue.updated_at,
hasRubric: !!queue.rubric_instructions,
};
}
Lists all annotation queues with optional filtering.
/**
* List annotation queues
* @param options - Optional filtering options
* @returns Async iterable of annotation queues
*/
listAnnotationQueues(
options?: {
/** Filter by queue IDs */
queueIds?: string[];
/** Filter by exact name */
name?: string;
/** Filter by name substring */
nameContains?: string;
/** Limit number of results */
limit?: number;
}
): AsyncIterableIterator<AnnotationQueue>;
interface AnnotationQueue {
/** Queue ID */
id: string;
/** Queue name */
name: string;
/** Queue description */
description?: string;
/** Creation timestamp */
created_at: string;
/** Update timestamp */
updated_at: string;
/** Rubric instructions */
rubric_instructions?: string;
}
Usage Examples:
import { Client } from "langsmith";
const client = new Client();
// List all annotation queues (async iterable)
for await (const queue of client.listAnnotationQueues()) {
console.log(`[${queue.id}] ${queue.name}`);
}
// Search for queues by exact name
for await (const queue of client.listAnnotationQueues({ name: "Production QA" })) {
console.log(`Found queue: ${queue.name}`);
}
// Search queues by name substring
for await (const queue of client.listAnnotationQueues({ nameContains: "review" })) {
console.log(`Review queue: ${queue.name}`);
}
// Get specific queues by ID
for await (const queue of client.listAnnotationQueues({
queueIds: ["queue-id-1", "queue-id-2"],
})) {
console.log(`Queue: ${queue.name}`);
}
// Display queues in a dashboard with limit
const queues: AnnotationQueue[] = [];
for await (const queue of client.listAnnotationQueues({ limit: 20 })) {
queues.push(queue);
console.log(`[${queue.id}] ${queue.name}`);
console.log(` ${queue.description || 'No description'}`);
console.log(` Last updated: ${queue.updated_at}`);
}
console.log(`Total queues: ${queues.length}`);
Updates an existing annotation queue's properties.
/**
* Update annotation queue
* @param queueId - The unique identifier of the queue to update
* @param options - Properties to update
* @returns Promise resolving to void
*/
updateAnnotationQueue(
queueId: string,
options: {
/** New name for the queue */
name: string;
/** New description for the queue */
description?: string;
/** Optional rubric instructions for annotators */
rubricInstructions?: string;
}
): Promise<void>;
Usage Examples:
import { Client } from "langsmith";
const client = new Client();
// Update queue name
await client.updateAnnotationQueue("queue-id-123", {
name: "Updated Review Queue",
});
// Update queue description
await client.updateAnnotationQueue("queue-id-123", {
name: "Production QA",
description: "Now includes edge case reviews",
});
// Update both name and description
await client.updateAnnotationQueue("queue-id-123", {
name: "Production QA Queue",
description: "Quality assurance for all production outputs",
});
// Update rubric instructions
await client.updateAnnotationQueue("queue-id-123", {
name: "Quality Review",
rubricInstructions: `
Updated rubric:
1. Accuracy (1-5): Factual correctness
2. Helpfulness (1-5): User value
3. Safety (1-5): Harmfulness check
4. Clarity (1-5): Communication quality
`,
});
// Rename queue based on status
async function markQueueAsArchived(queueId: string) {
const queue = await client.readAnnotationQueue(queueId);
await client.updateAnnotationQueue(queueId, {
name: `[ARCHIVED] ${queue.name}`,
description: `${queue.description || ''} - Archived on ${new Date().toLocaleDateString()}`,
});
}
Deletes an annotation queue permanently.
/**
* Delete annotation queue
* @param queueId - The unique identifier of the queue to delete
* @returns Promise resolving when deletion is complete
*/
deleteAnnotationQueue(queueId: string): Promise<void>;
Usage Examples:
import { Client } from "langsmith";
const client = new Client();
// Delete a queue
await client.deleteAnnotationQueue("queue-id-123");
// Delete queue with confirmation
async function deleteQueueWithConfirmation(queueId: string) {
const queue = await client.readAnnotationQueue(queueId);
const confirmed = confirm(`Delete queue "${queue.name}"?`);
if (confirmed) {
await client.deleteAnnotationQueue(queueId);
console.log("Queue deleted successfully");
}
}
// Clean up old queues
async function archiveOldQueues(daysOld: number) {
const cutoffDate = new Date();
cutoffDate.setDate(cutoffDate.getDate() - daysOld);
for await (const queue of client.listAnnotationQueues()) {
const updatedAt = new Date(queue.updated_at);
if (updatedAt < cutoffDate) {
console.log(`Deleting old queue: ${queue.name}`);
await client.deleteAnnotationQueue(queue.id);
}
}
}
Adds one or more runs to an annotation queue for human review.
/**
* Add runs to an annotation queue
* @param params - Parameters specifying queue and runs to add
* @returns Promise resolving when runs are added
*/
addRunsToAnnotationQueue(
params: AddRunsToAnnotationQueueParams
): Promise<void>;
interface AddRunsToAnnotationQueueParams {
/** ID of the annotation queue */
queueId: string;
/** Array of run IDs to add to the queue */
runIds: string[];
}
Usage Examples:
import { Client } from "langsmith";
const client = new Client();
// Add specific runs to a queue
await client.addRunsToAnnotationQueue({
queueId: "queue-id-123",
runIds: ["run-1", "run-2", "run-3"],
});
// Add runs based on criteria
async function addFailedRunsToQueue(projectName: string, queueId: string) {
const runs = [];
// List runs with errors
for await (const run of client.listRuns({
projectName,
isRoot: true,
error: true,
})) {
runs.push(run);
}
const runIds = runs.map(run => run.id);
if (runIds.length > 0) {
await client.addRunsToAnnotationQueue({
queueId,
runIds,
});
console.log(`Added ${runIds.length} failed runs to queue`);
}
}
// Add runs with low confidence scores
async function queueLowConfidenceRuns(projectName: string, queueId: string, threshold: number = 0.7) {
const lowConfidenceRuns = [];
for await (const run of client.listRuns({ projectName, isRoot: true })) {
const confidence = run.outputs?.confidence;
if (confidence !== undefined && confidence < threshold) {
lowConfidenceRuns.push(run);
}
}
const runIds = lowConfidenceRuns.map(run => run.id);
if (runIds.length > 0) {
await client.addRunsToAnnotationQueue({
queueId,
runIds,
});
console.log(`Added ${runIds.length} low confidence runs`);
}
}
// Add runs in batches
async function addRunsInBatches(queueId: string, runIds: string[], batchSize: number = 50) {
for (let i = 0; i < runIds.length; i += batchSize) {
const batch = runIds.slice(i, i + batchSize);
await client.addRunsToAnnotationQueue({
queueId,
runIds: batch,
});
console.log(`Added batch ${Math.floor(i / batchSize) + 1} (${batch.length} runs)`);
}
}
// Add runs matching complex filter
async function addFilteredRuns(projectName: string, queueId: string) {
const filteredRuns = [];
for await (const run of client.listRuns({
projectName,
isRoot: true,
})) {
// Complex filtering logic
const needsReview = (
run.error !== undefined ||
(run.outputs?.confidence && run.outputs.confidence < 0.7) ||
hasUnusualPattern(run)
);
if (needsReview) {
filteredRuns.push(run);
}
}
if (filteredRuns.length > 0) {
await client.addRunsToAnnotationQueue({
queueId,
runIds: filteredRuns.map(r => r.id),
});
}
}
function hasUnusualPattern(run: any): boolean {
// Implement pattern detection logic
const inputStr = JSON.stringify(run.inputs);
return inputStr.length > 10000 || inputStr.includes("�");
}
Retrieves a single run from an annotation queue by index, useful for iterating through queue items one at a time.
/**
* Get a single run from an annotation queue
* @param queueId - The unique identifier of the queue
* @param index - The index of the run to retrieve (0-based)
* @returns Promise resolving to run with queue information
*/
getRunFromAnnotationQueue(
queueId: string,
index: number
): Promise<RunWithAnnotationQueueInfo>;
interface RunWithAnnotationQueueInfo {
/** The run object containing trace data */
run: Run;
/** Annotation queue metadata */
queue_info?: {
/** When the run was added to the queue */
added_at: Date;
/** Current status in the queue */
status?: string;
/** Assigned reviewer if any */
assigned_to?: string;
};
}
Usage Examples:
import { Client } from "langsmith";
const client = new Client();
// Get first run from queue
const firstRun = await client.getRunFromAnnotationQueue("queue-id-123", 0);
console.log(`Reviewing run: ${firstRun.run.id}`);
console.log(`Added to queue at: ${firstRun.queue_info?.added_at}`);
console.log(`Inputs: ${JSON.stringify(firstRun.run.inputs)}`);
console.log(`Outputs: ${JSON.stringify(firstRun.run.outputs)}`);
// Get specific run by index
const fifthRun = await client.getRunFromAnnotationQueue("queue-id-123", 4);
// Process runs sequentially
async function reviewQueueSequentially(queueId: string, maxToReview: number = 10) {
for (let i = 0; i < maxToReview; i++) {
try {
const runInfo = await client.getRunFromAnnotationQueue(queueId, i);
// Present to human reviewer
const feedback = await collectHumanFeedback(runInfo.run);
// Save feedback
await client.createFeedback({
run_id: runInfo.run.id,
key: "human_review",
score: feedback.score,
comment: feedback.comment,
});
console.log(`Reviewed run ${i + 1}/${maxToReview}`);
} catch (error) {
// End of queue or error
console.log(`Completed ${i} reviews`);
break;
}
}
}
// Build reviewer interface
async function getNextRunForReview(queueId: string, currentIndex: number) {
const runInfo = await client.getRunFromAnnotationQueue(queueId, currentIndex);
return {
runId: runInfo.run.id,
inputs: runInfo.run.inputs,
outputs: runInfo.run.outputs,
metadata: {
addedAt: runInfo.queue_info?.added_at,
queuePosition: currentIndex,
},
};
};
Removes a run from an annotation queue after it has been reviewed or if it was added by mistake.
/**
* Delete a run from an annotation queue
* @param queueId - Queue identifier
* @param queueRunId - Run identifier to remove from queue
* @returns Promise resolving when deletion completes
*/
deleteRunFromAnnotationQueue(
queueId: string,
queueRunId: string
): Promise<void>;
Usage Examples:
import { Client } from "langsmith";
const client = new Client();
// Remove run after annotation complete
await client.deleteRunFromAnnotationQueue("queue-id-123", "run-456");
// Batch remove reviewed runs
const reviewedRunIds = ["run-1", "run-2", "run-3"];
for (const runId of reviewedRunIds) {
await client.deleteRunFromAnnotationQueue("queue-id-123", runId);
}
console.log(`Removed ${reviewedRunIds.length} reviewed runs from queue`);
// Remove run and log action
async function removeAndLog(queueId: string, runId: string, reason: string) {
await client.deleteRunFromAnnotationQueue(queueId, runId);
console.log(`Removed run ${runId} from queue. Reason: ${reason}`);
}
// Clean up queue after processing
async function cleanupProcessedRuns(queueId: string) {
const { size } = await client.getSizeFromAnnotationQueue(queueId);
// Snapshot the queued runs first so removals below don't shift the index positions being iterated
const queuedRuns = [];
for (let i = 0; i < size; i++) {
queuedRuns.push(await client.getRunFromAnnotationQueue(queueId, i));
}
for (const runInfo of queuedRuns) {
// Check if run has been reviewed
const feedbackList = [];
for await (const feedback of client.listFeedback({
runIds: [runInfo.run.id],
feedbackKeys: ["human_review"],
})) {
feedbackList.push(feedback);
}
if (feedbackList.length > 0) {
// Remove reviewed run
await client.deleteRunFromAnnotationQueue(queueId, runInfo.run.id);
console.log(`Removed reviewed run ${runInfo.run.id}`);
}
}
}
Gets the current number of runs in an annotation queue.
/**
* Get the size of an annotation queue
* @param queueId - Queue identifier
* @returns Promise resolving to an object with the queue size
*/
getSizeFromAnnotationQueue(queueId: string): Promise<{ size: number }>;
Usage Examples:
import { Client } from "langsmith";
const client = new Client();
// Check queue size
const { size } = await client.getSizeFromAnnotationQueue("queue-id-123");
console.log(`Queue has ${size} runs pending review`);
// Monitor queue size
async function monitorQueue(queueId: string, checkIntervalMs: number = 60000) {
setInterval(async () => {
const { size } = await client.getSizeFromAnnotationQueue(queueId);
console.log(`[${new Date().toISOString()}] Current queue size: ${size}`);
if (size > 1000) {
console.warn("⚠️ Queue backlog is high!");
} else if (size === 0) {
console.log("✓ Queue is empty");
}
}, checkIntervalMs);
}
// Calculate progress percentage
async function getQueueProgress(queueId: string, totalAdded: number) {
const { size } = await client.getSizeFromAnnotationQueue(queueId);
const reviewed = totalAdded - size;
const percentComplete = (reviewed / totalAdded) * 100;
return {
total: totalAdded,
remaining: size,
reviewed,
percentComplete: percentComplete.toFixed(1),
};
}
// Wait for queue to be empty
async function waitForQueueCompletion(queueId: string, pollIntervalMs: number = 5000) {
while (true) {
const { size } = await client.getSizeFromAnnotationQueue(queueId);
if (size === 0) {
console.log("Queue processing complete!");
break;
}
console.log(`Waiting... ${size} runs remaining`);
await new Promise(resolve => setTimeout(resolve, pollIntervalMs));
}
}
Note: The workflow examples in this section are illustrative and may reference helper patterns for working with annotation queues. To retrieve runs from a queue, use getRunFromAnnotationQueue(queueId, index) to get individual runs by their index position, combined with getSizeFromAnnotationQueue(queueId) to determine the total number of runs in the queue.
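For instance, a minimal sketch of that iteration pattern (the queue ID below is a placeholder) might look like this:
import { Client } from "langsmith";
const client = new Client();
// Walk every run currently in a queue by index position ("queue-id-123" is a placeholder ID)
const queueId = "queue-id-123";
const { size } = await client.getSizeFromAnnotationQueue(queueId);
for (let i = 0; i < size; i++) {
const { run } = await client.getRunFromAnnotationQueue(queueId, i);
console.log(`Position ${i}: run ${run.id}`);
}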
Collect human feedback on production outputs for quality monitoring.
import { Client } from "langsmith";
const client = new Client();
// Setup: Create QA queue
const qaQueue = await client.createAnnotationQueue({
name: "Production QA",
description: "Quality assurance for customer-facing outputs",
rubricInstructions: `
Rate each response on:
1. Accuracy (0-1): Is the information correct?
2. Helpfulness (0-1): Does it answer the question?
3. Safety (0-1): Is it free from harmful content?
`,
});
// Step 1: Sample production runs
async function sampleProductionRuns(projectName: string, queueId: string, sampleRate: number = 0.1) {
const sampledRuns = [];
for await (const run of client.listRuns({
projectName,
isRoot: true,
})) {
// Random sampling
if (Math.random() < sampleRate) {
sampledRuns.push(run);
}
}
const runIds = sampledRuns.map(run => run.id);
// Add to QA queue
if (runIds.length > 0) {
await client.addRunsToAnnotationQueue({
queueId,
runIds,
});
console.log(`Added ${runIds.length} runs to QA queue`);
}
return sampledRuns;
}
// Step 2: Review and annotate
async function conductQAReview(queueId: string, batchSize: number = 50) {
const { size } = await client.getSizeFromAnnotationQueue(queueId);
const reviewCount = Math.min(size, batchSize);
for (let i = 0; i < reviewCount; i++) {
// Always fetch index 0: each reviewed run is removed from the queue below, shifting later runs forward
const runInfo = await client.getRunFromAnnotationQueue(queueId, 0);
const { run } = runInfo;
// Present to human reviewer
console.log(`\n--- Reviewing Run ${i + 1}/${reviewCount} ---`);
console.log(`Input: ${JSON.stringify(run.inputs)}`);
console.log(`Output: ${JSON.stringify(run.outputs)}`);
// Human reviewer provides scores
const humanReview = await getHumanReview(run);
// Create feedback with detailed scores
await client.createFeedback({
run_id: run.id,
key: "qa_review",
score: humanReview.overallScore,
comment: humanReview.comments,
value: humanReview.detailedScores,
feedbackSourceType: "app",
});
// Tag issues
if (humanReview.hasIssues) {
await client.createFeedback({
run_id: run.id,
key: "issue_flag",
score: 0,
comment: humanReview.issueDescription,
value: {
issueType: humanReview.issueType,
severity: humanReview.severity,
},
});
}
// Remove from queue after review
await client.deleteRunFromAnnotationQueue(queueId, run.id);
}
console.log(`\nCompleted ${reviewCount} reviews`);
}
// Step 3: Analyze QA results
async function analyzeQAResults(queueId: string) {
const stats = {
totalReviewed: 0,
totalScore: 0,
issueCount: 0,
byScoreRange: {
excellent: 0, // 0.9-1.0
good: 0, // 0.7-0.9
fair: 0, // 0.5-0.7
poor: 0, // 0.0-0.5
},
};
const { size } = await client.getSizeFromAnnotationQueue(queueId);
for (let i = 0; i < size; i++) {
const runInfo = await client.getRunFromAnnotationQueue(queueId, i);
const feedbackList = [];
for await (const feedback of client.listFeedback({
runIds: [runInfo.run.id],
feedbackKeys: ["qa_review"],
})) {
feedbackList.push(feedback);
}
if (feedbackList.length > 0) {
const feedback = feedbackList[0];
const score = typeof feedback.score === 'number' ? feedback.score : 0;
stats.totalReviewed++;
stats.totalScore += score;
// Categorize by score range
if (score >= 0.9) stats.byScoreRange.excellent++;
else if (score >= 0.7) stats.byScoreRange.good++;
else if (score >= 0.5) stats.byScoreRange.fair++;
else stats.byScoreRange.poor++;
if (score < 0.7) {
stats.issueCount++;
}
}
}
const avgScore = stats.totalReviewed > 0 ? stats.totalScore / stats.totalReviewed : 0;
const issueRate = stats.totalReviewed > 0 ? stats.issueCount / stats.totalReviewed : 0;
console.log(`\n=== QA Results ===`);
console.log(`Total Reviewed: ${stats.totalReviewed}`);
console.log(`Average Score: ${avgScore.toFixed(2)}`);
console.log(`Issue Rate: ${(issueRate * 100).toFixed(1)}%`);
console.log(`\nScore Distribution:`);
console.log(` Excellent (0.9-1.0): ${stats.byScoreRange.excellent}`);
console.log(` Good (0.7-0.9): ${stats.byScoreRange.good}`);
console.log(` Fair (0.5-0.7): ${stats.byScoreRange.fair}`);
console.log(` Poor (0.0-0.5): ${stats.byScoreRange.poor}`);
return { avgScore, issueRate, stats };
}
// Helper function for collecting human review (implement as needed)
async function getHumanReview(run: any): Promise<any> {
// In production, this would present UI to reviewer
// For example purposes, return mock data
return {
overallScore: 0.85,
detailedScores: {
accuracy: 0.9,
helpfulness: 0.8,
safety: 1.0,
},
comments: "Good response overall",
hasIssues: false,
issueDescription: "",
issueType: "",
severity: 0,
};
}
Collect human-annotated examples for model fine-tuning or evaluation datasets.
import { Client } from "langsmith";
const client = new Client();
// Setup: Create training data queue
const trainingQueue = await client.createAnnotationQueue({
name: "Training Data Collection",
description: "Collect high-quality examples for model fine-tuning",
rubricInstructions: `
For each example:
1. Verify the input is representative
2. Provide the ideal output
3. Mark any edge cases or special handling needed
`,
});
// Step 1: Add diverse examples to queue
async function collectDiverseExamples(projectName: string, queueId: string, targetCount: number = 100) {
const allRuns = [];
for await (const run of client.listRuns({
projectName,
isRoot: true,
})) {
allRuns.push(run);
}
// Select diverse examples using stratified sampling
const diverseRuns = selectDiverseExamples(allRuns, targetCount);
const runIds = diverseRuns.map(run => run.id);
await client.addRunsToAnnotationQueue({
queueId,
runIds,
});
console.log(`Added ${runIds.length} diverse examples to training queue`);
return diverseRuns;
}
// Step 2: Human annotation with corrections
async function annotateTrainingExamples(queueId: string) {
const annotatedExamples = [];
const { size } = await client.getSizeFromAnnotationQueue(queueId);
for (let i = 0; i < size; i++) {
const runInfo = await client.getRunFromAnnotationQueue(queueId, i);
const { run } = runInfo;
// Human provides ideal output
const annotation = await getHumanAnnotation(run);
// Create feedback with correction
await client.createFeedback({
run_id: run.id,
key: "human_annotation",
score: annotation.isCorrect ? 1 : 0,
comment: annotation.reasoning,
correction: {
outputs: annotation.idealOutput,
},
feedbackSourceType: "app",
});
// Store for training dataset
annotatedExamples.push({
input: run.inputs,
output: annotation.idealOutput,
metadata: {
originalRunId: run.id,
annotatedAt: new Date().toISOString(),
annotator: annotation.annotator,
quality: annotation.quality,
},
});
console.log(`Annotated ${i + 1}/${size} examples`);
}
return annotatedExamples;
}
// Step 3: Create dataset from annotations
async function createTrainingDataset(queueId: string, datasetName: string) {
// Create dataset
const dataset = await client.createDataset({
datasetName,
description: "Human-annotated training examples",
dataType: "kv",
});
// Get queue size
const { size } = await client.getSizeFromAnnotationQueue(queueId);
let addedCount = 0;
// Process all runs in queue
for (let i = 0; i < size; i++) {
const runInfo = await client.getRunFromAnnotationQueue(queueId, i);
// Get feedback with corrections
const feedbackList = [];
for await (const feedback of client.listFeedback({
runIds: [runInfo.run.id],
feedbackKeys: ["human_annotation"],
})) {
feedbackList.push(feedback);
}
for (const feedback of feedbackList) {
if (feedback.correction) {
// Add to dataset
await client.createExample({
dataset_id: dataset.id,
inputs: runInfo.run.inputs,
outputs: feedback.correction.outputs,
metadata: {
sourceRunId: runInfo.run.id,
annotationScore: feedback.score,
annotationComment: feedback.comment,
annotatedAt: feedback.created_at,
},
});
addedCount++;
}
}
}
console.log(`Created training dataset: ${dataset.name}`);
console.log(`Added ${addedCount} annotated examples`);
return dataset;
}
// Helper: Select diverse examples using various strategies
function selectDiverseExamples(runs: any[], targetCount: number): any[] {
// Strategy 1: Group by input type/category
const categories = new Map<string, any[]>();
for (const run of runs) {
const category = categorizeRun(run);
if (!categories.has(category)) {
categories.set(category, []);
}
categories.get(category)!.push(run);
}
// Strategy 2: Sample proportionally from each category
const samplesPerCategory = Math.ceil(targetCount / categories.size);
const selected: any[] = [];
for (const categoryRuns of categories.values()) {
// Random shuffle
const shuffled = categoryRuns.sort(() => Math.random() - 0.5);
// Take samples
selected.push(...shuffled.slice(0, samplesPerCategory));
}
// Return exactly targetCount runs
return selected.slice(0, targetCount);
}
function categorizeRun(run: any): string {
// Categorize based on input characteristics
const inputStr = JSON.stringify(run.inputs);
if (inputStr.includes("question")) return "question";
if (inputStr.includes("summary")) return "summary";
if (inputStr.includes("translate")) return "translation";
return "other";
}
// Helper function for collecting human annotation (implement as needed)
async function getHumanAnnotation(run: any): Promise<any> {
// In production, this would present UI to annotator
// For example purposes, return mock data
return {
isCorrect: false,
idealOutput: { answer: "Corrected answer based on human judgment" },
reasoning: "The original output was incomplete",
annotator: "reviewer-1",
quality: "high",
};
}
Compare outputs from different models or prompts side-by-side.
import { Client } from "langsmith";
const client = new Client();
// Setup: Create comparison queue
const comparisonQueue = await client.createAnnotationQueue({
name: "Model A vs Model B Comparison",
description: "Side-by-side comparison of two model versions",
rubricInstructions: `
Compare both outputs and select the better one based on:
- Accuracy: Which is more factually correct?
- Helpfulness: Which better addresses the user's need?
- Clarity: Which is easier to understand?
- Overall: Which would you prefer to show to users?
`,
});
// Step 1: Identify runs to compare
async function setupComparison(projectA: string, projectB: string, queueId: string) {
const runsAList = [];
const runsBList = [];
// Collect runs from both projects
for await (const run of client.listRuns({ projectName: projectA, isRoot: true })) {
runsAList.push(run);
}
for await (const run of client.listRuns({ projectName: projectB, isRoot: true })) {
runsBList.push(run);
}
// Match runs with same inputs
const pairs = matchRunsByInput(runsAList, runsBList);
// Add both runs from each pair to queue
const allRunIds = pairs.flatMap(pair => [pair.runA.id, pair.runB.id]);
await client.addRunsToAnnotationQueue({
queueId,
runIds: allRunIds,
});
console.log(`Added ${pairs.length} run pairs (${allRunIds.length} total runs) for comparison`);
return pairs;
}
// Step 2: Collect comparative judgments
async function conductComparison(queueId: string, pairs: Array<{runA: any, runB: any}>) {
for (const pair of pairs) {
// Present both outputs to human
console.log(`\n=== Comparison ===`);
console.log(`Model A Output: ${JSON.stringify(pair.runA.outputs)}`);
console.log(`Model B Output: ${JSON.stringify(pair.runB.outputs)}`);
const judgment = await getComparativeJudgment(pair.runA, pair.runB);
// Record preference
await client.createFeedback({
run_id: judgment.preferred === 'A' ? pair.runA.id : pair.runB.id,
key: "comparison_winner",
score: 1,
comment: judgment.reasoning,
value: {
comparison_pair: {
runA: pair.runA.id,
runB: pair.runB.id,
},
criteria: judgment.criteria,
preferredModel: judgment.preferred,
},
});
// Record specific criteria scores for both models
for (const [criterion, scores] of Object.entries(judgment.criteriaScores)) {
await client.createFeedback({
run_id: pair.runA.id,
key: `criteria_${criterion}_modelA`,
score: (scores as any).modelA,
comment: `Model A ${criterion} score`,
});
await client.createFeedback({
run_id: pair.runB.id,
key: `criteria_${criterion}_modelB`,
score: (scores as any).modelB,
comment: `Model B ${criterion} score`,
});
}
}
console.log(`Completed ${pairs.length} comparative judgments`);
}
// Step 3: Analyze comparison results
async function analyzeComparison(pairs: Array<{runA: any, runB: any}>) {
let modelAWins = 0;
let modelBWins = 0;
const criteriaScores: Record<string, { A: number[], B: number[] }> = {};
for (const pair of pairs) {
// Check winner feedback
const feedbackAList = [];
const feedbackBList = [];
for await (const feedback of client.listFeedback({
runIds: [pair.runA.id],
feedbackKeys: ["comparison_winner"],
})) {
feedbackAList.push(feedback);
}
for await (const feedback of client.listFeedback({
runIds: [pair.runB.id],
feedbackKeys: ["comparison_winner"],
})) {
feedbackBList.push(feedback);
}
if (feedbackAList.length > 0) modelAWins++;
if (feedbackBList.length > 0) modelBWins++;
// Collect criteria scores
for await (const feedback of client.listFeedback({
runIds: [pair.runA.id],
})) {
if (feedback.key.startsWith("criteria_")) {
const criterion = feedback.key.replace("criteria_", "").replace("_modelA", "");
if (!criteriaScores[criterion]) {
criteriaScores[criterion] = { A: [], B: [] };
}
if (typeof feedback.score === 'number') {
criteriaScores[criterion].A.push(feedback.score);
}
}
}
for await (const feedback of client.listFeedback({
runIds: [pair.runB.id],
})) {
if (feedback.key.startsWith("criteria_")) {
const criterion = feedback.key.replace("criteria_", "").replace("_modelB", "");
if (!criteriaScores[criterion]) {
criteriaScores[criterion] = { A: [], B: [] };
}
if (typeof feedback.score === 'number') {
criteriaScores[criterion].B.push(feedback.score);
}
}
}
}
console.log(`\n=== Comparison Results ===`);
console.log(`Total Comparisons: ${pairs.length}`);
console.log(`Model A wins: ${modelAWins} (${(modelAWins / pairs.length * 100).toFixed(1)}%)`);
console.log(`Model B wins: ${modelBWins} (${(modelBWins / pairs.length * 100).toFixed(1)}%)`);
console.log(`\nCriteria Breakdown:`);
for (const [criterion, scores] of Object.entries(criteriaScores)) {
const avgA = scores.A.reduce((a, b) => a + b, 0) / scores.A.length;
const avgB = scores.B.reduce((a, b) => a + b, 0) / scores.B.length;
console.log(` ${criterion}:`);
console.log(` Model A: ${avgA.toFixed(2)}`);
console.log(` Model B: ${avgB.toFixed(2)}`);
}
return { modelAWins, modelBWins, totalComparisons: pairs.length, criteriaScores };
}
// Helper function to match runs
function matchRunsByInput(runsA: any[], runsB: any[]) {
const pairs = [];
for (const runA of runsA) {
const matchingRunB = runsB.find(
runB => JSON.stringify(runB.inputs) === JSON.stringify(runA.inputs)
);
if (matchingRunB) {
pairs.push({ runA, runB: matchingRunB });
}
}
return pairs;
}
// Helper function for collecting comparative judgment (implement as needed)
async function getComparativeJudgment(runA: any, runB: any): Promise<any> {
// In production, this would present UI with both outputs
// For example purposes, return mock data
return {
preferred: 'A',
reasoning: "Model A provides more accurate and comprehensive response",
criteria: ["accuracy", "helpfulness", "clarity"],
criteriaScores: {
accuracy: { modelA: 0.9, modelB: 0.7 },
helpfulness: { modelA: 0.8, modelB: 0.6 },
clarity: { modelA: 0.85, modelB: 0.75 },
},
};
}
Identify and review edge cases and failure modes.
import { Client } from "langsmith";
const client = new Client();
// Setup: Create edge case queue
const edgeCaseQueue = await client.createAnnotationQueue({
name: "Edge Case Review",
description: "Review unusual inputs and failure modes",
rubricInstructions: `
For each edge case:
1. Categorize the type of edge case
2. Rate severity (1-5)
3. Mark if reproducible
4. Indicate if fix is needed
5. Suggest improvement if applicable
`,
});
// Step 1: Detect edge cases automatically
async function detectEdgeCases(projectName: string, queueId: string) {
const edgeCases = [];
for await (const run of client.listRuns({
projectName,
isRoot: true,
})) {
// Criteria for edge cases
const hasError = run.error !== undefined && run.error !== null;
const unusualLatency = run.end_time && run.start_time &&
(run.end_time - run.start_time) > 30000; // >30s
const unusualInput = isUnusualInput(run.inputs);
const lowConfidence = run.outputs?.confidence && run.outputs.confidence < 0.5;
const unexpectedOutput = isUnexpectedOutput(run.outputs);
if (hasError || unusualLatency || unusualInput || lowConfidence || unexpectedOutput) {
edgeCases.push(run);
}
}
const runIds = edgeCases.map(run => run.id);
if (runIds.length > 0) {
await client.addRunsToAnnotationQueue({
queueId,
runIds,
});
console.log(`Added ${runIds.length} edge cases to queue`);
}
return edgeCases;
}
// Step 2: Categorize and analyze edge cases
async function categorizeEdgeCases(queueId: string) {
const categories: Record<string, number> = {};
const { size } = await client.getSizeFromAnnotationQueue(queueId);
for (let i = 0; i < size; i++) {
const runInfo = await client.getRunFromAnnotationQueue(queueId, i);
const { run } = runInfo;
// Human categorizes the edge case
const category = await categorizeEdgeCase(run);
categories[category.type] = (categories[category.type] || 0) + 1;
// Record categorization
await client.createFeedback({
run_id: run.id,
key: "edge_case_category",
score: category.severity,
comment: category.description,
value: {
category: category.type,
reproducible: category.reproducible,
needsFix: category.needsFix,
suggestedFix: category.suggestedFix,
},
});
}
console.log("\n=== Edge Case Categories ===");
for (const [type, count] of Object.entries(categories)) {
console.log(` ${type}: ${count}`);
}
return categories;
}
// Helper functions
function isUnusualInput(inputs: any): boolean {
const inputStr = JSON.stringify(inputs);
// Check for unusual patterns
return (
inputStr.length > 10000 || // Very long input
inputStr.includes("�") || // Encoding issues
/[^\x00-\x7F]{20,}/.test(inputStr) || // Many non-ASCII chars
inputStr.length < 5 // Very short input
);
}
function isUnexpectedOutput(outputs: any): boolean {
if (!outputs) return true;
const outputStr = JSON.stringify(outputs);
// Check for unexpected patterns
return (
outputStr.length === 0 || // Empty output
outputStr.includes("error") || // Error in output
outputStr.includes("undefined") // Undefined values
);
}
async function categorizeEdgeCase(run: any): Promise<any> {
// In production, this would present UI to reviewer
// For example purposes, return mock data
return {
type: "unusual_input_encoding",
severity: 0.7,
description: "Input contains unusual character encoding",
reproducible: true,
needsFix: true,
suggestedFix: "Add input validation and normalization",
};
}
Annotation queues work seamlessly with LangSmith's feedback system to collect and store human annotations.
import { Client } from "langsmith";
const client = new Client();
// Complete annotation workflow
async function completeAnnotationWorkflow(queueId: string, batchSize: number = 10) {
// Get queue size
const { size } = await client.getSizeFromAnnotationQueue(queueId);
const processingCount = Math.min(size, batchSize);
console.log(`Processing ${processingCount} runs from queue (${size} total)`);
for (let i = 0; i < processingCount; i++) {
const runInfo = await client.getRunFromAnnotationQueue(queueId, i);
const { run } = runInfo;
// Collect human feedback on multiple dimensions
const annotation = {
accuracy: await rateAccuracy(run),
helpfulness: await rateHelpfulness(run),
safety: await rateSafety(run),
overallScore: 0,
comments: "",
corrections: null,
};
annotation.overallScore =
(annotation.accuracy + annotation.helpfulness + annotation.safety) / 3;
// Submit multiple feedback entries
await client.createFeedback({
run_id: run.id,
key: "accuracy",
score: annotation.accuracy,
comment: "Factual correctness rating",
});
await client.createFeedback({
run_id: run.id,
key: "helpfulness",
score: annotation.helpfulness,
comment: "Usefulness rating",
});
await client.createFeedback({
run_id: run.id,
key: "safety",
score: annotation.safety,
comment: "Safety rating",
});
await client.createFeedback({
run_id: run.id,
key: "overall_human_rating",
score: annotation.overallScore,
comment: annotation.comments,
correction: annotation.corrections,
feedbackSourceType: "app",
});
console.log(`Annotated run ${i + 1}/${processingCount}`);
}
console.log("Annotation workflow complete");
}
// Query feedback from annotated runs
async function analyzeAnnotations(queueId: string) {
const { size } = await client.getSizeFromAnnotationQueue(queueId);
const feedbackByKey: Record<string, number[]> = {};
const runIds: string[] = [];
// Collect all run IDs from queue
for (let i = 0; i < size; i++) {
const runInfo = await client.getRunFromAnnotationQueue(queueId, i);
runIds.push(runInfo.run.id);
}
// Get all feedback for these runs
for await (const feedback of client.listFeedback({
runIds,
})) {
if (!feedbackByKey[feedback.key]) {
feedbackByKey[feedback.key] = [];
}
if (typeof feedback.score === 'number') {
feedbackByKey[feedback.key].push(feedback.score);
}
}
// Calculate averages
console.log("\n=== Annotation Summary ===");
for (const [key, scores] of Object.entries(feedbackByKey)) {
const avg = scores.reduce((a, b) => a + b, 0) / scores.length;
console.log(` ${key}: ${avg.toFixed(2)} (n=${scores.length})`);
}
return feedbackByKey;
}
// Helper functions for rating dimensions (implement as needed)
async function rateAccuracy(run: any): Promise<number> {
// Present to reviewer for accuracy rating
return 0.85; // Mock score
}
async function rateHelpfulness(run: any): Promise<number> {
// Present to reviewer for helpfulness rating
return 0.90; // Mock score
}
async function rateSafety(run: any): Promise<number> {
// Present to reviewer for safety rating
return 1.0; // Mock score
}
Create specialized queues for different purposes to maintain clarity and organization.
import { Client } from "langsmith";
const client = new Client();
// Create specialized queues for different purposes
async function setupQueueStructure() {
// High-priority immediate review
const urgentQueue = await client.createAnnotationQueue({
name: "Urgent Review",
description: "High-priority runs requiring immediate attention",
rubricInstructions: "Focus on critical issues: safety, factual errors, harmful content",
});
// Regular quality checks
const regularQAQueue = await client.createAnnotationQueue({
name: "Regular QA",
description: "Routine quality assurance sampling",
rubricInstructions: "Standard quality metrics: accuracy, helpfulness, clarity",
});
// Training data collection
const trainingQueue = await client.createAnnotationQueue({
name: "Training Examples",
description: "High-quality examples for model training",
rubricInstructions: "Provide ideal outputs for training. Ensure examples are clear and representative.",
});
// Edge case analysis
const edgeCaseQueue = await client.createAnnotationQueue({
name: "Edge Cases",
description: "Unusual inputs and edge cases for review",
rubricInstructions: "Categorize edge case type, assess severity, determine if fix is needed",
});
// Comparative evaluation
const comparisonQueue = await client.createAnnotationQueue({
name: "Model Comparison",
description: "A/B testing between model versions",
rubricInstructions: "Compare models side-by-side on accuracy, helpfulness, and overall preference",
});
return { urgentQueue, regularQAQueue, trainingQueue, edgeCaseQueue, comparisonQueue };
}
// Route runs to appropriate queues
async function routeRunToQueue(run: any, queues: any) {
// Route based on characteristics
if (run.error || run.outputs?.safety_score < 0.5) {
await client.addRunsToAnnotationQueue({
queueId: queues.urgentQueue.id,
runIds: [run.id],
});
} else if (run.outputs?.confidence < 0.7) {
await client.addRunsToAnnotationQueue({
queueId: queues.edgeCaseQueue.id,
runIds: [run.id],
});
} else if (Math.random() < 0.1) { // 10% sampling
await client.addRunsToAnnotationQueue({
queueId: queues.regularQAQueue.id,
runIds: [run.id],
});
}
}
Use different sampling strategies to select runs for annotation efficiently.
import { Client } from "langsmith";
const client = new Client();
// Random sampling
async function randomSample(projectName: string, queueId: string, rate: number) {
const sampledRuns = [];
for await (const run of client.listRuns({ projectName, isRoot: true })) {
if (Math.random() < rate) {
sampledRuns.push(run);
}
}
const runIds = sampledRuns.map(r => r.id);
if (runIds.length > 0) {
await client.addRunsToAnnotationQueue({ queueId, runIds });
console.log(`Random sample: added ${runIds.length} runs`);
}
return sampledRuns;
}
// Stratified sampling (ensure diverse examples)
async function stratifiedSample(
projectName: string,
queueId: string,
samplesPerCategory: number
) {
const runs = [];
for await (const run of client.listRuns({ projectName, isRoot: true })) {
runs.push(run);
}
// Group by category
const categories: Record<string, any[]> = {};
for (const run of runs) {
const category = categorizeRun(run);
if (!categories[category]) categories[category] = [];
categories[category].push(run);
}
// Sample from each category
const sampledIds: string[] = [];
for (const [category, categoryRuns] of Object.entries(categories)) {
const samples = categoryRuns
.sort(() => Math.random() - 0.5)
.slice(0, samplesPerCategory);
sampledIds.push(...samples.map(r => r.id));
console.log(`Sampled ${samples.length} from category: ${category}`);
}
await client.addRunsToAnnotationQueue({ queueId, runIds: sampledIds });
console.log(`Stratified sample: added ${sampledIds.length} runs across ${Object.keys(categories).length} categories`);
}
// Uncertainty sampling (focus on low-confidence predictions)
async function uncertaintySample(projectName: string, queueId: string, threshold: number = 0.7) {
const uncertainRuns = [];
for await (const run of client.listRuns({ projectName, isRoot: true })) {
const confidence = run.outputs?.confidence;
if (confidence !== undefined && confidence < threshold) {
uncertainRuns.push(run);
}
}
const runIds = uncertainRuns.map(r => r.id);
if (runIds.length > 0) {
await client.addRunsToAnnotationQueue({ queueId, runIds });
console.log(`Uncertainty sample: added ${runIds.length} low-confidence runs`);
}
return uncertainRuns;
}
// Active learning sampling (most informative examples)
async function activeLearningSample(projectName: string, queueId: string, count: number) {
const runs = [];
for await (const run of client.listRuns({ projectName, isRoot: true })) {
runs.push(run);
}
// Score each run by informativeness
const scored = runs.map(run => ({
run,
score: calculateInformativenessScore(run),
}));
// Sort by informativeness (highest first)
scored.sort((a, b) => b.score - a.score);
// Take top N most informative
const topRuns = scored.slice(0, count);
const runIds = topRuns.map(item => item.run.id);
await client.addRunsToAnnotationQueue({ queueId, runIds });
console.log(`Active learning sample: added ${runIds.length} most informative runs`);
}
// Temporal sampling (sample across time periods)
async function temporalSample(
projectName: string,
queueId: string,
daysToSample: number,
samplesPerDay: number
) {
const now = new Date();
const sampledIds: string[] = [];
for (let day = 0; day < daysToSample; day++) {
const startTime = new Date(now);
startTime.setDate(startTime.getDate() - day);
startTime.setHours(0, 0, 0, 0);
const endTime = new Date(startTime);
endTime.setHours(23, 59, 59, 999);
const dayRuns = [];
for await (const run of client.listRuns({
projectName,
isRoot: true,
startTime,
endTime,
})) {
dayRuns.push(run);
}
// Random sample from this day
const samples = dayRuns
.sort(() => Math.random() - 0.5)
.slice(0, samplesPerDay)
.map(r => r.id);
sampledIds.push(...samples);
console.log(`Day -${day}: sampled ${samples.length} runs`);
}
await client.addRunsToAnnotationQueue({ queueId, runIds: sampledIds });
console.log(`Temporal sample: added ${sampledIds.length} runs across ${daysToSample} days`);
}
function categorizeRun(run: any): string {
// Implement categorization logic based on run characteristics
const inputStr = JSON.stringify(run.inputs);
if (inputStr.includes("question")) return "question_answering";
if (inputStr.includes("summary")) return "summarization";
if (inputStr.includes("translate")) return "translation";
if (run.run_type === "tool") return "tool_use";
if (run.run_type === "retriever") return "retrieval";
return "other";
}
function calculateInformativenessScore(run: any): number {
// Score based on how informative this example would be for training/evaluation
let score = 0;
// Higher score for low confidence (uncertain cases are informative)
const confidence = run.outputs?.confidence || 1;
score += (1 - confidence) * 0.4;
// Higher score for edge cases
if (run.error) score += 0.3;
if (isUnusualInput(run.inputs)) score += 0.2;
// Higher score for diverse inputs
const inputComplexity = JSON.stringify(run.inputs).length / 1000;
score += Math.min(inputComplexity, 0.1);
return score;
}
Process annotation queues in batches for efficient review workflows.
import { Client } from "langsmith";
const client = new Client();
// Process annotation queue in batches
async function processBatchAnnotations(
queueId: string,
batchSize: number,
annotateFunction: (runs: RunWithAnnotationQueueInfo[]) => Promise<void>
) {
const { size } = await client.getSizeFromAnnotationQueue(queueId);
let processedCount = 0;
for (let i = 0; i < size; i += batchSize) {
const batch: any[] = [];
const actualBatchSize = Math.min(batchSize, size - i);
// Collect batch
for (let j = 0; j < actualBatchSize; j++) {
try {
const runInfo = await client.getRunFromAnnotationQueue(queueId, i + j);
batch.push(runInfo);
} catch (error) {
// End of queue or error
break;
}
}
if (batch.length === 0) break;
console.log(`Processing batch starting at offset ${i} (${batch.length} runs)...`);
await annotateFunction(batch);
processedCount += batch.length;
}
console.log(`All batches processed (${processedCount} total runs)`);
return processedCount;
}
// Example usage: Parallel annotation within batches
await processBatchAnnotations("queue-id-123", 20, async (runs) => {
// Process runs in parallel within batch
await Promise.all(runs.map(async (runInfo) => {
const feedback = await collectFeedback(runInfo.run);
await client.createFeedback({
run_id: runInfo.run.id,
key: "human_review",
score: feedback.score,
comment: feedback.comment,
value: feedback.details,
});
}));
});
// Batch annotation with progress tracking
async function batchAnnotateWithProgress(queueId: string, batchSize: number) {
const { size } = await client.getSizeFromAnnotationQueue(queueId);
let annotatedCount = 0;
for (let i = 0; i < size; i += batchSize) {
const batch: any[] = [];
const actualBatchSize = Math.min(batchSize, size - i);
for (let j = 0; j < actualBatchSize; j++) {
const runInfo = await client.getRunFromAnnotationQueue(queueId, i + j);
batch.push(runInfo);
}
// Annotate batch
for (const runInfo of batch) {
const annotation = await getHumanAnnotation(runInfo.run);
await client.createFeedback({
run_id: runInfo.run.id,
key: "annotation",
score: annotation.score,
comment: annotation.comment,
correction: annotation.correction,
});
annotatedCount++;
}
// Show progress
const progress = ((annotatedCount / size) * 100).toFixed(1);
console.log(`Progress: ${annotatedCount}/${size} (${progress}%)`);
}
}
// Helper function for collecting feedback (implement as needed)
async function collectFeedback(run: any): Promise<any> {
return {
score: 0.85,
comment: "Good quality output",
details: { accuracy: 0.9, helpfulness: 0.8 },
};
};
Monitor annotation progress across queues.
import { Client } from "langsmith";
const client = new Client();
// Track annotation progress for a single queue
async function trackAnnotationProgress(queueId: string) {
const queue = await client.readAnnotationQueue(queueId);
const { size } = await client.getSizeFromAnnotationQueue(queueId);
const runIds: string[] = [];
for (let i = 0; i < size; i++) {
const runInfo = await client.getRunFromAnnotationQueue(queueId, i);
runIds.push(runInfo.run.id);
}
// Count how many have feedback
let annotatedCount = 0;
for (const runId of runIds) {
const feedbackList = [];
for await (const feedback of client.listFeedback({ runIds: [runId] })) {
feedbackList.push(feedback);
}
if (feedbackList.length > 0) {
annotatedCount++;
}
}
const progress = {
queueName: queue.name,
queueId: queue.id,
totalRuns: size,
annotated: annotatedCount,
remaining: size - annotatedCount,
percentComplete: size > 0 ? ((annotatedCount / size) * 100).toFixed(1) : "0.0",
};
console.log(`\n=== Queue Progress: ${progress.queueName} ===`);
console.log(`Progress: ${progress.annotated}/${progress.totalRuns} (${progress.percentComplete}%)`);
console.log(`Remaining: ${progress.remaining}`);
return progress;
}
// Track progress across all queues
async function trackAllQueueProgress() {
const allProgress = [];
for await (const queue of client.listAnnotationQueues()) {
const progress = await trackAnnotationProgress(queue.id);
allProgress.push(progress);
}
console.log(`\n=== Overall Progress ===`);
const totalRuns = allProgress.reduce((sum, p) => sum + p.totalRuns, 0);
const totalAnnotated = allProgress.reduce((sum, p) => sum + p.annotated, 0);
const overallPercent = totalRuns > 0 ? ((totalAnnotated / totalRuns) * 100).toFixed(1) : "0.0";
console.log(`Total Runs: ${totalRuns}`);
console.log(`Total Annotated: ${totalAnnotated}`);
console.log(`Overall Progress: ${overallPercent}%`);
return allProgress;
}
// Monitor annotation velocity
async function measureAnnotationVelocity(queueId: string, intervalHours: number = 24) {
const measurements: Array<{ timestamp: Date; annotated: number }> = [];
setInterval(async () => {
const { size } = await client.getSizeFromAnnotationQueue(queueId);
const runIds: string[] = [];
for (let i = 0; i < size; i++) {
const runInfo = await client.getRunFromAnnotationQueue(queueId, i);
runIds.push(runInfo.run.id);
}
let annotated = 0;
for (const runId of runIds) {
const feedbackList = [];
for await (const feedback of client.listFeedback({ runIds: [runId] })) {
feedbackList.push(feedback);
}
if (feedbackList.length > 0) annotated++;
}
measurements.push({
timestamp: new Date(),
annotated,
});
// Calculate velocity if we have at least 2 measurements
if (measurements.length >= 2) {
const last = measurements[measurements.length - 1];
const previous = measurements[measurements.length - 2];
const delta = last.annotated - previous.annotated;
const hours = (last.timestamp.getTime() - previous.timestamp.getTime()) / (1000 * 60 * 60);
const velocity = delta / hours;
console.log(`Annotation velocity: ${velocity.toFixed(1)} annotations/hour`);
}
}, intervalHours * 60 * 60 * 1000);
}
Automatically add runs to queues based on criteria.
import { Client } from "langsmith";
const client = new Client();
// Automatically add runs based on criteria
async function autoPopulateQueue(projectName: string, queueId: string, intervalMs: number = 3600000) {
// Run periodically (e.g., via cron job)
setInterval(async () => {
const recentRuns = [];
for await (const run of client.listRuns({
projectName,
isRoot: true,
// Get runs from last hour
startTime: new Date(Date.now() - 3600000),
})) {
recentRuns.push(run);
}
// Filter based on criteria
const runsNeedingReview = recentRuns.filter(run => {
return (
run.error !== undefined ||
(run.outputs?.confidence && run.outputs.confidence < 0.7) ||
hasUnusualPattern(run) ||
hasNegativeFeedback(run)
);
});
if (runsNeedingReview.length > 0) {
await client.addRunsToAnnotationQueue({
queueId,
runIds: runsNeedingReview.map(r => r.id),
});
console.log(`[${new Date().toISOString()}] Auto-added ${runsNeedingReview.length} runs to queue`);
}
}, intervalMs);
}
function hasNegativeFeedback(run: any): boolean {
// Check if run has existing negative feedback
return false; // Implement based on your needs
}
// Trigger-based population
async function populateOnTrigger(projectName: string, queueId: string) {
// Listen for specific events and add runs
for await (const run of client.listRuns({
projectName,
isRoot: true,
})) {
// Check trigger conditions
if (shouldTriggerReview(run)) {
await client.addRunsToAnnotationQueue({
queueId,
runIds: [run.id],
});
console.log(`Triggered review for run: ${run.id}`);
}
}
}
function shouldTriggerReview(run: any): boolean {
// Complex trigger logic
const triggers = {
hasError: run.error !== undefined,
highLatency: run.end_time && run.start_time && (run.end_time - run.start_time) > 10000,
unusualOutput: run.outputs && JSON.stringify(run.outputs).includes("error"),
flaggedBySystem: run.tags?.includes("flagged"),
};
// Trigger if any condition is met
return Object.values(triggers).some(Boolean);
}
// Smart queue population with deduplication
async function smartPopulateQueue(projectName: string, queueId: string) {
const existingRunIds = new Set<string>();
// Get existing runs in queue
const { size } = await client.getSizeFromAnnotationQueue(queueId);
for (let i = 0; i < size; i++) {
const runInfo = await client.getRunFromAnnotationQueue(queueId, i);
existingRunIds.add(runInfo.run.id);
}
// Find new runs to add
const newRunIds: string[] = [];
for await (const run of client.listRuns({
projectName,
isRoot: true,
})) {
if (!existingRunIds.has(run.id) && shouldAddToQueue(run)) {
newRunIds.push(run.id);
}
}
if (newRunIds.length > 0) {
await client.addRunsToAnnotationQueue({ queueId, runIds: newRunIds });
console.log(`Added ${newRunIds.length} new runs (skipped ${existingRunIds.size} already in queue)`);
}
}
function shouldAddToQueue(run: any): boolean {
// Implement your queue admission criteria
return run.outputs?.confidence < 0.7;
}
Implement multi-stage annotation pipelines with different review levels.
import { Client } from "langsmith";
const client = new Client();
// Multi-stage annotation pipeline
async function multiStageAnnotation() {
// Stage 1: Initial screening
const screeningQueue = await client.createAnnotationQueue({
name: "Stage 1: Initial Screening",
description: "First pass: quick review to flag issues",
rubricInstructions: "Quick pass/fail: Does this need detailed review? (30 seconds max)",
});
// Stage 2: Detailed review
const detailedQueue = await client.createAnnotationQueue({
name: "Stage 2: Detailed Review",
description: "In-depth analysis of flagged items",
rubricInstructions: "Thorough review: accuracy, helpfulness, safety, provide corrections",
});
// Stage 3: Final verification
const verificationQueue = await client.createAnnotationQueue({
name: "Stage 3: Final Verification",
description: "Verify corrections before deployment",
rubricInstructions: "Verify suggested corrections are appropriate and complete",
});
// Process pipeline
await processScreening(screeningQueue.id, detailedQueue.id);
await processDetailedReview(detailedQueue.id, verificationQueue.id);
await processFinalVerification(verificationQueue.id);
return { screeningQueue, detailedQueue, verificationQueue };
}
async function processScreening(screeningQueueId: string, detailedQueueId: string) {
const { size } = await client.getSizeFromAnnotationQueue(screeningQueueId);
let escalatedCount = 0;
for (let i = 0; i < size; i++) {
// Always fetch index 0: each screened run is removed from this queue below, shifting later runs forward
const runInfo = await client.getRunFromAnnotationQueue(screeningQueueId, 0);
const { run } = runInfo;
// Quick screening (30 seconds max)
const needsDetailedReview = await quickScreen(run);
// Record screening result
await client.createFeedback({
run_id: run.id,
key: "screening_result",
score: needsDetailedReview ? 0 : 1,
comment: needsDetailedReview ? "Needs detailed review" : "Passed screening",
value: { stage: "screening", escalated: needsDetailedReview },
});
if (needsDetailedReview) {
// Move to detailed review
await client.addRunsToAnnotationQueue({
queueId: detailedQueueId,
runIds: [run.id],
});
escalatedCount++;
}
// Remove from screening queue
await client.deleteRunFromAnnotationQueue(screeningQueueId, run.id);
}
console.log(`Screening complete: ${escalatedCount} escalated to detailed review`);
}
async function processDetailedReview(detailedQueueId: string, verificationQueueId: string) {
const { size } = await client.getSizeFromAnnotationQueue(detailedQueueId);
let correctionCount = 0;
  for (let i = 0; i < size; i++) {
    // Always read index 0: each run is removed from the queue at the end of the iteration.
    const runInfo = await client.getRunFromAnnotationQueue(detailedQueueId, 0);
const { run } = runInfo;
// Detailed review (5-10 minutes)
const review = await detailedReview(run);
// Create detailed feedback
await client.createFeedback({
run_id: run.id,
key: "detailed_review",
score: review.score,
comment: review.comment,
correction: review.correction,
value: {
stage: "detailed_review",
dimensions: review.dimensions,
needsCorrection: !!review.correction,
},
});
// If correction provided, send to verification
if (review.correction) {
await client.addRunsToAnnotationQueue({
queueId: verificationQueueId,
runIds: [run.id],
});
correctionCount++;
}
// Remove from detailed queue
await client.deleteRunFromAnnotationQueue(detailedQueueId, run.id);
}
console.log(`Detailed review complete: ${correctionCount} sent to verification`);
}
async function processFinalVerification(verificationQueueId: string) {
const { size } = await client.getSizeFromAnnotationQueue(verificationQueueId);
let approvedCount = 0;
  for (let i = 0; i < size; i++) {
    // Always read index 0: each run is removed from the queue at the end of the iteration.
    const runInfo = await client.getRunFromAnnotationQueue(verificationQueueId, 0);
const { run } = runInfo;
// Get the correction from detailed review
const correctionFeedback = [];
for await (const feedback of client.listFeedback({
runIds: [run.id],
feedbackKeys: ["detailed_review"],
})) {
correctionFeedback.push(feedback);
}
if (correctionFeedback.length > 0 && correctionFeedback[0].correction) {
// Verify correction
const verification = await verifyCorrection(run, correctionFeedback[0].correction);
// Record verification result
await client.createFeedback({
run_id: run.id,
key: "verification_result",
score: verification.approved ? 1 : 0,
comment: verification.comment,
value: {
stage: "verification",
approved: verification.approved,
verifier: verification.verifier,
},
});
if (verification.approved) {
approvedCount++;
// Could update production model or dataset here
console.log(`✓ Approved correction for run ${run.id}`);
}
}
// Remove from verification queue
await client.deleteRunFromAnnotationQueue(verificationQueueId, run.id);
}
console.log(`Verification complete: ${approvedCount} corrections approved`);
}
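// Optional follow-up sketch (assumptions: the dataset name is a placeholder and the client is
// assumed to expose createExample(inputs, outputs, { datasetName })): persist an approved
// correction as a dataset example for fine-tuning or regression testing.
async function saveApprovedCorrection(run: any, correction: any) {
  await client.createExample(run.inputs, correction.outputs, {
    datasetName: "approved-corrections",
  });
}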
// Helper functions (implement as needed)
async function quickScreen(run: any): Promise<boolean> {
// Quick screening logic - is detailed review needed?
return run.error !== undefined || run.outputs?.confidence < 0.7;
}
async function detailedReview(run: any): Promise<any> {
// Detailed review logic
return {
score: 0.75,
comment: "Needs minor correction",
correction: { outputs: { answer: "Corrected answer" } },
dimensions: { accuracy: 0.7, helpfulness: 0.8, safety: 1.0 },
};
}
async function verifyCorrection(run: any, correction: any): Promise<any> {
// Verification logic
return {
approved: true,
comment: "Correction looks good",
verifier: "senior-reviewer",
};
}
Move runs between queues as they progress through annotation stages.
import { Client } from "langsmith";
const client = new Client();
// Define workflow stages
const workflow = {
intake: await client.createAnnotationQueue({
name: "Intake",
description: "Initial triage and categorization",
}),
technical: await client.createAnnotationQueue({
name: "Technical Review",
description: "Technical accuracy review",
}),
editorial: await client.createAnnotationQueue({
name: "Editorial Review",
description: "Clarity and communication review",
}),
approved: await client.createAnnotationQueue({
name: "Approved",
description: "Final approved examples",
}),
};
// Process intake queue and route to appropriate next stage
async function processIntake(intakeQueueId: string, workflowQueues: any) {
const { size } = await client.getSizeFromAnnotationQueue(intakeQueueId);
  for (let i = 0; i < size; i++) {
    // Always read index 0: each run is removed from the intake queue at the end of the iteration.
    const runInfo = await client.getRunFromAnnotationQueue(intakeQueueId, 0);
const { run } = runInfo;
// Categorize and route
const category = await categorizeForWorkflow(run);
let nextQueueId;
if (category.needsTechnicalReview) {
nextQueueId = workflowQueues.technical.id;
} else if (category.needsEditorialReview) {
nextQueueId = workflowQueues.editorial.id;
} else {
nextQueueId = workflowQueues.approved.id;
}
// Add to next queue
await client.addRunsToAnnotationQueue({
queueId: nextQueueId,
runIds: [run.id],
});
// Record routing decision
await client.createFeedback({
run_id: run.id,
key: "workflow_routing",
value: {
from: "intake",
      to: category.needsTechnicalReview ? "technical" : category.needsEditorialReview ? "editorial" : "approved",
reason: category.reason,
},
});
// Remove from intake
await client.deleteRunFromAnnotationQueue(intakeQueueId, run.id);
}
}
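// Illustrative promotion helper (stage names and feedback reuse are assumptions): once a stage
// signs off on a run, move it to the next queue and drop it from the current one, so the same
// pattern carries runs from technical review to editorial review to approved.
async function promoteRun(
  runId: string,
  fromQueueId: string,
  toQueueId: string,
  fromStage: string,
  toStage: string
) {
  await client.addRunsToAnnotationQueue({ queueId: toQueueId, runIds: [runId] });
  await client.createFeedback({
    run_id: runId,
    key: "workflow_routing",
    value: { from: fromStage, to: toStage, reason: "stage sign-off" },
  });
  await client.deleteRunFromAnnotationQueue(fromQueueId, runId);
}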
async function categorizeForWorkflow(run: any): Promise<any> {
// Categorization logic
return {
needsTechnicalReview: run.outputs?.technical_terms_count > 5,
needsEditorialReview: run.outputs?.readability_score < 0.7,
reason: "Contains technical content",
};
}
Implement priority-based processing for annotation queues.
import { Client } from "langsmith";
const client = new Client();
// Create priority-based queue system
async function setupPriorityQueues() {
return {
critical: await client.createAnnotationQueue({
name: "P0 - Critical",
description: "Critical issues requiring immediate review",
}),
high: await client.createAnnotationQueue({
name: "P1 - High Priority",
description: "High-priority items for review within 24 hours",
}),
medium: await client.createAnnotationQueue({
name: "P2 - Medium Priority",
description: "Medium-priority items for review within 1 week",
}),
low: await client.createAnnotationQueue({
name: "P3 - Low Priority",
description: "Low-priority items for review when capacity allows",
}),
};
}
// Route runs to priority queues
async function routeToPriorityQueue(run: any, priorityQueues: any) {
const priority = calculatePriority(run);
let targetQueue;
if (priority >= 0.9) targetQueue = priorityQueues.critical;
else if (priority >= 0.7) targetQueue = priorityQueues.high;
else if (priority >= 0.4) targetQueue = priorityQueues.medium;
else targetQueue = priorityQueues.low;
await client.addRunsToAnnotationQueue({
queueId: targetQueue.id,
runIds: [run.id],
});
// Tag with priority
await client.createFeedback({
run_id: run.id,
key: "priority_score",
score: priority,
value: { priority_level: targetQueue.name },
});
}
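// Illustrative wiring (the project name is an assumption): score and route recent root runs;
// calculatePriority below supplies the score used by routeToPriorityQueue.
async function routeRecentRuns(projectName: string, priorityQueues: any) {
  for await (const run of client.listRuns({ projectName, isRoot: true })) {
    await routeToPriorityQueue(run, priorityQueues);
  }
}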
function calculatePriority(run: any): number {
let priority = 0;
// Critical: errors or safety concerns
if (run.error) priority += 0.5;
if (run.outputs?.safety_score < 0.5) priority += 0.5;
// High: low confidence or user complaints
if (run.outputs?.confidence < 0.5) priority += 0.3;
if (run.tags?.includes("user_complaint")) priority += 0.3;
// Medium: moderate confidence
if (run.outputs?.confidence >= 0.5 && run.outputs?.confidence < 0.7) priority += 0.2;
return Math.min(priority, 1.0);
}
Build a dashboard to manage annotation queues.
import { Client } from "langsmith";
const client = new Client();
// Get comprehensive queue dashboard data
async function getQueueDashboard() {
const dashboard = {
queues: [] as any[],
summary: {
totalQueues: 0,
totalPendingRuns: 0,
totalAnnotated: 0,
estimatedHoursRemaining: 0,
},
};
// Get all queues
for await (const queue of client.listAnnotationQueues()) {
const { size } = await client.getSizeFromAnnotationQueue(queue.id);
// Sample first few runs to estimate annotation status
let annotatedInQueue = 0;
const sampleSize = Math.min(size, 20);
for (let i = 0; i < sampleSize; i++) {
const runInfo = await client.getRunFromAnnotationQueue(queue.id, i);
const feedbackList = [];
for await (const feedback of client.listFeedback({
runIds: [runInfo.run.id],
})) {
feedbackList.push(feedback);
}
if (feedbackList.length > 0) annotatedInQueue++;
}
const annotationRate = sampleSize > 0 ? annotatedInQueue / sampleSize : 0;
const estimatedAnnotated = Math.round(size * annotationRate);
dashboard.queues.push({
id: queue.id,
name: queue.name,
description: queue.description,
totalRuns: size,
estimatedAnnotated,
estimatedPending: size - estimatedAnnotated,
percentComplete: size > 0 ? ((estimatedAnnotated / size) * 100).toFixed(1) : "0",
lastUpdated: queue.updated_at,
});
dashboard.summary.totalPendingRuns += size - estimatedAnnotated;
dashboard.summary.totalAnnotated += estimatedAnnotated;
}
dashboard.summary.totalQueues = dashboard.queues.length;
// Estimate 5 minutes per annotation
dashboard.summary.estimatedHoursRemaining =
(dashboard.summary.totalPendingRuns * 5) / 60;
return dashboard;
}
// Display dashboard
async function displayDashboard() {
const dashboard = await getQueueDashboard();
console.log("\n" + "=".repeat(60));
console.log("ANNOTATION QUEUE DASHBOARD");
console.log("=".repeat(60));
console.log(`\nTotal Queues: ${dashboard.summary.totalQueues}`);
console.log(`Total Pending: ${dashboard.summary.totalPendingRuns}`);
console.log(`Total Annotated: ${dashboard.summary.totalAnnotated}`);
console.log(`Estimated Hours Remaining: ${dashboard.summary.estimatedHoursRemaining.toFixed(1)}`);
console.log(`\n${"Queue Name".padEnd(30)} | ${"Runs".padEnd(6)} | ${"Done".padEnd(6)} | Progress`);
console.log("-".repeat(60));
for (const queue of dashboard.queues) {
const name = queue.name.padEnd(30).substring(0, 30);
const total = queue.totalRuns.toString().padEnd(6);
const annotated = queue.estimatedAnnotated.toString().padEnd(6);
const progress = `${queue.percentComplete}%`;
console.log(`${name} | ${total} | ${annotated} | ${progress}`);
}
}
Assign specific reviewers to annotation queue items.
import { Client } from "langsmith";
const client = new Client();
// Assign reviewers to queue items using metadata
async function assignReviewers(queueId: string, reviewers: string[]) {
const { size } = await client.getSizeFromAnnotationQueue(queueId);
let assignmentCount = 0;
for (let i = 0; i < size; i++) {
const runInfo = await client.getRunFromAnnotationQueue(queueId, i);
// Round-robin assignment
const assignedReviewer = reviewers[i % reviewers.length];
// Record assignment using feedback
await client.createFeedback({
run_id: runInfo.run.id,
key: "assigned_reviewer",
value: assignedReviewer,
comment: `Assigned to ${assignedReviewer} for review`,
});
assignmentCount++;
}
console.log(`Assigned ${assignmentCount} runs to ${reviewers.length} reviewers`);
}
// Get runs assigned to specific reviewer
async function getReviewerWorkload(queueId: string, reviewerId: string) {
const assignedRuns = [];
const { size } = await client.getSizeFromAnnotationQueue(queueId);
for (let i = 0; i < size; i++) {
const runInfo = await client.getRunFromAnnotationQueue(queueId, i);
// Check assignment
const assignmentList = [];
for await (const feedback of client.listFeedback({
runIds: [runInfo.run.id],
feedbackKeys: ["assigned_reviewer"],
})) {
assignmentList.push(feedback);
}
if (assignmentList.length > 0 && assignmentList[0].value === reviewerId) {
assignedRuns.push(runInfo.run);
}
}
console.log(`Reviewer ${reviewerId} has ${assignedRuns.length} runs assigned`);
return assignedRuns;
}
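Usage sketch (the queue ID and reviewer handles below are placeholders, not values returned by the API): assign reviewers round-robin across a queue, then pull one reviewer's workload.
const reviewers = ["alice@example.com", "bob@example.com", "carol@example.com"];
await assignReviewers("queue-uuid-123", reviewers);
const aliceRuns = await getReviewerWorkload("queue-uuid-123", "alice@example.com");
console.log(`First assigned run for alice: ${aliceRuns[0]?.id ?? "none"}`);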