The LangSmith Vitest Integration connects the Vitest testing framework to LangSmith's evaluation and tracing platform. It enables test-driven evaluation workflows in which Vitest tests automatically create datasets, run evaluations, and track results in LangSmith. The integration includes enhanced matchers for LLM output validation, custom evaluators, and automatic feedback logging.
Install the package:
npm install langsmith
The integration is exposed through two entry points: langsmith/vitest (test helpers) and langsmith/vitest/reporter (the Vitest reporter).
For ESM:
import {
test,
it,
describe,
expect,
logFeedback,
logOutputs,
wrapEvaluator
} from "langsmith/vitest";For CommonJS:
const {
test,
it,
describe,
expect,
logFeedback,
logOutputs,
wrapEvaluator
} = require("langsmith/vitest");
import { test, describe, expect } from "langsmith/vitest";
// Define a LangSmith-tracked test
test(
"summarize text correctly",
{
input: { text: "Long document about climate change..." },
expected: { summary: "Climate change overview" }
},
async (input) => {
const result = await summarizeText(input.text);
expect(result.summary).toBeRelativeCloseTo(
"Climate change overview",
{ threshold: 0.8 }
);
return result;
}
);
// Use describe to group tests
describe("Translation Tests", () => {
test(
"translate to Spanish",
{
input: { text: "Hello world", targetLang: "es" },
expected: { translation: "Hola mundo" }
},
async (input) => {
const result = await translate(input.text, input.targetLang);
return result;
}
);
});
LangSmith Vitest Integration is built around several key components:
test, it, and describe functions that automatically log tests as dataset examples and experiments
Define Vitest tests with automatic LangSmith dataset and experiment tracking.
function test<I, O>(
name: string,
lsParams: LangSmithJestlikeWrapperParams<I, O>,
fn: (input: I) => O | Promise<O>,
timeout?: number
): void;
function it<I, O>(
name: string,
lsParams: LangSmithJestlikeWrapperParams<I, O>,
fn: (input: I) => O | Promise<O>,
timeout?: number
): void;
Define test suites with LangSmith integration for organizing related tests.
function describe(name: string, fn: () => void, config?: object): void;
Enhanced expect assertion library with custom matchers for LLM output validation.
function expect(value: any): ExtendedExpect;
interface ExtendedExpect extends Expect {
toBeRelativeCloseTo(expected: string, options?: { threshold?: number }): void;
toBeAbsoluteCloseTo(expected: string, options?: { threshold?: number }): void;
toBeSemanticCloseTo(
expected: string,
options?: { threshold?: number; embeddings?: any }
): void;
evaluatedBy(evaluator: SimpleEvaluator): void;
}
Functions to log feedback and outputs during test execution.
function logFeedback(feedback: FeedbackCreate | FeedbackCreate[]): void;
function logOutputs(output: any): void;
Wrap evaluator functions for use with custom matchers and test evaluation.
function wrapEvaluator(evaluator: (args: any) => any): SimpleEvaluator;
Configuration parameters for LangSmith-tracked tests.
interface LangSmithJestlikeWrapperParams<I, O> {
input: I;
expected?: O;
evaluators?: SimpleEvaluator[];
client?: Client;
datasetName?: string;
projectName?: string;
}
Define Vitest tests with automatic LangSmith integration. Tests are automatically logged as dataset examples, and test runs create experiments in LangSmith for tracking and evaluation.
Creates a LangSmith-tracked test case that logs inputs, outputs, and evaluation results.
/**
* Define a Vitest test case with LangSmith tracking
* @param name - Test name/description
* @param lsParams - LangSmith parameters including input, expected output, and evaluators
* @param fn - Test function that receives input and returns output
* @param timeout - Optional test timeout in milliseconds
*/
function test<I, O>(
name: string,
lsParams: LangSmithJestlikeWrapperParams<I, O>,
fn: (input: I) => O | Promise<O>,
timeout?: number
): void;
Usage Examples:
import { test, expect, wrapEvaluator } from "langsmith/vitest";
// Simple test with input and expected output
test(
"classify sentiment correctly",
{
input: { text: "I love this product!" },
expected: { sentiment: "positive", confidence: 0.95 }
},
async (input) => {
const result = await classifySentiment(input.text);
expect(result.sentiment).toBe("positive");
expect(result.confidence).toBeGreaterThan(0.9);
return result;
}
);
// Test with custom dataset name
test(
"answer question from context",
{
input: {
context: "Paris is the capital of France.",
question: "What is the capital of France?"
},
expected: { answer: "Paris" },
datasetName: "qa-dataset"
},
async (input) => {
const result = await answerQuestion(input.context, input.question);
return result;
}
);
// Test with custom evaluators
const accuracyEvaluator = wrapEvaluator((input, output, expected) => ({
key: "accuracy",
score: JSON.stringify(output.entities) === JSON.stringify(expected.entities) ? 1 : 0
}));
test(
"extract entities correctly",
{
input: { text: "Apple CEO Tim Cook announced new products." },
expected: {
entities: [
{ name: "Apple", type: "organization" },
{ name: "Tim Cook", type: "person" }
]
},
evaluators: [accuracyEvaluator]
},
async (input) => {
const result = await extractEntities(input.text);
return result;
}
);
// Test with custom LangSmith client and project
import { Client } from "langsmith";
const client = new Client({ apiKey: process.env.LANGSMITH_API_KEY });
test(
"summarize document",
{
input: { document: "Long technical document..." },
expected: { summary: "Brief technical summary" },
client,
projectName: "summarization-tests"
},
async (input) => {
const result = await summarizeDocument(input.document);
return result;
}
);
// Test with timeout
test(
"generate response within time limit",
{
input: { prompt: "Explain quantum computing" },
expected: { response: "Quantum computing explanation..." }
},
async (input) => {
const result = await generateResponse(input.prompt);
expect(result.response).toBeTruthy();
return result;
},
5000 // 5 second timeout
);
Alias for test() function following Vitest/Jest naming conventions.
/**
* Alias for test() function
* @param name - Test name/description
* @param lsParams - LangSmith parameters including input, expected output, and evaluators
* @param fn - Test function that receives input and returns output
* @param timeout - Optional test timeout in milliseconds
*/
function it<I, O>(
name: string,
lsParams: LangSmithJestlikeWrapperParams<I, O>,
fn: (input: I) => O | Promise<O>,
timeout?: number
): void;
Usage Examples:
import { it, describe, expect } from "langsmith/vitest";
// Using 'it' instead of 'test' (same functionality)
it(
"should translate text correctly",
{
input: { text: "Hello", targetLang: "es" },
expected: { translation: "Hola" }
},
async (input) => {
const result = await translate(input.text, input.targetLang);
expect(result.translation).toBe("Hola");
return result;
}
);
// Nested in describe block (common pattern)
describe("Math Bot", () => {
it(
"should solve addition problems",
{
input: { expression: "2 + 2" },
expected: { result: 4 }
},
async (input) => {
const result = await solveMath(input.expression);
return result;
}
);
it(
"should solve multiplication problems",
{
input: { expression: "3 * 4" },
expected: { result: 12 }
},
async (input) => {
const result = await solveMath(input.expression);
return result;
}
);
});
Define test suites with LangSmith integration for organizing related tests into logical groups.
Creates a test suite that groups related LangSmith-tracked tests.
/**
* Define a test suite with LangSmith integration
* @param name - Suite name/description
* @param fn - Function containing test definitions
* @param config - Optional configuration for the suite
*/
function describe(name: string, fn: () => void, config?: object): void;
Usage Examples:
import { describe, test, expect } from "langsmith/vitest";
import { beforeEach } from "vitest";
// Basic test suite
describe("Text Classification", () => {
test(
"classify positive sentiment",
{
input: { text: "Great product!" },
expected: { sentiment: "positive" }
},
async (input) => {
const result = await classify(input.text);
return result;
}
);
test(
"classify negative sentiment",
{
input: { text: "Terrible experience." },
expected: { sentiment: "negative" }
},
async (input) => {
const result = await classify(input.text);
return result;
}
);
});
// Nested describe blocks
describe("Language Model Tests", () => {
describe("Question Answering", () => {
test(
"answer factual questions",
{
input: { question: "What is 2+2?" },
expected: { answer: "4" }
},
async (input) => {
const result = await answerQuestion(input.question);
return result;
}
);
});
describe("Summarization", () => {
test(
"summarize news articles",
{
input: { article: "Long news article..." },
expected: { summary: "Brief summary" }
},
async (input) => {
const result = await summarize(input.article);
return result;
}
);
});
});
// Suite with shared setup
describe("Translation API", () => {
let translator: Translator;
beforeEach(() => {
translator = new Translator({ apiKey: "test-key" });
});
test(
"translate to Spanish",
{
input: { text: "Hello", lang: "es" },
expected: { translation: "Hola" }
},
async (input) => {
const result = await translator.translate(input.text, input.lang);
return result;
}
);
test(
"translate to French",
{
input: { text: "Hello", lang: "fr" },
expected: { translation: "Bonjour" }
},
async (input) => {
const result = await translator.translate(input.text, input.lang);
return result;
}
);
});
// Suite with custom configuration
describe(
"Slow LLM Tests",
() => {
test(
"generate long response",
{
input: { prompt: "Write a detailed essay..." },
expected: { response: "Essay content..." }
},
async (input) => {
const result = await generateLongResponse(input.prompt);
return result;
}
);
},
{ timeout: 30000 } // 30 second timeout for all tests in suite
);
Enhanced expect assertion library with custom matchers specifically designed for validating LLM outputs and AI model results.
Returns an extended expect object with standard Vitest matchers plus custom LangSmith matchers.
/**
* Enhanced expect with custom matchers for LLM output validation
* @param value - The value to assert against
* @returns Extended expect object with custom matchers
*/
function expect(value: any): ExtendedExpect;
Asserts that a string is similar to the expected string based on relative edit distance (normalized by string length).
/**
* Assert relative string similarity using normalized edit distance
* @param expected - Expected string to compare against
* @param options - Options object
* @param options.threshold - Similarity threshold (0-1, default: 0.8)
*/
toBeRelativeCloseTo(expected: string, options?: { threshold?: number }): void;
Usage Examples:
import { test, expect } from "langsmith/vitest";
test(
"generate similar output",
{
input: { prompt: "Summarize this text" },
expected: { summary: "This is a summary of the text" }
},
async (input) => {
const result = await generate(input.prompt);
// Check if result is relatively close (allows minor variations)
expect(result.summary).toBeRelativeCloseTo(
"This is a summary of the text",
{ threshold: 0.8 } // 80% similarity required
);
return result;
}
);
// Testing with different thresholds
test(
"paraphrase text",
{
input: { text: "The quick brown fox" },
expected: { paraphrase: "A fast brown fox" }
},
async (input) => {
const result = await paraphrase(input.text);
// Looser threshold for paraphrasing (allows more variation)
expect(result.paraphrase).toBeRelativeCloseTo("A fast brown fox", {
threshold: 0.6
});
return result;
}
);
// Strict similarity check
test(
"extract exact entity",
{
input: { text: "Apple Inc. is a company" },
expected: { entity: "Apple Inc." }
},
async (input) => {
const result = await extractEntity(input.text);
// Strict threshold for exact matching
expect(result.entity).toBeRelativeCloseTo("Apple Inc.", {
threshold: 0.95 // 95% similarity required
});
return result;
}
);
Asserts that a string is similar to the expected string based on absolute edit distance (character differences).
/**
* Assert absolute string similarity using raw edit distance
* @param expected - Expected string to compare against
* @param options - Options object
* @param options.threshold - Maximum edit distance allowed (default: 5)
*/
toBeAbsoluteCloseTo(expected: string, options?: { threshold?: number }): void;
Usage Examples:
import { test, expect } from "langsmith/vitest";
test(
"correct spelling with minor errors",
{
input: { text: "recieve the package" },
expected: { corrected: "receive the package" }
},
async (input) => {
const result = await spellCheck(input.text);
// Allow up to 2 character differences
expect(result.corrected).toBeAbsoluteCloseTo("receive the package", {
threshold: 2
});
return result;
}
);
// Testing exact matches
test(
"extract exact quote",
{
input: { document: "The quote is 'Hello World'" },
expected: { quote: "Hello World" }
},
async (input) => {
const result = await extractQuote(input.document);
// Strict absolute threshold (0 = exact match)
expect(result.quote).toBeAbsoluteCloseTo("Hello World", {
threshold: 0
});
return result;
}
);
// Testing with tolerance for minor variations
test(
"generate code snippet",
{
input: { description: "Print hello world" },
expected: { code: 'console.log("Hello World");' }
},
async (input) => {
const result = await generateCode(input.description);
// Allow up to 5 character differences
expect(result.code).toBeAbsoluteCloseTo('console.log("Hello World");', {
threshold: 5
});
return result;
}
);
Asserts that a string is semantically similar to the expected string using embedding-based similarity.
/**
* Assert semantic similarity using embeddings
* @param expected - Expected string to compare against
* @param options - Options object
* @param options.threshold - Similarity threshold (0-1, default: 0.85)
* @param options.embeddings - Custom embeddings model/function
*/
toBeSemanticCloseTo(
expected: string,
options?: { threshold?: number; embeddings?: any }
): void;
Usage Examples:
import { test, expect } from "langsmith/vitest";
test(
"paraphrase maintains semantic meaning",
{
input: { text: "The cat sat on the mat" },
expected: { paraphrase: "A feline rested on the rug" }
},
async (input) => {
const result = await paraphrase(input.text);
// Check semantic similarity (different words, same meaning)
expect(result.paraphrase).toBeSemanticCloseTo(
"A feline rested on the rug",
{ threshold: 0.85 } // 85% semantic similarity
);
return result;
}
);
// Testing answer equivalence
test(
"answer question semantically",
{
input: { question: "What is the capital of France?" },
expected: { answer: "Paris" }
},
async (input) => {
const result = await answerQuestion(input.question);
// Accept semantically equivalent answers
expect(result.answer).toBeSemanticCloseTo("The capital is Paris", {
threshold: 0.9
});
return result;
}
);
// Using a custom embeddings model ("custom-embeddings" is an illustrative placeholder for your embeddings provider)
import { OpenAIEmbeddings } from "custom-embeddings";
const embeddings = new OpenAIEmbeddings({ model: "text-embedding-3-small" });
test(
"summarize with semantic accuracy",
{
input: { article: "Long article about climate change..." },
expected: { summary: "Overview of climate change impacts" }
},
async (input) => {
const result = await summarize(input.article);
expect(result.summary).toBeSemanticCloseTo(
"Overview of climate change impacts",
{
threshold: 0.8,
embeddings
}
);
return result;
}
);
// Testing translation semantic equivalence
test(
"translate with semantic preservation",
{
input: { text: "Hello, how are you?", lang: "es" },
expected: { translation: "Hola, ¿cómo estás?" }
},
async (input) => {
const result = await translate(input.text, input.lang);
// Verify translation maintains semantic meaning
expect(result.translation).toBeSemanticCloseTo("Hola, ¿cómo estás?", {
threshold: 0.9
});
return result;
}
);
Evaluates the value using a custom evaluator function and asserts based on the evaluation result.
/**
* Evaluate value using custom evaluator
* @param evaluator - Simple evaluator function wrapped with wrapEvaluator
*/
evaluatedBy(evaluator: SimpleEvaluator): void;
Usage Examples:
import { test, expect, wrapEvaluator } from "langsmith/vitest";
// Create custom evaluator
const lengthEvaluator = wrapEvaluator((input, output, expected) => {
const length = output.length;
const isValid = length >= 10 && length <= 100;
return {
key: "length_check",
score: isValid ? 1 : 0,
comment: `Length: ${length} (expected 10-100)`
};
});
test(
"generate response with correct length",
{
input: { prompt: "Write a short description" },
expected: { text: "A short description text" }
},
async (input) => {
const result = await generate(input.prompt);
// Evaluate using custom evaluator
expect(result.text).evaluatedBy(lengthEvaluator);
return result;
}
);
// Evaluator with scoring logic
const qualityEvaluator = wrapEvaluator((input, output, expected) => {
let score = 0;
// Check for required elements
if (output.includes("Introduction")) score += 0.33;
if (output.includes("Body")) score += 0.33;
if (output.includes("Conclusion")) score += 0.34;
return {
key: "structure_quality",
score,
comment: `Structure score: ${(score * 100).toFixed(0)}%`
};
});
test(
"generate well-structured essay",
{
input: { topic: "Climate change" },
expected: { essay: "Introduction\nBody\nConclusion" }
},
async (input) => {
const result = await generateEssay(input.topic);
expect(result.essay).evaluatedBy(qualityEvaluator);
return result;
}
);
// Complex evaluator with multiple checks
const comprehensiveEvaluator = wrapEvaluator((input, output, expected) => {
const checks = {
hasAnswer: output.answer !== undefined,
hasCitations: output.citations && output.citations.length > 0,
correctLength: output.answer.length >= 50,
matchesExpected: output.answer.includes(expected.answer)
};
const passedChecks = Object.values(checks).filter(Boolean).length;
const score = passedChecks / Object.keys(checks).length;
return {
key: "comprehensive_check",
score,
value: checks,
comment: `Passed ${passedChecks}/${Object.keys(checks).length} checks`
};
});
test(
"generate comprehensive answer",
{
input: { question: "Explain quantum computing" },
expected: { answer: "quantum mechanics" }
},
async (input) => {
const result = await generateAnswer(input.question);
expect(result).evaluatedBy(comprehensiveEvaluator);
return result;
}
);
// Async evaluator with external validation
const toxicityEvaluator = wrapEvaluator(async (input, output, expected) => {
// Call external moderation API
const moderation = await checkToxicity(output);
return {
key: "toxicity_check",
score: moderation.isSafe ? 1 : 0,
value: moderation,
comment: moderation.isSafe ? "Content is safe" : "Toxic content detected"
};
});
test(
"generate safe content",
{
input: { prompt: "Write a friendly greeting" },
expected: { text: "Hello! How can I help you?" }
},
async (input) => {
const result = await generate(input.prompt);
expect(result.text).evaluatedBy(toxicityEvaluator);
return result;
}
);
Functions for logging feedback and outputs during test execution, enabling detailed tracking and evaluation in LangSmith.
Logs feedback during test execution to track evaluation results in LangSmith.
/**
* Log feedback during test execution
* @param feedback - Single feedback object or array of feedback objects
*/
function logFeedback(feedback: FeedbackCreate | FeedbackCreate[]): void;
interface FeedbackCreate {
run_id?: string;
key: string;
score?: number | boolean | null;
value?: number | boolean | string | object | null;
comment?: string;
correction?: object;
feedbackSourceType?: FeedbackSourceType;
}
Usage Examples:
import { test, expect, logFeedback } from "langsmith/vitest";
test(
"generate response with quality feedback",
{
input: { prompt: "Explain AI" },
expected: { response: "AI explanation..." }
},
async (input) => {
const result = await generate(input.prompt);
// Log single feedback
logFeedback({
key: "response_quality",
score: 0.9,
comment: "High quality response"
});
expect(result.response).toBeTruthy();
return result;
}
);
// Log multiple feedback items
test(
"analyze sentiment with detailed feedback",
{
input: { text: "Great product!" },
expected: { sentiment: "positive" }
},
async (input) => {
const result = await analyzeSentiment(input.text);
// Log multiple feedback items
logFeedback([
{
key: "accuracy",
score: result.sentiment === "positive" ? 1 : 0
},
{
key: "confidence",
score: result.confidence,
comment: `Confidence: ${result.confidence.toFixed(2)}`
},
{
key: "latency",
value: result.processingTime,
comment: `Processed in ${result.processingTime}ms`
}
]);
return result;
}
);
// Log feedback with corrections
const expectedEntities = [
{ name: "Apple", type: "organization" },
{ name: "Tim Cook", type: "person" }
];
test(
"extract entities with corrections",
{
input: { text: "Apple CEO Tim Cook announced..." },
expected: { entities: expectedEntities }
},
async (input) => {
const result = await extractEntities(input.text);
const isCorrect =
JSON.stringify(result.entities) === JSON.stringify(expectedEntities);
if (!isCorrect) {
logFeedback({
key: "entity_extraction",
score: 0,
comment: "Incorrect entity extraction",
correction: {
expected: expectedEntities,
actual: result.entities
}
});
} else {
logFeedback({
key: "entity_extraction",
score: 1,
comment: "Perfect entity extraction"
});
}
return result;
}
);
// Log boolean feedback
test(
"validate output format",
{
input: { data: "raw data" },
expected: { formatted: true }
},
async (input) => {
const result = await formatData(input.data);
logFeedback({
key: "valid_json",
score: isValidJSON(result.formatted),
comment: isValidJSON(result.formatted)
? "Valid JSON output"
: "Invalid JSON output"
});
return result;
}
);
// Log structured feedback values
test(
"analyze text with structured feedback",
{
input: { text: "Sample text for analysis" },
expected: { metrics: {} }
},
async (input) => {
const result = await analyzeText(input.text);
logFeedback({
key: "text_metrics",
value: {
wordCount: result.wordCount,
readabilityScore: result.readability,
sentiment: result.sentiment
},
comment: "Detailed text analysis metrics"
});
return result;
}
);
Logs outputs during test execution for tracking intermediate results and debugging.
/**
* Log outputs during test execution
* @param output - Output value to log (any type)
*/
function logOutputs(output: any): void;
Usage Examples:
import { test, expect, logOutputs } from "langsmith/vitest";
test(
"multi-step processing with output logging",
{
input: { text: "Input text" },
expected: { result: "Final result" }
},
async (input) => {
// Step 1: Preprocess
const preprocessed = await preprocess(input.text);
logOutputs({ step: "preprocess", data: preprocessed });
// Step 2: Transform
const transformed = await transform(preprocessed);
logOutputs({ step: "transform", data: transformed });
// Step 3: Postprocess
const result = await postprocess(transformed);
logOutputs({ step: "postprocess", data: result });
return result;
}
);
// Log intermediate LLM calls
test(
"chain of thought reasoning",
{
input: { problem: "Math problem" },
expected: { answer: "42" }
},
async (input) => {
// Step 1: Analyze problem
const analysis = await analyzeProblem(input.problem);
logOutputs({ phase: "analysis", reasoning: analysis });
// Step 2: Generate solution steps
const steps = await generateSteps(analysis);
logOutputs({ phase: "steps", steps });
// Step 3: Execute and get answer
const result = await execute(steps);
logOutputs({ phase: "final", answer: result.answer });
return result;
}
);
// Log model responses
test(
"iterative refinement",
{
input: { prompt: "Write a story" },
expected: { story: "Once upon a time..." }
},
async (input) => {
let draft = await generateDraft(input.prompt);
logOutputs({ iteration: 1, draft });
for (let i = 0; i < 3; i++) {
draft = await refine(draft);
logOutputs({ iteration: i + 2, draft });
}
return { story: draft };
}
);
// Log error states
test(
"robust processing with error tracking",
{
input: { data: "Input data" },
expected: { processed: true }
},
async (input) => {
try {
const result = await riskyOperation(input.data);
logOutputs({ status: "success", result });
return result;
} catch (error) {
logOutputs({
status: "error",
error: error.message,
stack: error.stack
});
throw error;
}
}
);
// Log performance metrics
test(
"process with performance tracking",
{
input: { items: [1, 2, 3, 4, 5] },
expected: { processed: [2, 4, 6, 8, 10] }
},
async (input) => {
const startTime = Date.now();
const result = await processItems(input.items);
const endTime = Date.now();
const duration = endTime - startTime;
logOutputs({
processingTime: duration,
itemsProcessed: result.processed.length,
averageTimePerItem: duration / result.processed.length
});
return result;
}
);
Wrap evaluator functions for use with custom matchers and test evaluation. The wrapper converts simple evaluation functions into the format expected by LangSmith's evaluation system.
Wraps an evaluator function to make it compatible with the LangSmith Vitest integration.
/**
* Wrap evaluator function for use with custom matchers
* @param evaluator - Function that takes (input, output, expected) and returns evaluation result
* @returns Wrapped evaluator compatible with SimpleEvaluator interface
*/
function wrapEvaluator(evaluator: (args: any) => any): SimpleEvaluator;
type SimpleEvaluator = (
input: any,
output: any,
expected?: any
) => EvaluationResult | Promise<EvaluationResult>;
interface EvaluationResult {
key?: string;
score?: number | boolean;
value?: string | number | boolean | object;
comment?: string;
correction?: object;
evaluatorInfo?: object;
sourceRunId?: string;
}
Usage Examples:
import { wrapEvaluator } from "langsmith/vitest";
// Simple pass/fail evaluator
const exactMatchEvaluator = wrapEvaluator((input, output, expected) => ({
key: "exact_match",
score: output === expected ? 1 : 0
}));
// Evaluator with detailed scoring
const similarityEvaluator = wrapEvaluator((input, output, expected) => {
const similarity = computeSimilarity(output, expected);
return {
key: "similarity",
score: similarity,
value: { similarity, threshold: 0.8 },
comment: `Similarity: ${(similarity * 100).toFixed(1)}%`
};
});
// Evaluator with conditional logic
const lengthEvaluator = wrapEvaluator((input, output, expected) => {
const outputLength = output.length;
const expectedMin = expected.minLength || 0;
const expectedMax = expected.maxLength || Infinity;
const isValid = outputLength >= expectedMin && outputLength <= expectedMax;
return {
key: "length_validation",
score: isValid ? 1 : 0,
value: {
actualLength: outputLength,
minLength: expectedMin,
maxLength: expectedMax
},
comment: isValid
? `Length ${outputLength} is within range`
: `Length ${outputLength} is outside range [${expectedMin}, ${expectedMax}]`
};
});
// Async evaluator with external API calls
const toxicityEvaluator = wrapEvaluator(async (input, output, expected) => {
// Call moderation API
const result = await moderationAPI.check(output.text);
return {
key: "toxicity",
score: result.isSafe ? 1 : 0,
value: result.scores,
comment: result.isSafe ? "Content is safe" : "Toxic content detected",
evaluatorInfo: {
model: "toxicity-detector-v2",
version: "1.0"
}
};
});
// Evaluator with corrections
const grammarEvaluator = wrapEvaluator(async (input, output, expected) => {
const check = await grammarChecker.check(output.text);
if (check.errors.length > 0) {
return {
key: "grammar",
score: 0,
value: { errorCount: check.errors.length },
comment: `Found ${check.errors.length} grammar errors`,
correction: {
correctedText: check.corrected,
errors: check.errors
}
};
}
return {
key: "grammar",
score: 1,
comment: "No grammar errors"
};
});
// Multi-criteria evaluator
const qualityEvaluator = wrapEvaluator((input, output, expected) => {
const criteria = {
accuracy: computeAccuracy(output, expected),
completeness: computeCompleteness(output, expected),
clarity: computeClarity(output)
};
const overallScore =
(criteria.accuracy + criteria.completeness + criteria.clarity) / 3;
return {
key: "quality",
score: overallScore,
value: criteria,
comment: `Overall quality: ${(overallScore * 100).toFixed(0)}%`,
evaluatorInfo: {
criteria: ["accuracy", "completeness", "clarity"],
weights: [0.33, 0.33, 0.34]
}
};
});
// Use evaluators in tests
import { test, expect } from "langsmith/vitest";
test(
"validate output quality",
{
input: { prompt: "Explain AI" },
expected: { response: "AI is..." },
evaluators: [
exactMatchEvaluator,
similarityEvaluator,
lengthEvaluator,
toxicityEvaluator,
grammarEvaluator,
qualityEvaluator
]
},
async (input) => {
const result = await generate(input.prompt);
return result;
}
);
// Use evaluator with custom matcher
test(
"validate with custom matcher",
{
input: { text: "Input text" },
expected: { output: "Expected output" }
},
async (input) => {
const result = await process(input.text);
expect(result.output).evaluatedBy(qualityEvaluator);
return result;
}
);
Configuration parameters for LangSmith-tracked Vitest tests, controlling how tests are logged and evaluated.
Parameters for configuring LangSmith integration in Vitest tests.
/**
* Configuration parameters for LangSmith-tracked tests
*/
interface LangSmithJestlikeWrapperParams<I, O> {
/**
* Input data to pass to the test function
* This becomes the dataset example input in LangSmith
*/
input: I;
/**
* Expected output for comparison and evaluation
* Optional - used for comparison in evaluators
*/
expected?: O;
/**
* Array of evaluators to run on test results
* Each evaluator receives input, output, and expected values
*/
evaluators?: SimpleEvaluator[];
/**
* Custom LangSmith client instance
* If not provided, uses default client from environment
*/
client?: Client;
/**
* Name of the dataset to store this test example
* If not provided, uses test suite name or auto-generated name
*/
datasetName?: string;
/**
* Name of the LangSmith project for this test run
* If not provided, uses default project name
*/
projectName?: string;
}
Usage Examples:
import { test, wrapEvaluator } from "langsmith/vitest";
import { Client } from "langsmith";
// Minimal configuration (input only)
test(
"basic test",
{
input: { text: "Hello" }
},
async (input) => {
const result = await process(input.text);
return result;
}
);
// With expected output
test(
"test with expected",
{
input: { question: "What is 2+2?" },
expected: { answer: "4" }
},
async (input) => {
const result = await answerQuestion(input.question);
return result;
}
);
// With custom evaluators
const accuracyEvaluator = wrapEvaluator((input, output, expected) => ({
key: "accuracy",
score: output.answer === expected?.answer ? 1 : 0
}));
test(
"test with evaluators",
{
input: { question: "What is AI?" },
expected: { answer: "Artificial Intelligence" },
evaluators: [accuracyEvaluator]
},
async (input) => {
const result = await answerQuestion(input.question);
return result;
}
);
// With custom client
const customClient = new Client({
apiUrl: "https://api.langsmith.com",
apiKey: process.env.LANGSMITH_API_KEY
});
test(
"test with custom client",
{
input: { prompt: "Generate text" },
expected: { text: "Generated text" },
client: customClient
},
async (input) => {
const result = await generate(input.prompt);
return result;
}
);
// With custom dataset name
test(
"test with dataset name",
{
input: { text: "Sample text" },
expected: { sentiment: "neutral" },
datasetName: "sentiment-test-suite"
},
async (input) => {
const result = await classifySentiment(input.text);
return result;
}
);
// With custom project name
test(
"test with project name",
{
input: { document: "Long document" },
expected: { summary: "Brief summary" },
projectName: "summarization-experiments"
},
async (input) => {
const result = await summarize(input.document);
return result;
}
);
// Complete configuration
test(
"fully configured test",
{
input: {
context: "Context text",
question: "What is the main topic?"
},
expected: {
answer: "The main topic",
confidence: 0.9
},
evaluators: [
accuracyEvaluator,
wrapEvaluator((input, output, expected) => ({
key: "confidence",
score: output.confidence >= 0.8 ? 1 : 0
}))
],
client: customClient,
datasetName: "qa-golden-set",
projectName: "qa-model-v2-tests"
},
async (input) => {
const result = await answerFromContext(input.context, input.question);
return result;
}
);
// Type-safe parameters
interface QuestionInput {
question: string;
context?: string;
}
interface AnswerOutput {
answer: string;
confidence: number;
sources?: string[];
}
test<QuestionInput, AnswerOutput>(
"type-safe test",
{
input: {
question: "What is TypeScript?",
context: "TypeScript is a superset of JavaScript"
},
expected: {
answer: "A superset of JavaScript",
confidence: 0.95,
sources: ["documentation"]
}
},
async (input) => {
const result = await answerWithContext(input.question, input.context);
return result;
}
);
Configure the LangSmith Vitest reporter to automatically synchronize test results with LangSmith.
Add the LangSmith reporter to your Vitest configuration to enable automatic dataset and experiment creation.
// vitest.config.ts
import { defineConfig } from "vitest/config";
export default defineConfig({
test: {
reporters: [
"default", // Keep default Vitest reporter
["langsmith/vitest/reporter", {}] // Add LangSmith reporter
]
}
});
Usage Examples:
// vitest.config.ts - Basic configuration
import { defineConfig } from "vitest/config";
export default defineConfig({
test: {
reporters: ["default", "langsmith/vitest/reporter"]
}
});
// vitest.config.ts - With custom configuration
import { defineConfig } from "vitest/config";
export default defineConfig({
test: {
reporters: [
"default",
[
"langsmith/vitest/reporter",
{
projectName: "my-vitest-tests",
datasetPrefix: "test-"
}
]
]
}
});
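If LangSmith-tracked tests are kept in dedicated files, a standard Vitest include pattern can scope a config to just those files. This is a sketch only: the .eval.ts suffix is a hypothetical naming convention, not something the integration requires.
// vitest.config.eval.ts - Hypothetical config scoped to dedicated eval files
import { defineConfig } from "vitest/config";
export default defineConfig({
test: {
include: ["**/*.eval.ts"], // assumed naming convention for LangSmith-tracked test files
reporters: ["default", "langsmith/vitest/reporter"]
}
});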
// vitest.config.ts - Multiple reporters
import { defineConfig } from "vitest/config";
export default defineConfig({
test: {
reporters: [
"default",
"json",
"html",
"langsmith/vitest/reporter"
]
}
});
Create multiple test cases with different inputs by iterating over an array of cases (a parameterized pattern similar to Vitest's test.each).
import { describe, test, expect } from "langsmith/vitest";
const testCases = [
{ text: "I love this!", sentiment: "positive" },
{ text: "This is terrible", sentiment: "negative" },
{ text: "It's okay", sentiment: "neutral" }
];
describe("Sentiment Classification", () => {
testCases.forEach(({ text, sentiment }) => {
test(
`classify "${text}" as ${sentiment}`,
{
input: { text },
expected: { sentiment }
},
async (input) => {
const result = await classifySentiment(input.text);
expect(result.sentiment).toBe(sentiment);
return result;
}
);
});
});
Use Vitest's beforeEach/afterEach with LangSmith integration.
import { describe, test } from "langsmith/vitest";
import { beforeEach, afterEach } from "vitest";
describe("Translation API Tests", () => {
let translator: Translator;
beforeEach(async () => {
translator = new Translator({ apiKey: process.env.API_KEY });
await translator.initialize();
});
afterEach(async () => {
await translator.cleanup();
});
test(
"translate to Spanish",
{
input: { text: "Hello", lang: "es" },
expected: { translation: "Hola" }
},
async (input) => {
const result = await translator.translate(input.text, input.lang);
return result;
}
);
});
Combine Vitest snapshots with LangSmith tracking.
import { test, expect } from "langsmith/vitest";
test(
"generate consistent output",
{
input: { seed: 42, prompt: "Generate text" },
expected: { text: "Seeded output" }
},
async (input) => {
const result = await generateWithSeed(input.seed, input.prompt);
// Vitest snapshot
expect(result.text).toMatchSnapshot();
return result;
}
);
Use Vitest mocking with LangSmith integration.
import { test, expect } from "langsmith/vitest";
import { vi } from "vitest";
test(
"test with mocked LLM",
{
input: { prompt: "Test prompt" },
expected: { response: "Mocked response" }
},
async (input) => {
// Mock the LLM call
const mockLLM = vi.fn().mockResolvedValue({
response: "Mocked response"
});
const result = await myFunction(input.prompt, { llm: mockLLM });
expect(mockLLM).toHaveBeenCalledTimes(1);
expect(result.response).toBe("Mocked response");
return result;
}
);
Run tests concurrently while maintaining LangSmith tracking.
import { describe, test } from "langsmith/vitest";
describe.concurrent("Parallel Tests", () => {
test(
"test 1",
{
input: { id: 1 },
expected: { processed: true }
},
async (input) => {
const result = await slowOperation(input.id);
return result;
}
);
test(
"test 2",
{
input: { id: 2 },
expected: { processed: true }
},
async (input) => {
const result = await slowOperation(input.id);
return result;
}
);
});
Skip or run tests conditionally while preserving LangSmith integration.
import { test, expect } from "langsmith/vitest";
const shouldTest = process.env.RUN_EXPENSIVE_TESTS === "true";
test.skipIf(!shouldTest)(
"expensive LLM test",
{
input: { prompt: "Complex prompt" },
expected: { response: "Complex response" }
},
async (input) => {
const result = await expensiveLLMCall(input.prompt);
return result;
}
);
Test error handling with LangSmith tracking.
import { test, expect, logFeedback } from "langsmith/vitest";
test(
"handle API errors gracefully",
{
input: { invalidInput: true },
expected: { error: "Invalid input error" }
},
async (input) => {
try {
const result = await processInput(input);
return result;
} catch (error) {
logFeedback({
key: "error_handling",
score: 1,
value: { errorType: error.name, errorMessage: error.message },
comment: "Error handled correctly"
});
expect(error.message).toContain("Invalid input");
throw error;
}
}
);
Configure LangSmith Vitest tests for continuous integration.
// vitest.config.ci.ts
import { defineConfig } from "vitest/config";
export default defineConfig({
test: {
reporters: [
"default",
[
"langsmith/vitest/reporter",
{
projectName: process.env.CI_PIPELINE_ID
? `ci-${process.env.CI_PIPELINE_ID}`
: "local-tests"
}
]
],
environment: "node",
globals: true,
coverage: {
reporter: ["text", "json", "html"]
}
}
});
Use LangSmith's tracing to debug failing tests.
import { test, expect, logOutputs } from "langsmith/vitest";
import { traceable } from "langsmith/traceable";
// Make internal functions traceable for debugging
const processStep1 = traceable(async (input: string) => {
// Step 1 processing logic (placeholder)
return `step1(${input})`;
}, { name: "process-step-1" });
const processStep2 = traceable(async (input: string) => {
// Step 2 processing logic (placeholder)
return `step2(${input})`;
}, { name: "process-step-2" });
test(
"debug complex pipeline",
{
input: { data: "Input data" },
expected: { result: "Expected result" }
},
async (input) => {
// Each step is traced separately in LangSmith
const step1Result = await processStep1(input.data);
logOutputs({ step1: step1Result });
const step2Result = await processStep2(step1Result);
logOutputs({ step2: step2Result });
return { result: step2Result };
}
);
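The examples above assume LangSmith credentials are available to the test process (they read process.env.LANGSMITH_API_KEY). Below is a minimal sketch of a setup file that fails fast when the key is missing; register it through Vitest's setupFiles option if such a guard is useful in your environment.
// vitest.setup.ts - Hypothetical guard: fail fast if LangSmith credentials are missing
if (!process.env.LANGSMITH_API_KEY) {
throw new Error(
"LANGSMITH_API_KEY is not set; LangSmith-tracked tests cannot report results."
);
}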