Observability and analytics platform for LLM applications with hierarchical tracing, prompt management, dataset operations, and OpenAI integration
Comprehensive dataset management for evaluations, experiments, and testing workflows. Datasets contain items with input/output pairs that can be linked to observations for run tracking and analysis.
Retrieve datasets with all their items.
/**
 * Fetches a dataset together with all of its items.
 *
 * Items are retrieved page by page and aggregated into the returned
 * dataset's `items` array (presumably — confirm against SDK docs;
 * the `fetchItemsPageSize` option suggests paginated aggregation).
 * @param name - Dataset name (the unique name used at creation time)
 * @param options - Optional pagination settings; `fetchItemsPageSize`
 *   controls how many items are requested per page. The SDK applies its
 *   own default when omitted.
 * @returns Dataset with all items, each carrying a `link()` helper
 */
getDataset(
name: string,
options?: { fetchItemsPageSize: number }
): Promise<Dataset>;
/** A dataset as returned by getDataset, including all of its items. */
interface Dataset {
/** Dataset ID */
id: string;
/** Dataset name (unique within the project) */
name: string;
/** Optional human-readable description */
description?: string;
/** Custom metadata attached at creation time */
metadata?: any;
/** ID of the project this dataset belongs to */
projectId: string;
/** All dataset items; each exposes a link() method for run tracking */
items: DatasetItem[];
}
/** A single item within a dataset: an input with optional ground truth. */
interface DatasetItem {
/** Item ID */
id: string;
/** Lifecycle status: "ACTIVE" or "ARCHIVED" */
status: ApiDatasetStatus;
/** Input data for the item (arbitrary JSON-serializable value) */
input: any;
/** Expected output (ground truth) used when evaluating runs */
expectedOutput?: any;
/** Custom metadata */
metadata?: any;
/** Source trace ID if this item was created from an existing trace */
sourceTraceId?: string;
/** Source observation ID if this item was created from an observation */
sourceObservationId?: string;
/** Links this item to a named run via a trace/observation client (see LinkDatasetItem) */
link: LinkDatasetItem;
}
/** Lifecycle status of a dataset item. */
type ApiDatasetStatus = "ACTIVE" | "ARCHIVED";
/**
 * Associates a dataset item with a named run by linking a client object.
 * @param obj - Trace, span, generation, or event client to link
 * @param runName - Name of the run; linking several items under the same
 *   name groups them into one run (see getDatasetRun)
 * @param runArgs - Optional run description and metadata
 * @returns Promise resolving to the created run item (contains its ID)
 */
type LinkDatasetItem = (
obj: LangfuseObjectClient,
runName: string,
runArgs?: {
/** Optional description stored on the run */
description?: string;
/** Custom metadata stored on the run */
metadata?: any;
}
) => Promise<CreateLangfuseDatasetRunItemResponse>;
type LangfuseObjectClient =
| LangfuseTraceClient
| LangfuseSpanClient
| LangfuseGenerationClient
| LangfuseEventClient;

Usage Example:
import { Langfuse } from 'langfuse';
const langfuse = new Langfuse();
// Fetch dataset with default page size
const dataset = await langfuse.getDataset('eval-dataset');
console.log(dataset.name); // "eval-dataset"
console.log(dataset.items.length); // Number of items
// Fetch with custom page size
const largeDataset = await langfuse.getDataset('large-dataset', {
fetchItemsPageSize: 100
});
// Access dataset items
for (const item of dataset.items) {
console.log(item.input);
console.log(item.expectedOutput);
}

Create new datasets for organizing test cases and evaluations.
/**
 * Creates a new dataset.
 *
 * Two call forms are supported: pass just a name, or a full
 * configuration object with description and metadata.
 * @param dataset - Dataset name as string, or configuration object
 * @returns Dataset creation response
 */
createDataset(dataset: string): Promise<CreateLangfuseDatasetResponse>;
createDataset(dataset: CreateLangfuseDatasetBody): Promise<CreateLangfuseDatasetResponse>;
/** Configuration body for creating a dataset. */
interface CreateLangfuseDatasetBody {
/** Dataset name (must be unique) */
name: string;
/** Optional human-readable description */
description?: string;
/** Custom metadata */
metadata?: any;
}
interface CreateLangfuseDatasetResponse {
/** Dataset ID */
id: string;
/** Dataset name */
name: string;
/** Optional description */
description?: string;
/** Custom metadata */
metadata?: any;
/** Creation timestamp */
createdAt: string;
/** Last update timestamp */
updatedAt: string;
}

Usage Example:
// Create a dataset with just a name
const simpleDataset = await langfuse.createDataset('qa-evaluation');
// Or create with full configuration
const dataset = await langfuse.createDataset({
name: 'qa-evaluation',
description: 'Question-answering evaluation dataset',
metadata: {
version: '1.0',
created_by: 'eval-team'
}
});
console.log(dataset.id); // Dataset ID
console.log(dataset.name); // "qa-evaluation"

Add items to datasets with input data and expected outputs.
/**
 * Creates a dataset item in an existing dataset.
 *
 * Items may optionally reference the trace/observation they were
 * derived from, e.g. when building a dataset from production traffic.
 * @param body - Dataset item configuration
 * @returns Dataset item response
 */
createDatasetItem(body: CreateLangfuseDatasetItemBody): Promise<CreateLangfuseDatasetItemResponse>;
/** Configuration body for creating a dataset item. */
interface CreateLangfuseDatasetItemBody {
/** Name of the (existing) dataset to add the item to */
datasetName: string;
/** Input data for the item */
input: any;
/** Expected output (ground truth) */
expectedOutput?: any;
/** Custom metadata */
metadata?: any;
/** Source trace ID if creating the item from an existing trace */
sourceTraceId?: string;
/** Source observation ID if creating the item from an existing observation */
sourceObservationId?: string;
}
interface CreateLangfuseDatasetItemResponse {
/** Item ID */
id: string;
/** Status */
status: ApiDatasetStatus;
/** Input data */
input: any;
/** Expected output */
expectedOutput?: any;
/** Custom metadata */
metadata?: any;
/** Source trace ID */
sourceTraceId?: string;
/** Source observation ID */
sourceObservationId?: string;
/** Dataset ID */
datasetId: string;
/** Dataset name */
datasetName: string;
/** Creation timestamp */
createdAt: string;
/** Last update timestamp */
updatedAt: string;
}

Usage Example:
// Create a dataset item
const item = await langfuse.createDatasetItem({
datasetName: 'qa-evaluation',
input: {
question: 'What is the capital of France?'
},
expectedOutput: {
answer: 'Paris'
},
metadata: {
difficulty: 'easy',
category: 'geography'
}
});
// Create item from existing trace
const traceItem = await langfuse.createDatasetItem({
datasetName: 'production-samples',
input: { query: 'user question' },
expectedOutput: { response: 'correct answer' },
sourceTraceId: 'trace-123',
sourceObservationId: 'obs-456'
});

Retrieve a specific dataset item by ID.
/**
* Fetches a specific dataset item
* @param id - Dataset item ID
* @returns Dataset item response
*/
getDatasetItem(id: string): Promise<CreateLangfuseDatasetItemResponse>;

Usage Example:
const item = await langfuse.getDatasetItem('item-123');
console.log(item.input);
console.log(item.expectedOutput);
console.log(item.metadata);

Dataset runs track executions of your system against dataset items, enabling evaluation and comparison.
/**
 * Fetches a single dataset run, identified by dataset name + run name.
 * @param params - Run identifier parameters
 * @returns Dataset run response
 */
getDatasetRun(params: GetLangfuseDatasetRunParams): Promise<GetLangfuseDatasetRunResponse>;
/**
 * Fetches all runs recorded for a dataset, with optional pagination.
 * @param datasetName - Dataset name
 * @param query - Optional pagination (page number and page size)
 * @returns Paginated dataset runs response
 */
getDatasetRuns(
datasetName: string,
query?: GetLangfuseDatasetRunsQuery
): Promise<GetLangfuseDatasetRunsResponse>;
/** Identifies one run within one dataset. */
interface GetLangfuseDatasetRunParams {
/** Dataset name */
datasetName: string;
/** Run name, as passed to item.link() or createDatasetRunItem */
runName: string;
}
/** A single dataset run. */
interface GetLangfuseDatasetRunResponse {
/** Run ID */
id: string;
/** Run name */
name: string;
/** Optional description supplied when the run was created */
description?: string;
/** Custom metadata supplied when the run was created */
metadata?: any;
/** ID of the dataset this run belongs to */
datasetId: string;
/** Name of the dataset this run belongs to */
datasetName: string;
/** Creation timestamp */
createdAt: string;
/** Last update timestamp */
updatedAt: string;
}
/** Pagination options for getDatasetRuns. */
interface GetLangfuseDatasetRunsQuery {
/** Page number (1-based — confirm in API reference) */
page?: number;
/** Page size (number of runs per page) */
limit?: number;
}
/** Paginated list of dataset runs. */
interface GetLangfuseDatasetRunsResponse {
/** Runs on the requested page */
data: ApiDatasetRun[];
/** Pagination metadata for iterating further pages */
meta: {
page: number;
limit: number;
totalItems: number;
totalPages: number;
};
}
interface ApiDatasetRun {
id: string;
name: string;
description?: string;
metadata?: any;
datasetId: string;
datasetName: string;
createdAt: string;
updatedAt: string;
}

Usage Example:
// Fetch a specific run
const run = await langfuse.getDatasetRun({
datasetName: 'qa-evaluation',
runName: 'gpt4-run-1'
});
// Fetch all runs for a dataset
const runs = await langfuse.getDatasetRuns('qa-evaluation', {
page: 1,
limit: 50
});
for (const run of runs.data) {
console.log(run.name);
console.log(run.metadata);
}

Link observations to dataset items to track execution runs.
/**
 * Creates a dataset run item linking a trace/observation to a dataset item.
 *
 * This is the low-level form; when you already hold a trace/span/generation
 * client, the item.link() convenience method is the simpler path.
 * @param body - Run item configuration
 * @returns Run item response (contains only the new item's ID)
 */
createDatasetRunItem(body: CreateLangfuseDatasetRunItemBody): Promise<CreateLangfuseDatasetRunItemResponse>;
/** Configuration body for linking an execution to a dataset item. */
interface CreateLangfuseDatasetRunItemBody {
/** Name of the run this link belongs to */
runName: string;
/** ID of the dataset item being linked */
datasetItemId: string;
/** Trace ID to link */
traceId?: string;
/** Observation ID to link */
observationId?: string;
/** Optional description stored on the run */
runDescription?: string;
/** Custom metadata */
metadata?: any;
}
interface CreateLangfuseDatasetRunItemResponse {
/** Run item ID */
id: string;
}

Usage Example:
// Create run item manually
const runItem = await langfuse.createDatasetRunItem({
runName: 'experiment-1',
datasetItemId: 'item-123',
traceId: 'trace-456',
observationId: 'obs-789',
runDescription: 'GPT-4 evaluation run',
metadata: {
model: 'gpt-4',
temperature: 0.7
}
});

Use the link method on dataset items for convenient run tracking.
interface DatasetItem {
/**
* Links this dataset item to an observation for run tracking
* @param obj - Trace, span, generation, or event client
* @param runName - Name of the run
* @param runArgs - Optional run configuration
* @returns Run item response
*/
link: (
obj: LangfuseObjectClient,
runName: string,
runArgs?: {
description?: string;
metadata?: any;
}
) => Promise<CreateLangfuseDatasetRunItemResponse>;
}

Usage Example:
const dataset = await langfuse.getDataset('qa-evaluation');
for (const item of dataset.items) {
// Create a trace for this item
const trace = langfuse.trace({
name: 'eval-trace',
input: item.input
});
// Execute your system with the input
const generation = trace.generation({
name: 'qa-generation',
model: 'gpt-4',
input: item.input
});
// Simulate processing
const output = await processQuestion(item.input.question);
generation.end({
output: { answer: output }
});
// Link this execution to the dataset item
await item.link(trace, 'gpt4-evaluation', {
description: 'GPT-4 evaluation run',
metadata: {
temperature: 0.7,
model: 'gpt-4'
}
});
}
await langfuse.flushAsync();

Complete end-to-end example:

import { Langfuse } from 'langfuse';
const langfuse = new Langfuse();
// Step 1: Create a dataset
const dataset = await langfuse.createDataset({
name: 'customer-support-qa',
description: 'Customer support Q&A evaluation dataset',
metadata: { version: '1.0' }
});
// Step 2: Add items to the dataset
const items = [
{
question: 'How do I reset my password?',
expectedAnswer: 'You can reset your password by clicking the "Forgot Password" link on the login page.'
},
{
question: 'What are your business hours?',
expectedAnswer: 'We are open Monday-Friday, 9 AM to 5 PM EST.'
},
{
question: 'How do I cancel my subscription?',
expectedAnswer: 'You can cancel your subscription in the billing section of your account settings.'
}
];
for (const item of items) {
await langfuse.createDatasetItem({
datasetName: 'customer-support-qa',
input: { question: item.question },
expectedOutput: { answer: item.expectedAnswer },
metadata: { category: 'support' }
});
}
// Step 3: Run evaluation
const fetchedDataset = await langfuse.getDataset('customer-support-qa');
const runName = `eval-run-${Date.now()}`;
for (const item of fetchedDataset.items) {
// Create trace for this evaluation
const trace = langfuse.trace({
name: 'qa-evaluation',
input: item.input,
metadata: { runName }
});
// Get prompt
const prompt = await langfuse.getPrompt('support-qa-prompt', undefined, {
type: 'chat'
});
// Create generation
const messages = prompt.compile(
{ question: item.input.question },
{ history: [] }
);
const generation = trace.generation({
name: 'answer-generation',
prompt: prompt,
model: 'gpt-4',
input: messages
});
// Simulate LLM call
const response = await callLLM(messages);
generation.end({
output: { answer: response },
usage: { input: 50, output: 100, total: 150 }
});
// Update trace with output
trace.update({
output: { answer: response }
});
// Link to dataset run
await item.link(trace, runName, {
description: 'GPT-4 evaluation with support prompt',
metadata: {
model: 'gpt-4',
promptVersion: prompt.version
}
});
// Score the generation
const score = calculateSimilarity(response, item.expectedOutput.answer);
trace.score({
name: 'similarity',
value: score,
dataType: 'NUMERIC',
comment: 'Semantic similarity to expected output'
});
}
// Flush all events
await langfuse.flushAsync();
// Step 4: Analyze results
const runs = await langfuse.getDatasetRuns('customer-support-qa');
console.log(`Total runs: ${runs.data.length}`);
const latestRun = await langfuse.getDatasetRun({
datasetName: 'customer-support-qa',
runName: runName
});
console.log('Latest run:', latestRun);

// Organize datasets by use case
await langfuse.createDataset({
name: 'prod-samples-2024-01',
description: 'Production samples from January 2024',
metadata: {
source: 'production',
month: '2024-01',
sample_rate: 0.1
}
});
// Use metadata for categorization
await langfuse.createDatasetItem({
datasetName: 'prod-samples-2024-01',
input: { query: 'user question' },
expectedOutput: { response: 'expected response' },
metadata: {
category: 'technical',
difficulty: 'medium',
language: 'en'
}
});

// 1. Create dataset from production traces
const productionTraces = await langfuse.fetchTraces({
tags: ['production'],
fromTimestamp: '2024-01-01',
limit: 100
});
for (const trace of productionTraces.data) {
await langfuse.createDatasetItem({
datasetName: 'prod-golden-set',
input: trace.input,
expectedOutput: trace.output,
sourceTraceId: trace.id,
metadata: { userId: trace.userId }
});
}
// 2. Run experiments with different models
const models = ['gpt-4', 'gpt-3.5-turbo', 'claude-3-opus'];
const dataset = await langfuse.getDataset('prod-golden-set');
for (const model of models) {
for (const item of dataset.items) {
const trace = langfuse.trace({
name: 'model-comparison',
input: item.input,
metadata: { model }
});
const generation = trace.generation({
name: 'completion',
model: model,
input: item.input
});
const output = await generateWithModel(model, item.input);
generation.end({ output });
trace.update({ output });
await item.link(trace, `${model}-comparison`, {
metadata: { model }
});
}
}
await langfuse.flushAsync();
// 3. Compare results in Langfuse UI or via API
const gpt4Results = await langfuse.getDatasetRun({
datasetName: 'prod-golden-set',
runName: 'gpt-4-comparison'
});
const gpt35Results = await langfuse.getDatasetRun({
datasetName: 'prod-golden-set',
runName: 'gpt-3.5-turbo-comparison'
});

Install with Tessl CLI
npx tessl i tessl/npm-langfuse