0
# Dataset Operations
1
2
Dataset management for evaluations, experiments, and testing workflows. A dataset is a named collection of items; each item holds an input and, optionally, an expected output, and can be linked to a trace or observation so that runs can be tracked and compared.
3
4
## Capabilities
5
6
### Fetching Datasets
7
8
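Retrieve a single dataset by name, together with all of its items. Pass `fetchItemsPageSize` in `options` to set the page size used when fetching the items.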
9
10
```typescript { .api }
11
/**
12
* Fetches a dataset with all its items
13
* @param name - Dataset name
14
* @param options - Optional pagination settings
15
* @returns Dataset with items
16
*/
17
getDataset(
18
name: string,
19
options?: { fetchItemsPageSize: number }
20
): Promise<Dataset>;
21
22
interface Dataset {
23
/** Dataset ID */
24
id: string;
25
/** Dataset name */
26
name: string;
27
/** Optional description */
28
description?: string;
29
/** Custom metadata */
30
metadata?: any;
31
/** Project ID */
32
projectId: string;
33
/** Dataset items */
34
items: DatasetItem[];
35
}
36
37
interface DatasetItem {
38
/** Item ID */
39
id: string;
40
/** Status: ACTIVE or ARCHIVED */
41
status: ApiDatasetStatus;
42
/** Input data for the item */
43
input: any;
44
/** Expected output (ground truth) */
45
expectedOutput?: any;
46
/** Custom metadata */
47
metadata?: any;
48
/** Source trace ID if created from a trace */
49
sourceTraceId?: string;
50
/** Source observation ID if created from an observation */
51
sourceObservationId?: string;
52
/** Method to link this item to a run */
53
link: LinkDatasetItem;
54
}
55
56
type ApiDatasetStatus = "ACTIVE" | "ARCHIVED";
57
58
type LinkDatasetItem = (
59
obj: LangfuseObjectClient,
60
runName: string,
61
runArgs?: {
62
description?: string;
63
metadata?: any;
64
}
65
) => Promise<CreateLangfuseDatasetRunItemResponse>;
66
67
type LangfuseObjectClient =
68
| LangfuseTraceClient
69
| LangfuseSpanClient
70
| LangfuseGenerationClient
71
| LangfuseEventClient;
72
```
73
74
**Usage Example:**
75
76
```typescript
77
import { Langfuse } from 'langfuse';
78
79
const langfuse = new Langfuse();
80
81
// Fetch dataset with default page size
82
const dataset = await langfuse.getDataset('eval-dataset');
83
84
console.log(dataset.name); // "eval-dataset"
85
console.log(dataset.items.length); // Number of items
86
87
// Fetch with custom page size
88
const largeDataset = await langfuse.getDataset('large-dataset', {
89
fetchItemsPageSize: 100
90
});
91
92
// Access dataset items
93
for (const item of dataset.items) {
94
console.log(item.input);
95
console.log(item.expectedOutput);
96
}
97
```
98
99
### Creating Datasets
100
101
Create new datasets for organizing test cases and evaluations.
102
103
```typescript { .api }
104
/**
105
* Creates a new dataset
106
* @param dataset - Dataset name as string or configuration object
107
* @returns Dataset creation response
108
*/
109
createDataset(dataset: string): Promise<CreateLangfuseDatasetResponse>;
110
createDataset(dataset: CreateLangfuseDatasetBody): Promise<CreateLangfuseDatasetResponse>;
111
112
interface CreateLangfuseDatasetBody {
113
/** Dataset name (must be unique) */
114
name: string;
115
/** Optional description */
116
description?: string;
117
/** Custom metadata */
118
metadata?: any;
119
}
120
121
interface CreateLangfuseDatasetResponse {
122
/** Dataset ID */
123
id: string;
124
/** Dataset name */
125
name: string;
126
/** Optional description */
127
description?: string;
128
/** Custom metadata */
129
metadata?: any;
130
/** Creation timestamp */
131
createdAt: string;
132
/** Last update timestamp */
133
updatedAt: string;
134
}
135
```
136
137
**Usage Example:**
138
139
```typescript
140
// Create a dataset with just a name
141
const simpleDataset = await langfuse.createDataset('qa-evaluation');
142
143
// Or create with full configuration
144
const dataset = await langfuse.createDataset({
145
name: 'qa-evaluation',
146
description: 'Question-answering evaluation dataset',
147
metadata: {
148
version: '1.0',
149
created_by: 'eval-team'
150
}
151
});
152
153
console.log(dataset.id); // Dataset ID
154
console.log(dataset.name); // "qa-evaluation"
155
```
156
157
### Creating Dataset Items
158
159
Add items to datasets with input data and expected outputs.
160
161
```typescript { .api }
162
/**
163
* Creates a dataset item
164
* @param body - Dataset item configuration
165
* @returns Dataset item response
166
*/
167
createDatasetItem(body: CreateLangfuseDatasetItemBody): Promise<CreateLangfuseDatasetItemResponse>;
168
169
interface CreateLangfuseDatasetItemBody {
170
/** Dataset name to add item to */
171
datasetName: string;
172
/** Input data for the item */
173
input: any;
174
/** Expected output (ground truth) */
175
expectedOutput?: any;
176
/** Custom metadata */
177
metadata?: any;
178
/** Source trace ID if creating from a trace */
179
sourceTraceId?: string;
180
/** Source observation ID if creating from an observation */
181
sourceObservationId?: string;
182
}
183
184
interface CreateLangfuseDatasetItemResponse {
185
/** Item ID */
186
id: string;
187
/** Status */
188
status: ApiDatasetStatus;
189
/** Input data */
190
input: any;
191
/** Expected output */
192
expectedOutput?: any;
193
/** Custom metadata */
194
metadata?: any;
195
/** Source trace ID */
196
sourceTraceId?: string;
197
/** Source observation ID */
198
sourceObservationId?: string;
199
/** Dataset ID */
200
datasetId: string;
201
/** Dataset name */
202
datasetName: string;
203
/** Creation timestamp */
204
createdAt: string;
205
/** Last update timestamp */
206
updatedAt: string;
207
}
208
```
209
210
**Usage Example:**
211
212
```typescript
213
// Create a dataset item
214
const item = await langfuse.createDatasetItem({
215
datasetName: 'qa-evaluation',
216
input: {
217
question: 'What is the capital of France?'
218
},
219
expectedOutput: {
220
answer: 'Paris'
221
},
222
metadata: {
223
difficulty: 'easy',
224
category: 'geography'
225
}
226
});
227
228
// Create item from existing trace
229
const traceItem = await langfuse.createDatasetItem({
230
datasetName: 'production-samples',
231
input: { query: 'user question' },
232
expectedOutput: { response: 'correct answer' },
233
sourceTraceId: 'trace-123',
234
sourceObservationId: 'obs-456'
235
});
236
```
237
238
### Fetching Dataset Items
239
240
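Retrieve a single dataset item by its ID. The result is the plain item record (`CreateLangfuseDatasetItemResponse`); unlike the items returned by `getDataset`, it does not carry a `link` method.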
241
242
```typescript { .api }
243
/**
244
* Fetches a specific dataset item
245
* @param id - Dataset item ID
246
* @returns Dataset item response
247
*/
248
getDatasetItem(id: string): Promise<CreateLangfuseDatasetItemResponse>;
249
```
250
251
**Usage Example:**
252
253
```typescript
254
const item = await langfuse.getDatasetItem('item-123');
255
256
console.log(item.input);
257
console.log(item.expectedOutput);
258
console.log(item.metadata);
259
```
260
261
### Dataset Runs
262
263
Dataset runs track executions of your system against dataset items, enabling evaluation and comparison.
264
265
```typescript { .api }
266
/**
267
* Fetches a dataset run
268
* @param params - Run identifier parameters
269
* @returns Dataset run response
270
*/
271
getDatasetRun(params: GetLangfuseDatasetRunParams): Promise<GetLangfuseDatasetRunResponse>;
272
273
/**
274
* Fetches dataset runs for a dataset
275
* @param datasetName - Dataset name
276
* @param query - Optional filtering and pagination
277
* @returns Dataset runs response
278
*/
279
getDatasetRuns(
280
datasetName: string,
281
query?: GetLangfuseDatasetRunsQuery
282
): Promise<GetLangfuseDatasetRunsResponse>;
283
284
interface GetLangfuseDatasetRunParams {
285
/** Dataset name */
286
datasetName: string;
287
/** Run name */
288
runName: string;
289
}
290
291
interface GetLangfuseDatasetRunResponse {
292
/** Run ID */
293
id: string;
294
/** Run name */
295
name: string;
296
/** Optional description */
297
description?: string;
298
/** Custom metadata */
299
metadata?: any;
300
/** Dataset ID */
301
datasetId: string;
302
/** Dataset name */
303
datasetName: string;
304
/** Creation timestamp */
305
createdAt: string;
306
/** Last update timestamp */
307
updatedAt: string;
308
}
309
310
interface GetLangfuseDatasetRunsQuery {
311
/** Page number */
312
page?: number;
313
/** Page size */
314
limit?: number;
315
}
316
317
interface GetLangfuseDatasetRunsResponse {
318
/** Array of runs */
319
data: ApiDatasetRun[];
320
/** Pagination metadata */
321
meta: {
322
page: number;
323
limit: number;
324
totalItems: number;
325
totalPages: number;
326
};
327
}
328
329
interface ApiDatasetRun {
330
id: string;
331
name: string;
332
description?: string;
333
metadata?: any;
334
datasetId: string;
335
datasetName: string;
336
createdAt: string;
337
updatedAt: string;
338
}
339
```
340
341
**Usage Example:**
342
343
```typescript
344
// Fetch a specific run
345
const run = await langfuse.getDatasetRun({
346
datasetName: 'qa-evaluation',
347
runName: 'gpt4-run-1'
348
});
349
350
// Fetch all runs for a dataset
351
const runs = await langfuse.getDatasetRuns('qa-evaluation', {
352
page: 1,
353
limit: 50
354
});
355
356
for (const run of runs.data) {
357
console.log(run.name);
358
console.log(run.metadata);
359
}
360
```
361
362
### Creating Dataset Run Items
363
364
Link a trace and/or an observation to a dataset item under a named run, so that the execution is recorded as part of that run.
365
366
```typescript { .api }
367
/**
368
* Creates a dataset run item linking an observation to a dataset item
369
* @param body - Run item configuration
370
* @returns Run item response
371
*/
372
createDatasetRunItem(body: CreateLangfuseDatasetRunItemBody): Promise<CreateLangfuseDatasetRunItemResponse>;
373
374
interface CreateLangfuseDatasetRunItemBody {
375
/** Run name */
376
runName: string;
377
/** Dataset item ID */
378
datasetItemId: string;
379
/** Trace ID to link */
380
traceId?: string;
381
/** Observation ID to link */
382
observationId?: string;
383
/** Optional run description */
384
runDescription?: string;
385
/** Custom metadata */
386
metadata?: any;
387
}
388
389
interface CreateLangfuseDatasetRunItemResponse {
390
/** Run item ID */
391
id: string;
392
}
393
```
394
395
**Usage Example:**
396
397
```typescript
398
// Create run item manually
399
const runItem = await langfuse.createDatasetRunItem({
400
runName: 'experiment-1',
401
datasetItemId: 'item-123',
402
traceId: 'trace-456',
403
observationId: 'obs-789',
404
runDescription: 'GPT-4 evaluation run',
405
metadata: {
406
model: 'gpt-4',
407
temperature: 0.7
408
}
409
});
410
```
411
412
### Linking Dataset Items to Runs
413
414
Items returned by `getDataset` expose a `link` method that attaches a trace, span, generation, or event to a named run for that item.
415
416
```typescript { .api }
417
interface DatasetItem {
418
/**
419
* Links this dataset item to an observation for run tracking
420
* @param obj - Trace, span, generation, or event client
421
* @param runName - Name of the run
422
* @param runArgs - Optional run configuration
423
* @returns Run item response
424
*/
425
link: (
426
obj: LangfuseObjectClient,
427
runName: string,
428
runArgs?: {
429
description?: string;
430
metadata?: any;
431
}
432
) => Promise<CreateLangfuseDatasetRunItemResponse>;
433
}
434
```
435
436
**Usage Example:**
437
438
```typescript
439
const dataset = await langfuse.getDataset('qa-evaluation');
440
441
for (const item of dataset.items) {
442
// Create a trace for this item
443
const trace = langfuse.trace({
444
name: 'eval-trace',
445
input: item.input
446
});
447
448
// Record the model call for this input as a generation on the trace
449
const generation = trace.generation({
450
name: 'qa-generation',
451
model: 'gpt-4',
452
input: item.input
453
});
454
455
// Simulate processing
456
const output = await processQuestion(item.input.question);
457
458
generation.end({
459
output: { answer: output }
460
});
461
462
// Link this execution to the dataset item
463
await item.link(trace, 'gpt4-evaluation', {
464
description: 'GPT-4 evaluation run',
465
metadata: {
466
temperature: 0.7,
467
model: 'gpt-4'
468
}
469
});
470
}
471
472
await langfuse.flushAsync();
473
```
474
475
## Complete Dataset Evaluation Example
476
477
```typescript
478
import { Langfuse } from 'langfuse';
479
480
const langfuse = new Langfuse();
481
482
// Step 1: Create a dataset
483
const dataset = await langfuse.createDataset({
484
name: 'customer-support-qa',
485
description: 'Customer support Q&A evaluation dataset',
486
metadata: { version: '1.0' }
487
});
488
489
// Step 2: Add items to the dataset
490
const items = [
491
{
492
question: 'How do I reset my password?',
493
expectedAnswer: 'You can reset your password by clicking the "Forgot Password" link on the login page.'
494
},
495
{
496
question: 'What are your business hours?',
497
expectedAnswer: 'We are open Monday-Friday, 9 AM to 5 PM EST.'
498
},
499
{
500
question: 'How do I cancel my subscription?',
501
expectedAnswer: 'You can cancel your subscription in the billing section of your account settings.'
502
}
503
];
504
505
for (const item of items) {
506
await langfuse.createDatasetItem({
507
datasetName: 'customer-support-qa',
508
input: { question: item.question },
509
expectedOutput: { answer: item.expectedAnswer },
510
metadata: { category: 'support' }
511
});
512
}
513
514
// Step 3: Run evaluation
515
const fetchedDataset = await langfuse.getDataset('customer-support-qa');
516
const runName = `eval-run-${Date.now()}`;
517
518
for (const item of fetchedDataset.items) {
519
// Create trace for this evaluation
520
const trace = langfuse.trace({
521
name: 'qa-evaluation',
522
input: item.input,
523
metadata: { runName }
524
});
525
526
// Get prompt
527
const prompt = await langfuse.getPrompt('support-qa-prompt', undefined, {
528
type: 'chat'
529
});
530
531
// Compile the prompt into chat messages, then create the generation
532
const messages = prompt.compile(
533
{ question: item.input.question },
534
{ history: [] }
535
);
536
537
const generation = trace.generation({
538
name: 'answer-generation',
539
prompt: prompt,
540
model: 'gpt-4',
541
input: messages
542
});
543
544
// Simulate LLM call
545
const response = await callLLM(messages);
546
547
generation.end({
548
output: { answer: response },
549
usage: { input: 50, output: 100, total: 150 }
550
});
551
552
// Update trace with output
553
trace.update({
554
output: { answer: response }
555
});
556
557
// Link to dataset run
558
await item.link(trace, runName, {
559
description: 'GPT-4 evaluation with support prompt',
560
metadata: {
561
model: 'gpt-4',
562
promptVersion: prompt.version
563
}
564
});
565
566
// Score the trace by comparing the response with the expected output
567
const score = calculateSimilarity(response, item.expectedOutput.answer);
568
trace.score({
569
name: 'similarity',
570
value: score,
571
dataType: 'NUMERIC',
572
comment: 'Semantic similarity to expected output'
573
});
574
}
575
576
// Flush all events
577
await langfuse.flushAsync();
578
579
// Step 4: Analyze results
580
const runs = await langfuse.getDatasetRuns('customer-support-qa');
581
console.log(`Total runs: ${runs.data.length}`);
582
583
const latestRun = await langfuse.getDatasetRun({
584
datasetName: 'customer-support-qa',
585
runName: runName
586
});
587
588
console.log('Latest run:', latestRun);
589
```
590
591
## Best Practices
592
593
### Dataset Organization
594
595
```typescript
596
// Organize datasets by use case
597
await langfuse.createDataset({
598
name: 'prod-samples-2024-01',
599
description: 'Production samples from January 2024',
600
metadata: {
601
source: 'production',
602
month: '2024-01',
603
sample_rate: 0.1
604
}
605
});
606
607
// Use metadata for categorization
608
await langfuse.createDatasetItem({
609
datasetName: 'prod-samples-2024-01',
610
input: { query: 'user question' },
611
expectedOutput: { response: 'expected response' },
612
metadata: {
613
category: 'technical',
614
difficulty: 'medium',
615
language: 'en'
616
}
617
});
618
```
619
620
### Evaluation Workflow
621
622
```typescript
623
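// 1. Populate the dataset from production traces (assumes the 'prod-golden-set' dataset already exists; create it with createDataset first if not)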
624
const productionTraces = await langfuse.fetchTraces({
625
tags: ['production'],
626
fromTimestamp: '2024-01-01',
627
limit: 100
628
});
629
630
for (const trace of productionTraces.data) {
631
await langfuse.createDatasetItem({
632
datasetName: 'prod-golden-set',
633
input: trace.input,
634
expectedOutput: trace.output,
635
sourceTraceId: trace.id,
636
metadata: { userId: trace.userId }
637
});
638
}
639
640
// 2. Run experiments with different models
641
const models = ['gpt-4', 'gpt-3.5-turbo', 'claude-3-opus'];
642
const dataset = await langfuse.getDataset('prod-golden-set');
643
644
for (const model of models) {
645
for (const item of dataset.items) {
646
const trace = langfuse.trace({
647
name: 'model-comparison',
648
input: item.input,
649
metadata: { model }
650
});
651
652
const generation = trace.generation({
653
name: 'completion',
654
model: model,
655
input: item.input
656
});
657
658
const output = await generateWithModel(model, item.input);
659
660
generation.end({ output });
661
trace.update({ output });
662
663
await item.link(trace, `${model}-comparison`, {
664
metadata: { model }
665
});
666
}
667
}
668
669
await langfuse.flushAsync();
670
671
// 3. Compare results in Langfuse UI or via API
672
const gpt4Results = await langfuse.getDatasetRun({
673
datasetName: 'prod-golden-set',
674
runName: 'gpt-4-comparison'
675
});
676
677
const gpt35Results = await langfuse.getDatasetRun({
678
datasetName: 'prod-golden-set',
679
runName: 'gpt-3.5-turbo-comparison'
680
});
681
```
682