# Datasets

The Datasets API manages datasets for evaluation and testing. A dataset contains items (test cases) and runs (evaluation results), which enables systematic testing and evaluation of LLM applications.

## Capabilities

### DatasetsClient

Client for managing datasets and dataset runs.

```java { .api }
11
/**
12
* List all datasets
13
*
14
* @param request Optional pagination parameters
15
* @param requestOptions Optional request configuration
16
*/
17
PaginatedDatasets list();
18
PaginatedDatasets list(GetDatasetsRequest request);
19
PaginatedDatasets list(GetDatasetsRequest request, RequestOptions requestOptions);
20
21
/**
22
* Get a dataset by name
23
*
24
* @param datasetName Name of the dataset
25
* @param requestOptions Optional request configuration
26
*/
27
Dataset get(String datasetName);
28
Dataset get(String datasetName, RequestOptions requestOptions);
29
30
/**
31
* Create a new dataset
32
*
33
* @param request Dataset definition
34
* @param requestOptions Optional request configuration
35
*/
36
Dataset create(CreateDatasetRequest request);
37
Dataset create(CreateDatasetRequest request, RequestOptions requestOptions);
38
39
/**
40
* Get a dataset run with all its items
41
*
42
* @param datasetName Name of the dataset
43
* @param runName Name of the run
44
* @param requestOptions Optional request configuration
45
*/
46
DatasetRunWithItems getRun(String datasetName, String runName);
47
DatasetRunWithItems getRun(String datasetName, String runName, RequestOptions requestOptions);
48
49
/**
50
* Delete a dataset run and all its items
51
* Irreversible operation
52
*
53
* @param datasetName Name of the dataset
54
* @param runName Name of the run
55
* @param requestOptions Optional request configuration
56
*/
57
DeleteDatasetRunResponse deleteRun(String datasetName, String runName);
58
DeleteDatasetRunResponse deleteRun(String datasetName, String runName, RequestOptions requestOptions);
59
60
/**
61
* Get all runs for a dataset
62
*
63
* @param datasetName Name of the dataset
64
* @param request Optional pagination parameters
65
* @param requestOptions Optional request configuration
66
*/
67
PaginatedDatasetRuns getRuns(String datasetName);
68
PaginatedDatasetRuns getRuns(String datasetName, GetDatasetRunsRequest request);
69
PaginatedDatasetRuns getRuns(String datasetName, GetDatasetRunsRequest request, RequestOptions requestOptions);
70
```
71
72
**Usage Examples:**
73
74
```java
75
import com.langfuse.client.LangfuseClient;
76
import com.langfuse.client.resources.datasets.requests.*;
77
import com.langfuse.client.resources.datasets.types.*;
78
import com.langfuse.client.resources.commons.types.*;
79
import java.util.Map;
80
81
LangfuseClient client = LangfuseClient.builder()
82
.url("https://cloud.langfuse.com")
83
.credentials("pk-lf-...", "sk-lf-...")
84
.build();
85
86
// Create a dataset
87
CreateDatasetRequest createRequest = CreateDatasetRequest.builder()
88
.name("qa-evaluation")
89
.description("Question answering test cases")
90
.metadata(Map.of("domain", "customer-support", "version", "1.0"))
91
.build();
92
93
Dataset dataset = client.datasets().create(createRequest);
94
95
// Get a dataset
96
Dataset retrieved = client.datasets().get("qa-evaluation");
97
98
// List all datasets
99
PaginatedDatasets datasets = client.datasets().list();
100
for (Dataset ds : datasets.getData()) {
101
System.out.println(ds.getName() + ": " + ds.getDescription().orElse(""));
102
}
103
104
// Get runs for a dataset
105
PaginatedDatasetRuns runs = client.datasets().getRuns("qa-evaluation");
106
for (DatasetRun run : runs.getData()) {
107
System.out.println("Run: " + run.getName() + " (" + run.getCreatedAt() + ")");
108
}
109
110
// Get a specific run with items
111
DatasetRunWithItems runDetails = client.datasets()
112
.getRun("qa-evaluation", "eval-2025-10-14");
113
114
System.out.println("Run items: " + runDetails.getDatasetRunItems().size());
115
116
// Delete a run
117
DeleteDatasetRunResponse deleteResp = client.datasets()
118
.deleteRun("qa-evaluation", "old-run");
119
```
120
121
### DatasetItemsClient
122
123
Client for managing dataset items (test cases).
124
125
```java { .api }
126
/**
127
* Create a dataset item
128
*
129
* @param request Item definition with input/output
130
* @param requestOptions Optional request configuration
131
*/
132
DatasetItem create(CreateDatasetItemRequest request);
133
DatasetItem create(CreateDatasetItemRequest request, RequestOptions requestOptions);
134
135
/**
136
* Get a dataset item by ID
137
*
138
* @param id Item ID
139
* @param requestOptions Optional request configuration
140
*/
141
DatasetItem get(String id);
142
DatasetItem get(String id, RequestOptions requestOptions);
143
144
/**
145
* List dataset items
146
*
147
* @param request Optional filters and pagination
148
* @param requestOptions Optional request configuration
149
*/
150
PaginatedDatasetItems list();
151
PaginatedDatasetItems list(GetDatasetItemsRequest request);
152
PaginatedDatasetItems list(GetDatasetItemsRequest request, RequestOptions requestOptions);
153
154
/**
155
* Delete a dataset item and all its run items
156
* Irreversible operation
157
*
158
* @param id Item ID
159
* @param requestOptions Optional request configuration
160
*/
161
DeleteDatasetItemResponse delete(String id);
162
DeleteDatasetItemResponse delete(String id, RequestOptions requestOptions);
163
```
164
165
**Usage Examples:**
166
167
```java
168
import com.langfuse.client.resources.datasetitems.requests.*;
169
import com.langfuse.client.resources.datasetitems.types.*;
170
import com.langfuse.client.resources.commons.types.DatasetStatus;
171
172
// Create dataset items
173
CreateDatasetItemRequest item1 = CreateDatasetItemRequest.builder()
174
.datasetName("qa-evaluation")
175
.input(Map.of("question", "What is the return policy?"))
176
.expectedOutput(Map.of("answer", "30-day money back guarantee"))
177
.metadata(Map.of("category", "returns"))
178
.status(DatasetStatus.ACTIVE)
179
.build();
180
181
DatasetItem created1 = client.datasetItems().create(item1);
182
183
// Create from a trace
184
CreateDatasetItemRequest fromTrace = CreateDatasetItemRequest.builder()
185
.datasetName("qa-evaluation")
186
.sourceTraceId("trace-123")
187
.sourceObservationId("obs-456")
188
.build();
189
190
DatasetItem created2 = client.datasetItems().create(fromTrace);
191
192
// List items for a dataset
193
GetDatasetItemsRequest listRequest = GetDatasetItemsRequest.builder()
194
.datasetName("qa-evaluation")
195
.limit(50)
196
.build();
197
198
PaginatedDatasetItems items = client.datasetItems().list(listRequest);
199
for (DatasetItem item : items.getData()) {
200
System.out.println("Item: " + item.getId());
201
}
202
203
// Get a specific item
204
DatasetItem item = client.datasetItems().get(created1.getId());
205
206
// Delete an item
207
DeleteDatasetItemResponse deleteResp = client.datasetItems().delete(item.getId());
208
```
209
210
### DatasetRunItemsClient
211
212
Client for managing dataset run items (evaluation results).
213
214
```java { .api }
215
/**
216
* Create a dataset run item
217
* Links a dataset item to a trace/observation from an evaluation run
218
*
219
* @param request Run item definition
220
* @param requestOptions Optional request configuration
221
*/
222
DatasetRunItem create(CreateDatasetRunItemRequest request);
223
DatasetRunItem create(CreateDatasetRunItemRequest request, RequestOptions requestOptions);
224
225
/**
226
* List dataset run items
227
*
228
* @param request Filters and pagination
229
* @param requestOptions Optional request configuration
230
*/
231
void list(ListDatasetRunItemsRequest request);
232
void list(ListDatasetRunItemsRequest request, RequestOptions requestOptions);
233
```
234
235
**Usage Examples:**
236
237
```java
238
import com.langfuse.client.resources.datasetrunitems.requests.*;
239
import com.langfuse.client.resources.datasetrunitems.types.*;
240
241
// Create a run item linking dataset item to evaluation result
242
// Note: Staged builder requires runName() -> datasetItemId() in that order, then optional fields
243
CreateDatasetRunItemRequest runItem = CreateDatasetRunItemRequest.builder()
244
.runName("eval-2025-10-14") // Required first: run name
245
.datasetItemId("item-123") // Required second: dataset item ID
246
.runDescription("Automated evaluation with GPT-4") // Optional fields after required ones
247
.traceId("trace-789")
248
.observationId("obs-101")
249
.metadata(Map.of("model", "gpt-4", "temperature", 0.7))
250
.build();
251
252
DatasetRunItem created = client.datasetRunItems().create(runItem);
253
254
// List run items
255
ListDatasetRunItemsRequest listRequest = ListDatasetRunItemsRequest.builder()
256
.datasetId("dataset-id")
257
.runName("eval-2025-10-14")
258
.limit(100)
259
.build();
260
261
client.datasetRunItems().list(listRequest);
262
```
263
264
## Request Types
265
266
### CreateDatasetRequest
267
268
```java { .api }
269
/**
270
* Request for creating a dataset
271
*/
272
public final class CreateDatasetRequest {
273
String getName(); // Dataset name (unique)
274
Optional<String> getDescription(); // Description
275
Optional<Object> getMetadata(); // Custom metadata
276
277
static Builder builder();
278
}
279
```
280
281
### GetDatasetsRequest
282
283
```java { .api }
284
/**
285
* Request parameters for listing datasets
286
*/
287
public final class GetDatasetsRequest {
288
Optional<Integer> getPage(); // Page number (default: 1)
289
Optional<Integer> getLimit(); // Items per page (default: 50)
290
291
static Builder builder();
292
}
293
```
294
295
### GetDatasetRunsRequest
296
297
```java { .api }
298
/**
299
* Request parameters for listing dataset runs
300
*/
301
public final class GetDatasetRunsRequest {
302
Optional<Integer> getPage(); // Page number (default: 1)
303
Optional<Integer> getLimit(); // Items per page (default: 50)
304
305
static Builder builder();
306
}
307
```
308
309
### CreateDatasetItemRequest
310
311
```java { .api }
312
/**
313
* Request for creating a dataset item
314
*/
315
public final class CreateDatasetItemRequest {
316
String getDatasetName(); // Dataset name
317
Optional<Object> getInput(); // Input data
318
Optional<Object> getExpectedOutput(); // Expected output
319
Optional<Object> getMetadata(); // Custom metadata
320
Optional<String> getSourceTraceId(); // ID of the source trace the item is created from
321
Optional<String> getSourceObservationId(); // ID of the source observation the item is created from
322
Optional<DatasetStatus> getStatus(); // ACTIVE or ARCHIVED
323
324
static Builder builder();
325
}
326
```
327
328
### GetDatasetItemsRequest
329
330
```java { .api }
331
/**
332
* Request parameters for listing dataset items
333
*/
334
public final class GetDatasetItemsRequest {
335
Optional<String> getDatasetName(); // Filter by dataset
336
Optional<String> getSourceTraceId(); // Filter by source trace
337
Optional<String> getSourceObservationId(); // Filter by source observation
338
Optional<Integer> getPage(); // Page number (default: 1)
339
Optional<Integer> getLimit(); // Items per page (default: 50)
340
341
static Builder builder();
342
}
343
```
344
345
### CreateDatasetRunItemRequest
346
347
```java { .api }
348
/**
349
* Request for creating a dataset run item
350
*
351
* Staged Builder Pattern (required order):
352
* 1. runName(String) - Run name (required first)
353
* 2. datasetItemId(String) - Dataset item ID (required second)
354
* 3. Optional fields: runDescription, metadata, traceId, observationId
355
* 4. build() - Build the request
356
*/
357
public final class CreateDatasetRunItemRequest {
358
String getRunName(); // Run name
359
Optional<String> getRunDescription(); // Run description
360
Optional<Object> getMetadata(); // Custom metadata
361
String getDatasetItemId(); // Dataset item ID
362
Optional<String> getObservationId(); // Observation from evaluation
363
Optional<String> getTraceId(); // Trace from evaluation
364
365
static RunNameStage builder(); // Returns staged builder starting with runName()
366
}
367
```
368
369
### ListDatasetRunItemsRequest
370
371
```java { .api }
372
/**
373
* Request parameters for listing dataset run items
374
*/
375
public final class ListDatasetRunItemsRequest {
376
Optional<String> getDatasetId(); // Filter by dataset
377
Optional<String> getRunName(); // Filter by run name
378
Optional<Integer> getPage(); // Page number (default: 1)
379
Optional<Integer> getLimit(); // Items per page (default: 50)
380
Optional<String> getResponse(); // Additional response data
381
382
static Builder builder();
383
}
384
```
385
386
## Response Types
387
388
### Dataset
389
390
```java { .api }
391
import java.time.OffsetDateTime;
392
393
/**
394
* Dataset definition
395
*/
396
public final class Dataset {
397
String getId();
398
String getName();
399
Optional<String> getDescription();
400
Optional<Object> getMetadata();
401
String getProjectId();
402
OffsetDateTime getCreatedAt(); // Creation timestamp
403
OffsetDateTime getUpdatedAt(); // Last update timestamp
404
405
static Builder builder();
406
}
407
```
408
409
### PaginatedDatasets
410
411
```java { .api }
412
/**
413
* Paginated list of datasets
414
*/
415
public final class PaginatedDatasets {
416
List<Dataset> getData();
417
MetaResponse getMeta(); // Pagination metadata
418
419
static Builder builder();
420
}
421
```
422
423
### DatasetItem
424
425
```java { .api }
426
import java.time.OffsetDateTime;
427
428
/**
429
* Dataset item (test case)
430
*/
431
public final class DatasetItem {
432
String getId();
433
DatasetStatus getStatus(); // ACTIVE or ARCHIVED
434
Optional<Object> getInput(); // Input data
435
Optional<Object> getExpectedOutput(); // Expected output
436
Optional<Object> getMetadata(); // Custom metadata
437
Optional<String> getSourceTraceId();
438
Optional<String> getSourceObservationId();
439
String getDatasetId();
440
String getDatasetName();
441
OffsetDateTime getCreatedAt(); // Creation timestamp
442
OffsetDateTime getUpdatedAt(); // Last update timestamp
443
444
static Builder builder();
445
}
446
```
447
448
### PaginatedDatasetItems
449
450
```java { .api }
451
/**
452
* Paginated list of dataset items
453
*/
454
public final class PaginatedDatasetItems {
455
List<DatasetItem> getData();
456
MetaResponse getMeta(); // Pagination metadata
457
458
static Builder builder();
459
}
460
```
461
462
### DatasetRun
463
464
```java { .api }
465
import java.time.OffsetDateTime;
466
467
/**
468
* Dataset run (evaluation run)
469
*/
470
public final class DatasetRun {
471
String getId();
472
String getName();
473
Optional<String> getDescription();
474
Optional<Object> getMetadata();
475
String getDatasetId();
476
String getDatasetName();
477
OffsetDateTime getCreatedAt(); // Creation timestamp
478
OffsetDateTime getUpdatedAt(); // Last update timestamp
479
480
static Builder builder();
481
}
482
```
483
484
### DatasetRunWithItems
485
486
```java { .api }
487
import java.time.OffsetDateTime;
488
489
/**
490
* Dataset run with all its items
491
*/
492
public final class DatasetRunWithItems {
493
String getId();
494
String getName();
495
Optional<String> getDescription();
496
Optional<Object> getMetadata();
497
String getDatasetId();
498
String getDatasetName();
499
List<DatasetRunItem> getDatasetRunItems();
500
OffsetDateTime getCreatedAt(); // Creation timestamp
501
OffsetDateTime getUpdatedAt(); // Last update timestamp
502
503
static Builder builder();
504
}
505
```
506
507
### DatasetRunItem
508
509
```java { .api }
510
import java.time.OffsetDateTime;
511
512
/**
513
* Dataset run item (links dataset item to evaluation result)
514
*/
515
public final class DatasetRunItem {
516
String getId();
517
String getDatasetRunId();
518
String getDatasetRunName();
519
String getDatasetItemId();
520
String getTraceId(); // Required trace ID
521
Optional<String> getObservationId();
522
OffsetDateTime getCreatedAt(); // Creation timestamp
523
OffsetDateTime getUpdatedAt(); // Last update timestamp
524
525
static Builder builder();
526
}
527
```
528
529
### PaginatedDatasetRuns
530
531
```java { .api }
532
/**
533
* Paginated list of dataset runs
534
*/
535
public final class PaginatedDatasetRuns {
536
List<DatasetRun> getData();
537
MetaResponse getMeta(); // Pagination metadata
538
539
static Builder builder();
540
}
541
```
542
543
### DeleteDatasetItemResponse
544
545
```java { .api }
546
/**
547
* Response after deleting a dataset item
548
*/
549
public final class DeleteDatasetItemResponse {
550
boolean getSuccess();
551
552
static Builder builder();
553
}
554
```
555
556
### DeleteDatasetRunResponse
557
558
```java { .api }
559
/**
560
* Response after deleting a dataset run
561
*/
562
public final class DeleteDatasetRunResponse {
563
boolean getSuccess();
564
565
static Builder builder();
566
}
567
```
568
569
## Enums
570
571
### DatasetStatus
572
573
```java { .api }
574
/**
575
* Status of a dataset item
576
*/
577
public enum DatasetStatus {
578
ACTIVE, // Active item, included in evaluations
579
ARCHIVED // Archived item, excluded from evaluations
580
}
581
```
582
583
## Complete Dataset Evaluation Example
584
585
```java
586
import com.langfuse.client.LangfuseClient;
587
import com.langfuse.client.resources.datasets.requests.*;
588
import com.langfuse.client.resources.datasets.types.*;
589
import com.langfuse.client.resources.datasetitems.requests.*;
590
import com.langfuse.client.resources.datasetitems.types.*;
591
import com.langfuse.client.resources.datasetrunitems.requests.*;
592
import com.langfuse.client.resources.datasetrunitems.types.*;
593
import com.langfuse.client.resources.commons.types.*;
594
import java.time.LocalDate;
595
import java.util.Map;
596
597
public class DatasetEvaluationExample {
598
public static void main(String[] args) {
599
LangfuseClient client = LangfuseClient.builder()
600
.url("https://cloud.langfuse.com")
601
.credentials("pk-lf-...", "sk-lf-...")
602
.build();
603
604
// 1. Create a dataset
605
CreateDatasetRequest datasetRequest = CreateDatasetRequest.builder()
606
.name("customer-qa-v1")
607
.description("Customer support Q&A test cases")
608
.metadata(Map.of(
609
"domain", "customer-support",
610
"language", "en",
611
"version", "1.0"
612
))
613
.build();
614
615
Dataset dataset = client.datasets().create(datasetRequest);
616
System.out.println("Created dataset: " + dataset.getName());
617
618
// 2. Add test cases to the dataset
619
String[] questions = {
620
"How do I reset my password?",
621
"What is your return policy?",
622
"How long does shipping take?"
623
};
624
625
String[] expectedAnswers = {
626
"Click 'Forgot Password' on the login page",
627
"30-day money-back guarantee on all items",
628
"Standard shipping takes 5-7 business days"
629
};
630
631
for (int i = 0; i < questions.length; i++) {
632
CreateDatasetItemRequest itemRequest = CreateDatasetItemRequest.builder()
633
.datasetName(dataset.getName())
634
.input(Map.of("question", questions[i]))
635
.expectedOutput(Map.of("answer", expectedAnswers[i]))
636
.metadata(Map.of("index", i))
637
.status(DatasetStatus.ACTIVE)
638
.build();
639
640
DatasetItem item = client.datasetItems().create(itemRequest);
641
System.out.println("Created item: " + item.getId());
642
}
643
644
// 3. Run evaluation (simulated)
645
String runName = "eval-" + LocalDate.now();
646
647
GetDatasetItemsRequest listRequest = GetDatasetItemsRequest.builder()
648
.datasetName(dataset.getName())
649
.build();
650
651
PaginatedDatasetItems items = client.datasetItems().list(listRequest);
652
653
System.out.println("\nRunning evaluation...");
654
for (DatasetItem item : items.getData()) {
655
// In real usage, you would:
656
// 1. Get the input from the item
657
// 2. Run your LLM application with that input
658
// 3. Create a trace for the run
659
// 4. Link the trace to the dataset item
660
661
// Simulated trace ID (in real usage, from actual tracing)
662
String traceId = "trace-eval-" + item.getId();
663
664
// Create run item - Note: Staged builder requires runName() -> datasetItemId() first
665
CreateDatasetRunItemRequest runItemRequest = CreateDatasetRunItemRequest.builder()
666
.runName(runName) // Required first: run name
667
.datasetItemId(item.getId()) // Required second: dataset item ID
668
.runDescription("Automated evaluation with GPT-4") // Optional fields
669
.traceId(traceId)
670
.metadata(Map.of(
671
"model", "gpt-4",
672
"temperature", 0.7,
673
"evaluated_at", System.currentTimeMillis()
674
))
675
.build();
676
677
DatasetRunItem runItem = client.datasetRunItems().create(runItemRequest);
678
System.out.println("Created run item for: " + item.getId());
679
}
680
681
// 4. Retrieve run results
682
DatasetRunWithItems runResults = client.datasets()
683
.getRun(dataset.getName(), runName);
684
685
System.out.println("\nEvaluation Results:");
686
System.out.println("Run: " + runResults.getName());
687
System.out.println("Items evaluated: " + runResults.getDatasetRunItems().size());
688
689
for (DatasetRunItem runItem : runResults.getDatasetRunItems()) {
690
System.out.println(" - Item: " + runItem.getDatasetItemId() +
691
" -> Trace: " + runItem.getTraceId());
692
}
693
694
// 5. List all runs for the dataset
695
PaginatedDatasetRuns runs = client.datasets().getRuns(dataset.getName());
696
System.out.println("\nAll runs for dataset:");
697
for (DatasetRun run : runs.getData()) {
698
System.out.println(" - " + run.getName() + " (" + run.getCreatedAt() + ")");
699
}
700
}
701
}
702
```
703
704
## Best Practices

1. **Version Datasets**: Use versioned names (e.g., "qa-v1", "qa-v2") for dataset evolution
2. **Metadata for Context**: Store rich metadata about test cases (category, difficulty, etc.)
3. **Archive Old Items**: Set an item's status to DatasetStatus.ARCHIVED instead of deleting it; deletion is irreversible and also removes the item's run items
4. **Run Naming Convention**: Use consistent run names (e.g., "eval-YYYY-MM-DD-HHmm")
5. **Link to Production**: Create dataset items from production traces using sourceTraceId
6. **Batch Evaluations**: Process dataset items in batches for efficiency
7. **Track Metrics**: Store evaluation metrics in run item metadata
8. **Compare Runs**: Use multiple runs to compare different model versions or parameters

## Related Documentation

- [Traces and Observations](./traces-observations.md) - Linking evaluation results
- [Scores](./scores.md) - Scoring evaluation results
- [Common Types](./common-types.md) - Shared type definitions
- [Pagination](./pagination.md) - Pagination utilities