or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

client-configuration.md comments-annotations.md common-types.md datasets.md exceptions.md health.md index.md ingestion.md media.md metrics.md models.md pagination.md projects-organizations.md prompts.md scim.md scores.md sessions.md traces-observations.md

datasets.md docs/

0

# Datasets

1

2

The Datasets API manages datasets used for evaluation and testing. A dataset contains items (test cases) and runs (evaluation results), which together enable systematic testing and evaluation of LLM applications.

3

4

## Capabilities

5

6

### DatasetsClient

7

8

Client for managing datasets and dataset runs.

9

10

```java { .api }

11

/**

12

* List all datasets

13

*

14

* @param request Optional pagination parameters

15

* @param requestOptions Optional request configuration

16

*/

17

PaginatedDatasets list();

18

PaginatedDatasets list(GetDatasetsRequest request);

19

PaginatedDatasets list(GetDatasetsRequest request, RequestOptions requestOptions);

20

21

/**

22

* Get a dataset by name

23

*

24

* @param datasetName Name of the dataset

25

* @param requestOptions Optional request configuration

26

*/

27

Dataset get(String datasetName);

28

Dataset get(String datasetName, RequestOptions requestOptions);

29

30

/**

31

* Create a new dataset

32

*

33

* @param request Dataset definition

34

* @param requestOptions Optional request configuration

35

*/

36

Dataset create(CreateDatasetRequest request);

37

Dataset create(CreateDatasetRequest request, RequestOptions requestOptions);

38

39

/**

40

* Get a dataset run with all its items

41

*

42

* @param datasetName Name of the dataset

43

* @param runName Name of the run

44

* @param requestOptions Optional request configuration

45

*/

46

DatasetRunWithItems getRun(String datasetName, String runName);

47

DatasetRunWithItems getRun(String datasetName, String runName, RequestOptions requestOptions);

48

49

/**

50

* Delete a dataset run and all its items

51

* Irreversible operation

52

*

53

* @param datasetName Name of the dataset

54

* @param runName Name of the run

55

* @param requestOptions Optional request configuration

56

*/

57

DeleteDatasetRunResponse deleteRun(String datasetName, String runName);

58

DeleteDatasetRunResponse deleteRun(String datasetName, String runName, RequestOptions requestOptions);

59

60

/**

61

* Get all runs for a dataset

62

*

63

* @param datasetName Name of the dataset

64

* @param request Optional pagination parameters

65

* @param requestOptions Optional request configuration

66

*/

67

PaginatedDatasetRuns getRuns(String datasetName);

68

PaginatedDatasetRuns getRuns(String datasetName, GetDatasetRunsRequest request);

69

PaginatedDatasetRuns getRuns(String datasetName, GetDatasetRunsRequest request, RequestOptions requestOptions);

70

```

71

72

**Usage Examples:**

73

74

```java

75

import com.langfuse.client.LangfuseClient;

76

import com.langfuse.client.resources.datasets.requests.*;

77

import com.langfuse.client.resources.datasets.types.*;

78

import com.langfuse.client.resources.commons.types.*;

79

import java.util.Map;

80

81

LangfuseClient client = LangfuseClient.builder()

82

.url("https://cloud.langfuse.com")

83

.credentials("pk-lf-...", "sk-lf-...")

84

.build();

85

86

// Create a dataset

87

CreateDatasetRequest createRequest = CreateDatasetRequest.builder()

88

.name("qa-evaluation")

89

.description("Question answering test cases")

90

.metadata(Map.of("domain", "customer-support", "version", "1.0"))

91

.build();

92

93

Dataset dataset = client.datasets().create(createRequest);

94

95

// Get a dataset

96

Dataset retrieved = client.datasets().get("qa-evaluation");

97

98

// List all datasets

99

PaginatedDatasets datasets = client.datasets().list();

100

for (Dataset ds : datasets.getData()) {

101

System.out.println(ds.getName() + ": " + ds.getDescription().orElse(""));

102

}

103

104

// Get runs for a dataset

105

PaginatedDatasetRuns runs = client.datasets().getRuns("qa-evaluation");

106

for (DatasetRun run : runs.getData()) {

107

System.out.println("Run: " + run.getName() + " (" + run.getCreatedAt() + ")");

108

}

109

110

// Get a specific run with items

111

DatasetRunWithItems runDetails = client.datasets()

112

.getRun("qa-evaluation", "eval-2025-10-14");

113

114

System.out.println("Run items: " + runDetails.getDatasetRunItems().size());

115

116

// Delete a run

117

DeleteDatasetRunResponse deleteResp = client.datasets()

118

.deleteRun("qa-evaluation", "old-run");

119

```

120

121

### DatasetItemsClient

122

123

Client for managing dataset items (test cases).

124

125

```java { .api }

126

/**

127

* Create a dataset item

128

*

129

* @param request Item definition with input/output

130

* @param requestOptions Optional request configuration

131

*/

132

DatasetItem create(CreateDatasetItemRequest request);

133

DatasetItem create(CreateDatasetItemRequest request, RequestOptions requestOptions);

134

135

/**

136

* Get a dataset item by ID

137

*

138

* @param id Item ID

139

* @param requestOptions Optional request configuration

140

*/

141

DatasetItem get(String id);

142

DatasetItem get(String id, RequestOptions requestOptions);

143

144

/**

145

* List dataset items

146

*

147

* @param request Optional filters and pagination

148

* @param requestOptions Optional request configuration

149

*/

150

PaginatedDatasetItems list();

151

PaginatedDatasetItems list(GetDatasetItemsRequest request);

152

PaginatedDatasetItems list(GetDatasetItemsRequest request, RequestOptions requestOptions);

153

154

/**

155

* Delete a dataset item and all its run items

156

* Irreversible operation

157

*

158

* @param id Item ID

159

* @param requestOptions Optional request configuration

160

*/

161

DeleteDatasetItemResponse delete(String id);

162

DeleteDatasetItemResponse delete(String id, RequestOptions requestOptions);

163

```

164

165

**Usage Examples:**

166

167

```java

168

import com.langfuse.client.resources.datasetitems.requests.*;

169

import com.langfuse.client.resources.datasetitems.types.*;

170

import com.langfuse.client.resources.commons.types.DatasetStatus;

171

172

// Create dataset items

173

CreateDatasetItemRequest item1 = CreateDatasetItemRequest.builder()

174

.datasetName("qa-evaluation")

175

.input(Map.of("question", "What is the return policy?"))

176

.expectedOutput(Map.of("answer", "30-day money back guarantee"))

177

.metadata(Map.of("category", "returns"))

178

.status(DatasetStatus.ACTIVE)

179

.build();

180

181

DatasetItem created1 = client.datasetItems().create(item1);

182

183

// Create from a trace

184

CreateDatasetItemRequest fromTrace = CreateDatasetItemRequest.builder()

185

.datasetName("qa-evaluation")

186

.sourceTraceId("trace-123")

187

.sourceObservationId("obs-456")

188

.build();

189

190

DatasetItem created2 = client.datasetItems().create(fromTrace);

191

192

// List items for a dataset

193

GetDatasetItemsRequest listRequest = GetDatasetItemsRequest.builder()

194

.datasetName("qa-evaluation")

195

.limit(50)

196

.build();

197

198

PaginatedDatasetItems items = client.datasetItems().list(listRequest);

199

for (DatasetItem item : items.getData()) {

200

System.out.println("Item: " + item.getId());

201

}

202

203

// Get a specific item

204

DatasetItem item = client.datasetItems().get(created1.getId());

205

206

// Delete an item

207

DeleteDatasetItemResponse deleteResp = client.datasetItems().delete(item.getId());

208

```

209

210

### DatasetRunItemsClient

211

212

Client for managing dataset run items (evaluation results).

213

214

```java { .api }

215

/**

216

* Create a dataset run item

217

* Links a dataset item to a trace/observation from an evaluation run

218

*

219

* @param request Run item definition

220

* @param requestOptions Optional request configuration

221

*/

222

DatasetRunItem create(CreateDatasetRunItemRequest request);

223

DatasetRunItem create(CreateDatasetRunItemRequest request, RequestOptions requestOptions);

224

225

/**

226

* List dataset run items

227

*

228

* @param request Filters and pagination

229

* @param requestOptions Optional request configuration

230

*/

231

void list(ListDatasetRunItemsRequest request);

232

void list(ListDatasetRunItemsRequest request, RequestOptions requestOptions);

233

```

234

235

**Usage Examples:**

236

237

```java

238

import com.langfuse.client.resources.datasetrunitems.requests.*;

239

import com.langfuse.client.resources.datasetrunitems.types.*;

240

241

// Create a run item linking dataset item to evaluation result

242

// Note: Staged builder requires runName() -> datasetItemId() in that order, then optional fields

243

CreateDatasetRunItemRequest runItem = CreateDatasetRunItemRequest.builder()

244

.runName("eval-2025-10-14") // Required first: run name

245

.datasetItemId("item-123") // Required second: dataset item ID

246

.runDescription("Automated evaluation with GPT-4") // Optional fields after required ones

247

.traceId("trace-789")

248

.observationId("obs-101")

249

.metadata(Map.of("model", "gpt-4", "temperature", 0.7))

250

.build();

251

252

DatasetRunItem created = client.datasetRunItems().create(runItem);

253

254

// List run items

255

ListDatasetRunItemsRequest listRequest = ListDatasetRunItemsRequest.builder()

256

.datasetId("dataset-id")

257

.runName("eval-2025-10-14")

258

.limit(100)

259

.build();

260

261

client.datasetRunItems().list(listRequest);

262

```

263

264

## Request Types

265

266

### CreateDatasetRequest

267

268

```java { .api }

269

/**

270

* Request for creating a dataset

271

*/

272

public final class CreateDatasetRequest {

273

String getName(); // Dataset name (unique)

274

Optional<String> getDescription(); // Description

275

Optional<Object> getMetadata(); // Custom metadata

276

277

static Builder builder();

278

}

279

```

280

281

### GetDatasetsRequest

282

283

```java { .api }

284

/**

285

* Request parameters for listing datasets

286

*/

287

public final class GetDatasetsRequest {

288

Optional<Integer> getPage(); // Page number (default: 1)

289

Optional<Integer> getLimit(); // Items per page (default: 50)

290

291

static Builder builder();

292

}

293

```

294

295

### GetDatasetRunsRequest

296

297

```java { .api }

298

/**

299

* Request parameters for listing dataset runs

300

*/

301

public final class GetDatasetRunsRequest {

302

Optional<Integer> getPage(); // Page number (default: 1)

303

Optional<Integer> getLimit(); // Items per page (default: 50)

304

305

static Builder builder();

306

}

307

```

308

309

### CreateDatasetItemRequest

310

311

```java { .api }

312

/**

313

* Request for creating a dataset item

314

*/

315

public final class CreateDatasetItemRequest {

316

String getDatasetName(); // Dataset name

317

Optional<Object> getInput(); // Input data

318

Optional<Object> getExpectedOutput(); // Expected output

319

Optional<Object> getMetadata(); // Custom metadata

320

Optional<String> getSourceTraceId(); // Copy from trace

321

Optional<String> getSourceObservationId(); // Copy from observation

322

Optional<DatasetStatus> getStatus(); // ACTIVE or ARCHIVED

323

324

static Builder builder();

325

}

326

```

327

328

### GetDatasetItemsRequest

329

330

```java { .api }

331

/**

332

* Request parameters for listing dataset items

333

*/

334

public final class GetDatasetItemsRequest {

335

Optional<String> getDatasetName(); // Filter by dataset

336

Optional<String> getSourceTraceId(); // Filter by source trace

337

Optional<String> getSourceObservationId(); // Filter by source observation

338

Optional<Integer> getPage(); // Page number (default: 1)

339

Optional<Integer> getLimit(); // Items per page (default: 50)

340

341

static Builder builder();

342

}

343

```

344

345

### CreateDatasetRunItemRequest

346

347

```java { .api }

348

/**

349

* Request for creating a dataset run item

350

*

351

* Staged Builder Pattern (required order):

352

* 1. runName(String) - Run name (required first)

353

* 2. datasetItemId(String) - Dataset item ID (required second)

354

* 3. Optional fields: runDescription, metadata, traceId, observationId

355

* 4. build() - Build the request

356

*/

357

public final class CreateDatasetRunItemRequest {

358

String getRunName(); // Run name

359

Optional<String> getRunDescription(); // Run description

360

Optional<Object> getMetadata(); // Custom metadata

361

String getDatasetItemId(); // Dataset item ID

362

Optional<String> getObservationId(); // Observation from evaluation

363

Optional<String> getTraceId(); // Trace from evaluation

364

365

static RunNameStage builder(); // Returns staged builder starting with runName()

366

}

367

```

368

369

### ListDatasetRunItemsRequest

370

371

```java { .api }

372

/**

373

* Request parameters for listing dataset run items

374

*/

375

public final class ListDatasetRunItemsRequest {

376

Optional<String> getDatasetId(); // Filter by dataset

377

Optional<String> getRunName(); // Filter by run name

378

Optional<Integer> getPage(); // Page number (default: 1)

379

Optional<Integer> getLimit(); // Items per page (default: 50)

380

Optional<String> getResponse(); // Additional response data

381

382

static Builder builder();

383

}

384

```

385

386

## Response Types

387

388

### Dataset

389

390

```java { .api }

391

import java.time.OffsetDateTime;

392

393

/**

394

* Dataset definition

395

*/

396

public final class Dataset {

397

String getId();

398

String getName();

399

Optional<String> getDescription();

400

Optional<Object> getMetadata();

401

String getProjectId();

402

OffsetDateTime getCreatedAt(); // Creation timestamp

403

OffsetDateTime getUpdatedAt(); // Last update timestamp

404

405

static Builder builder();

406

}

407

```

408

409

### PaginatedDatasets

410

411

```java { .api }

412

/**

413

* Paginated list of datasets

414

*/

415

public final class PaginatedDatasets {

416

List<Dataset> getData();

417

MetaResponse getMeta(); // Pagination metadata

418

419

static Builder builder();

420

}

421

```

422

423

### DatasetItem

424

425

```java { .api }

426

import java.time.OffsetDateTime;

427

428

/**

429

* Dataset item (test case)

430

*/

431

public final class DatasetItem {

432

String getId();

433

DatasetStatus getStatus(); // ACTIVE or ARCHIVED

434

Optional<Object> getInput(); // Input data

435

Optional<Object> getExpectedOutput(); // Expected output

436

Optional<Object> getMetadata(); // Custom metadata

437

Optional<String> getSourceTraceId();

438

Optional<String> getSourceObservationId();

439

String getDatasetId();

440

String getDatasetName();

441

OffsetDateTime getCreatedAt(); // Creation timestamp

442

OffsetDateTime getUpdatedAt(); // Last update timestamp

443

444

static Builder builder();

445

}

446

```

447

448

### PaginatedDatasetItems

449

450

```java { .api }

451

/**

452

* Paginated list of dataset items

453

*/

454

public final class PaginatedDatasetItems {

455

List<DatasetItem> getData();

456

MetaResponse getMeta(); // Pagination metadata

457

458

static Builder builder();

459

}

460

```

461

462

### DatasetRun

463

464

```java { .api }

465

import java.time.OffsetDateTime;

466

467

/**

468

* Dataset run (evaluation run)

469

*/

470

public final class DatasetRun {

471

String getId();

472

String getName();

473

Optional<String> getDescription();

474

Optional<Object> getMetadata();

475

String getDatasetId();

476

String getDatasetName();

477

OffsetDateTime getCreatedAt(); // Creation timestamp

478

OffsetDateTime getUpdatedAt(); // Last update timestamp

479

480

static Builder builder();

481

}

482

```

483

484

### DatasetRunWithItems

485

486

```java { .api }

487

import java.time.OffsetDateTime;

488

489

/**

490

* Dataset run with all its items

491

*/

492

public final class DatasetRunWithItems {

493

String getId();

494

String getName();

495

Optional<String> getDescription();

496

Optional<Object> getMetadata();

497

String getDatasetId();

498

String getDatasetName();

499

List<DatasetRunItem> getDatasetRunItems();

500

OffsetDateTime getCreatedAt(); // Creation timestamp

501

OffsetDateTime getUpdatedAt(); // Last update timestamp

502

503

static Builder builder();

504

}

505

```

506

507

### DatasetRunItem

508

509

```java { .api }

510

import java.time.OffsetDateTime;

511

512

/**

513

* Dataset run item (links dataset item to evaluation result)

514

*/

515

public final class DatasetRunItem {

516

String getId();

517

String getDatasetRunId();

518

String getDatasetRunName();

519

String getDatasetItemId();

520

String getTraceId(); // Required trace ID

521

Optional<String> getObservationId();

522

OffsetDateTime getCreatedAt(); // Creation timestamp

523

OffsetDateTime getUpdatedAt(); // Last update timestamp

524

525

static Builder builder();

526

}

527

```

528

529

### PaginatedDatasetRuns

530

531

```java { .api }

532

/**

533

* Paginated list of dataset runs

534

*/

535

public final class PaginatedDatasetRuns {

536

List<DatasetRun> getData();

537

MetaResponse getMeta(); // Pagination metadata

538

539

static Builder builder();

540

}

541

```

542

543

### DeleteDatasetItemResponse

544

545

```java { .api }

546

/**

547

* Response after deleting a dataset item

548

*/

549

public final class DeleteDatasetItemResponse {

550

boolean getSuccess();

551

552

static Builder builder();

553

}

554

```

555

556

### DeleteDatasetRunResponse

557

558

```java { .api }

559

/**

560

* Response after deleting a dataset run

561

*/

562

public final class DeleteDatasetRunResponse {

563

boolean getSuccess();

564

565

static Builder builder();

566

}

567

```

568

569

## Enums

570

571

### DatasetStatus

572

573

```java { .api }

574

/**

575

* Status of a dataset item

576

*/

577

public enum DatasetStatus {

578

ACTIVE, // Active item, included in evaluations

579

ARCHIVED // Archived item, excluded from evaluations

580

}

581

```

582

583

## Complete Dataset Evaluation Example

584

585

```java

586

import com.langfuse.client.LangfuseClient;

587

import com.langfuse.client.resources.datasets.requests.*;

588

import com.langfuse.client.resources.datasets.types.*;

589

import com.langfuse.client.resources.datasetitems.requests.*;

590

import com.langfuse.client.resources.datasetitems.types.*;

591

import com.langfuse.client.resources.datasetrunitems.requests.*;

592

import com.langfuse.client.resources.datasetrunitems.types.*;

593

import com.langfuse.client.resources.commons.types.*;

594

import java.time.LocalDate;

595

import java.util.Map;

596

597

public class DatasetEvaluationExample {

598

public static void main(String[] args) {

599

LangfuseClient client = LangfuseClient.builder()

600

.url("https://cloud.langfuse.com")

601

.credentials("pk-lf-...", "sk-lf-...")

602

.build();

603

604

// 1. Create a dataset

605

CreateDatasetRequest datasetRequest = CreateDatasetRequest.builder()

606

.name("customer-qa-v1")

607

.description("Customer support Q&A test cases")

608

.metadata(Map.of(

609

"domain", "customer-support",

610

"language", "en",

611

"version", "1.0"

612

))

613

.build();

614

615

Dataset dataset = client.datasets().create(datasetRequest);

616

System.out.println("Created dataset: " + dataset.getName());

617

618

// 2. Add test cases to the dataset

619

String[] questions = {

620

"How do I reset my password?",

621

"What is your return policy?",

622

"How long does shipping take?"

623

};

624

625

String[] expectedAnswers = {

626

"Click 'Forgot Password' on the login page",

627

"30-day money-back guarantee on all items",

628

"Standard shipping takes 5-7 business days"

629

};

630

631

for (int i = 0; i < questions.length; i++) {

632

CreateDatasetItemRequest itemRequest = CreateDatasetItemRequest.builder()

633

.datasetName(dataset.getName())

634

.input(Map.of("question", questions[i]))

635

.expectedOutput(Map.of("answer", expectedAnswers[i]))

636

.metadata(Map.of("index", i))

637

.status(DatasetStatus.ACTIVE)

638

.build();

639

640

DatasetItem item = client.datasetItems().create(itemRequest);

641

System.out.println("Created item: " + item.getId());

642

}

643

644

// 3. Run evaluation (simulated)

645

String runName = "eval-" + LocalDate.now();

646

647

GetDatasetItemsRequest listRequest = GetDatasetItemsRequest.builder()

648

.datasetName(dataset.getName())

649

.build();

650

651

PaginatedDatasetItems items = client.datasetItems().list(listRequest);

652

653

System.out.println("\nRunning evaluation...");

654

for (DatasetItem item : items.getData()) {

655

// In real usage, you would:

656

// 1. Get the input from the item

657

// 2. Run your LLM application with that input

658

// 3. Create a trace for the run

659

// 4. Link the trace to the dataset item

660

661

// Simulated trace ID (in real usage, from actual tracing)

662

String traceId = "trace-eval-" + item.getId();

663

664

// Create run item - Note: Staged builder requires runName() -> datasetItemId() first

665

CreateDatasetRunItemRequest runItemRequest = CreateDatasetRunItemRequest.builder()

666

.runName(runName) // Required first: run name

667

.datasetItemId(item.getId()) // Required second: dataset item ID

668

.runDescription("Automated evaluation with GPT-4") // Optional fields

669

.traceId(traceId)

670

.metadata(Map.of(

671

"model", "gpt-4",

672

"temperature", 0.7,

673

"evaluated_at", System.currentTimeMillis()

674

))

675

.build();

676

677

DatasetRunItem runItem = client.datasetRunItems().create(runItemRequest);

678

System.out.println("Created run item for: " + item.getId());

679

}

680

681

// 4. Retrieve run results

682

DatasetRunWithItems runResults = client.datasets()

683

.getRun(dataset.getName(), runName);

684

685

System.out.println("\nEvaluation Results:");

686

System.out.println("Run: " + runResults.getName());

687

System.out.println("Items evaluated: " + runResults.getDatasetRunItems().size());

688

689

for (DatasetRunItem runItem : runResults.getDatasetRunItems()) {

690

System.out.println(" - Item: " + runItem.getDatasetItemId() +

691

" -> Trace: " + runItem.getTraceId());

692

}

693

694

// 5. List all runs for the dataset

695

PaginatedDatasetRuns runs = client.datasets().getRuns(dataset.getName());

696

System.out.println("\nAll runs for dataset:");

697

for (DatasetRun run : runs.getData()) {

698

System.out.println(" - " + run.getName() + " (" + run.getCreatedAt() + ")");

699

}

700

}

701

}

702

```

703

704

## Best Practices

705

706

1. **Version Datasets**: Use versioned names (e.g., "qa-v1", "qa-v2") for dataset evolution

707

2. **Metadata for Context**: Store rich metadata about test cases (category, difficulty, etc.)

708

3. **Archive Old Items**: Use DatasetStatus.ARCHIVED instead of deleting items

709

4. **Run Naming Convention**: Use consistent run names (e.g., "eval-YYYY-MM-DD-HHmm")

710

5. **Link to Production**: Create dataset items from production traces using sourceTraceId

711

6. **Batch Evaluations**: Process dataset items in batches for efficiency

712

7. **Track Metrics**: Store evaluation metrics in run item metadata

713

8. **Compare Runs**: Use multiple runs to compare different model versions or parameters

714

715

## Related Documentation

716

717

- [Traces and Observations](./traces-observations.md) - Linking evaluation results

718

- [Scores](./scores.md) - Scoring evaluation results

719

- [Common Types](./common-types.md) - Shared type definitions

720

- [Pagination](./pagination.md) - Pagination utilities

721