CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/maven-com-langfuse--langfuse-java

Java client for the Langfuse API providing access to observability and analytics features for LLM applications

Overview
Eval results
Files

docs/datasets.md

Datasets

The Datasets API provides management of datasets for evaluation and testing. Datasets contain items (test cases) and runs (evaluation results). This enables systematic testing and evaluation of LLM applications.

Capabilities

DatasetsClient

Client for managing datasets and dataset runs.

/**
 * List all datasets
 *
 * @param request Optional pagination parameters
 * @param requestOptions Optional request configuration
 */
PaginatedDatasets list();
PaginatedDatasets list(GetDatasetsRequest request);
PaginatedDatasets list(GetDatasetsRequest request, RequestOptions requestOptions);

/**
 * Get a dataset by name
 *
 * @param datasetName Name of the dataset
 * @param requestOptions Optional request configuration
 */
Dataset get(String datasetName);
Dataset get(String datasetName, RequestOptions requestOptions);

/**
 * Create a new dataset
 *
 * @param request Dataset definition
 * @param requestOptions Optional request configuration
 */
Dataset create(CreateDatasetRequest request);
Dataset create(CreateDatasetRequest request, RequestOptions requestOptions);

/**
 * Get a dataset run with all its items
 *
 * @param datasetName Name of the dataset
 * @param runName Name of the run
 * @param requestOptions Optional request configuration
 */
DatasetRunWithItems getRun(String datasetName, String runName);
DatasetRunWithItems getRun(String datasetName, String runName, RequestOptions requestOptions);

/**
 * Delete a dataset run and all its items
 * Irreversible operation
 *
 * @param datasetName Name of the dataset
 * @param runName Name of the run
 * @param requestOptions Optional request configuration
 */
DeleteDatasetRunResponse deleteRun(String datasetName, String runName);
DeleteDatasetRunResponse deleteRun(String datasetName, String runName, RequestOptions requestOptions);

/**
 * Get all runs for a dataset
 *
 * @param datasetName Name of the dataset
 * @param request Optional pagination parameters
 * @param requestOptions Optional request configuration
 */
PaginatedDatasetRuns getRuns(String datasetName);
PaginatedDatasetRuns getRuns(String datasetName, GetDatasetRunsRequest request);
PaginatedDatasetRuns getRuns(String datasetName, GetDatasetRunsRequest request, RequestOptions requestOptions);

Usage Examples:

import com.langfuse.client.LangfuseClient;
import com.langfuse.client.resources.datasets.requests.*;
import com.langfuse.client.resources.datasets.types.*;
import com.langfuse.client.resources.commons.types.*;
import java.util.Map;

LangfuseClient client = LangfuseClient.builder()
    .url("https://cloud.langfuse.com")
    .credentials("pk-lf-...", "sk-lf-...")
    .build();

// Create a dataset
CreateDatasetRequest createRequest = CreateDatasetRequest.builder()
    .name("qa-evaluation")
    .description("Question answering test cases")
    .metadata(Map.of("domain", "customer-support", "version", "1.0"))
    .build();

Dataset dataset = client.datasets().create(createRequest);

// Get a dataset
Dataset retrieved = client.datasets().get("qa-evaluation");

// List all datasets
PaginatedDatasets datasets = client.datasets().list();
for (Dataset ds : datasets.getData()) {
    System.out.println(ds.getName() + ": " + ds.getDescription().orElse(""));
}

// Get runs for a dataset
PaginatedDatasetRuns runs = client.datasets().getRuns("qa-evaluation");
for (DatasetRun run : runs.getData()) {
    System.out.println("Run: " + run.getName() + " (" + run.getCreatedAt() + ")");
}

// Get a specific run with items
DatasetRunWithItems runDetails = client.datasets()
    .getRun("qa-evaluation", "eval-2025-10-14");

System.out.println("Run items: " + runDetails.getDatasetRunItems().size());

// Delete a run
DeleteDatasetRunResponse deleteResp = client.datasets()
    .deleteRun("qa-evaluation", "old-run");

DatasetItemsClient

Client for managing dataset items (test cases).

/**
 * Create a dataset item
 *
 * @param request Item definition with input/output
 * @param requestOptions Optional request configuration
 */
DatasetItem create(CreateDatasetItemRequest request);
DatasetItem create(CreateDatasetItemRequest request, RequestOptions requestOptions);

/**
 * Get a dataset item by ID
 *
 * @param id Item ID
 * @param requestOptions Optional request configuration
 */
DatasetItem get(String id);
DatasetItem get(String id, RequestOptions requestOptions);

/**
 * List dataset items
 *
 * @param request Optional filters and pagination
 * @param requestOptions Optional request configuration
 */
PaginatedDatasetItems list();
PaginatedDatasetItems list(GetDatasetItemsRequest request);
PaginatedDatasetItems list(GetDatasetItemsRequest request, RequestOptions requestOptions);

/**
 * Delete a dataset item and all its run items
 * Irreversible operation
 *
 * @param id Item ID
 * @param requestOptions Optional request configuration
 */
DeleteDatasetItemResponse delete(String id);
DeleteDatasetItemResponse delete(String id, RequestOptions requestOptions);

Usage Examples:

import com.langfuse.client.resources.datasetitems.requests.*;
import com.langfuse.client.resources.datasetitems.types.*;
import com.langfuse.client.resources.commons.types.DatasetStatus;

// Create dataset items
CreateDatasetItemRequest item1 = CreateDatasetItemRequest.builder()
    .datasetName("qa-evaluation")
    .input(Map.of("question", "What is the return policy?"))
    .expectedOutput(Map.of("answer", "30-day money back guarantee"))
    .metadata(Map.of("category", "returns"))
    .status(DatasetStatus.ACTIVE)
    .build();

DatasetItem created1 = client.datasetItems().create(item1);

// Create from a trace
CreateDatasetItemRequest fromTrace = CreateDatasetItemRequest.builder()
    .datasetName("qa-evaluation")
    .sourceTraceId("trace-123")
    .sourceObservationId("obs-456")
    .build();

DatasetItem created2 = client.datasetItems().create(fromTrace);

// List items for a dataset
GetDatasetItemsRequest listRequest = GetDatasetItemsRequest.builder()
    .datasetName("qa-evaluation")
    .limit(50)
    .build();

PaginatedDatasetItems items = client.datasetItems().list(listRequest);
for (DatasetItem item : items.getData()) {
    System.out.println("Item: " + item.getId());
}

// Get a specific item
DatasetItem item = client.datasetItems().get(created1.getId());

// Delete an item
DeleteDatasetItemResponse deleteResp = client.datasetItems().delete(item.getId());

DatasetRunItemsClient

Client for managing dataset run items (evaluation results).

/**
 * Create a dataset run item
 * Links a dataset item to a trace/observation from an evaluation run
 *
 * @param request Run item definition
 * @param requestOptions Optional request configuration
 */
DatasetRunItem create(CreateDatasetRunItemRequest request);
DatasetRunItem create(CreateDatasetRunItemRequest request, RequestOptions requestOptions);

/**
 * List dataset run items
 *
 * @param request Filters and pagination
 * @param requestOptions Optional request configuration
 */
void list(ListDatasetRunItemsRequest request);
void list(ListDatasetRunItemsRequest request, RequestOptions requestOptions);

Usage Examples:

import com.langfuse.client.resources.datasetrunitems.requests.*;
import com.langfuse.client.resources.datasetrunitems.types.*;

// Create a run item linking dataset item to evaluation result
// Note: Staged builder requires runName() -> datasetItemId() in that order, then optional fields
CreateDatasetRunItemRequest runItem = CreateDatasetRunItemRequest.builder()
    .runName("eval-2025-10-14")  // Required first: run name
    .datasetItemId("item-123")  // Required second: dataset item ID
    .runDescription("Automated evaluation with GPT-4")  // Optional fields after required ones
    .traceId("trace-789")
    .observationId("obs-101")
    .metadata(Map.of("model", "gpt-4", "temperature", 0.7))
    .build();

DatasetRunItem created = client.datasetRunItems().create(runItem);

// List run items
ListDatasetRunItemsRequest listRequest = ListDatasetRunItemsRequest.builder()
    .datasetId("dataset-id")
    .runName("eval-2025-10-14")
    .limit(100)
    .build();

client.datasetRunItems().list(listRequest);

Request Types

CreateDatasetRequest

/**
 * Request for creating a dataset
 */
public final class CreateDatasetRequest {
    String getName();                   // Dataset name (unique)
    Optional<String> getDescription();  // Description
    Optional<Object> getMetadata();     // Custom metadata

    static Builder builder();
}

GetDatasetsRequest

/**
 * Request parameters for listing datasets
 */
public final class GetDatasetsRequest {
    Optional<Integer> getPage();   // Page number (default: 1)
    Optional<Integer> getLimit();  // Items per page (default: 50)

    static Builder builder();
}

GetDatasetRunsRequest

/**
 * Request parameters for listing dataset runs
 */
public final class GetDatasetRunsRequest {
    Optional<Integer> getPage();   // Page number (default: 1)
    Optional<Integer> getLimit();  // Items per page (default: 50)

    static Builder builder();
}

CreateDatasetItemRequest

/**
 * Request for creating a dataset item
 */
public final class CreateDatasetItemRequest {
    String getDatasetName();                  // Dataset name
    Optional<Object> getInput();              // Input data
    Optional<Object> getExpectedOutput();     // Expected output
    Optional<Object> getMetadata();           // Custom metadata
    Optional<String> getSourceTraceId();      // Copy from trace
    Optional<String> getSourceObservationId(); // Copy from observation
    Optional<DatasetStatus> getStatus();      // ACTIVE or ARCHIVED

    static Builder builder();
}

GetDatasetItemsRequest

/**
 * Request parameters for listing dataset items
 */
public final class GetDatasetItemsRequest {
    Optional<String> getDatasetName();        // Filter by dataset
    Optional<String> getSourceTraceId();      // Filter by source trace
    Optional<String> getSourceObservationId(); // Filter by source observation
    Optional<Integer> getPage();              // Page number (default: 1)
    Optional<Integer> getLimit();             // Items per page (default: 50)

    static Builder builder();
}

CreateDatasetRunItemRequest

/**
 * Request for creating a dataset run item
 *
 * Staged Builder Pattern (required order):
 * 1. runName(String) - Run name (required first)
 * 2. datasetItemId(String) - Dataset item ID (required second)
 * 3. Optional fields: runDescription, metadata, traceId, observationId
 * 4. build() - Build the request
 */
public final class CreateDatasetRunItemRequest {
    String getRunName();                    // Run name
    Optional<String> getRunDescription();   // Run description
    Optional<Object> getMetadata();         // Custom metadata
    String getDatasetItemId();              // Dataset item ID
    Optional<String> getObservationId();    // Observation from evaluation
    Optional<String> getTraceId();          // Trace from evaluation

    static RunNameStage builder();  // Returns staged builder starting with runName()
}

ListDatasetRunItemsRequest

/**
 * Request parameters for listing dataset run items
 */
public final class ListDatasetRunItemsRequest {
    Optional<String> getDatasetId();  // Filter by dataset
    Optional<String> getRunName();    // Filter by run name
    Optional<Integer> getPage();      // Page number (default: 1)
    Optional<Integer> getLimit();     // Items per page (default: 50)
    Optional<String> getResponse();   // Additional response data

    static Builder builder();
}

Response Types

Dataset

import java.time.OffsetDateTime;

/**
 * Dataset definition
 */
public final class Dataset {
    String getId();
    String getName();
    Optional<String> getDescription();
    Optional<Object> getMetadata();
    String getProjectId();
    OffsetDateTime getCreatedAt();  // Creation timestamp
    OffsetDateTime getUpdatedAt();  // Last update timestamp

    static Builder builder();
}

PaginatedDatasets

/**
 * Paginated list of datasets
 */
public final class PaginatedDatasets {
    List<Dataset> getData();
    MetaResponse getMeta();  // Pagination metadata

    static Builder builder();
}

DatasetItem

import java.time.OffsetDateTime;

/**
 * Dataset item (test case)
 */
public final class DatasetItem {
    String getId();
    DatasetStatus getStatus();            // ACTIVE or ARCHIVED
    Optional<Object> getInput();          // Input data
    Optional<Object> getExpectedOutput(); // Expected output
    Optional<Object> getMetadata();       // Custom metadata
    Optional<String> getSourceTraceId();
    Optional<String> getSourceObservationId();
    String getDatasetId();
    String getDatasetName();
    OffsetDateTime getCreatedAt();  // Creation timestamp
    OffsetDateTime getUpdatedAt();  // Last update timestamp

    static Builder builder();
}

PaginatedDatasetItems

/**
 * Paginated list of dataset items
 */
public final class PaginatedDatasetItems {
    List<DatasetItem> getData();
    MetaResponse getMeta();  // Pagination metadata

    static Builder builder();
}

DatasetRun

import java.time.OffsetDateTime;

/**
 * Dataset run (evaluation run)
 */
public final class DatasetRun {
    String getId();
    String getName();
    Optional<String> getDescription();
    Optional<Object> getMetadata();
    String getDatasetId();
    String getDatasetName();
    OffsetDateTime getCreatedAt();  // Creation timestamp
    OffsetDateTime getUpdatedAt();  // Last update timestamp

    static Builder builder();
}

DatasetRunWithItems

import java.time.OffsetDateTime;

/**
 * Dataset run with all its items
 */
public final class DatasetRunWithItems {
    String getId();
    String getName();
    Optional<String> getDescription();
    Optional<Object> getMetadata();
    String getDatasetId();
    String getDatasetName();
    List<DatasetRunItem> getDatasetRunItems();
    OffsetDateTime getCreatedAt();  // Creation timestamp
    OffsetDateTime getUpdatedAt();  // Last update timestamp

    static Builder builder();
}

DatasetRunItem

import java.time.OffsetDateTime;

/**
 * Dataset run item (links dataset item to evaluation result)
 */
public final class DatasetRunItem {
    String getId();
    String getDatasetRunId();
    String getDatasetRunName();
    String getDatasetItemId();
    String getTraceId();                  // Required trace ID
    Optional<String> getObservationId();
    OffsetDateTime getCreatedAt();  // Creation timestamp
    OffsetDateTime getUpdatedAt();  // Last update timestamp

    static Builder builder();
}

PaginatedDatasetRuns

/**
 * Paginated list of dataset runs
 */
public final class PaginatedDatasetRuns {
    List<DatasetRun> getData();
    MetaResponse getMeta();  // Pagination metadata

    static Builder builder();
}

DeleteDatasetItemResponse

/**
 * Response after deleting a dataset item
 */
public final class DeleteDatasetItemResponse {
    boolean getSuccess();

    static Builder builder();
}

DeleteDatasetRunResponse

/**
 * Response after deleting a dataset run
 */
public final class DeleteDatasetRunResponse {
    boolean getSuccess();

    static Builder builder();
}

Enums

DatasetStatus

/**
 * Status of a dataset item
 */
public enum DatasetStatus {
    ACTIVE,     // Active item, included in evaluations
    ARCHIVED    // Archived item, excluded from evaluations
}

Complete Dataset Evaluation Example

import com.langfuse.client.LangfuseClient;
import com.langfuse.client.resources.datasets.requests.*;
import com.langfuse.client.resources.datasets.types.*;
import com.langfuse.client.resources.datasetitems.requests.*;
import com.langfuse.client.resources.datasetitems.types.*;
import com.langfuse.client.resources.datasetrunitems.requests.*;
import com.langfuse.client.resources.datasetrunitems.types.*;
import com.langfuse.client.resources.commons.types.*;
import java.time.LocalDate;
import java.util.Map;

/**
 * End-to-end walkthrough of a dataset evaluation with the Langfuse Java client.
 *
 * <p>The example performs five steps in order: create a dataset, add test cases
 * to it, link each test case to a (simulated) trace under a single run name,
 * fetch that run together with its items, and finally list every run recorded
 * for the dataset.
 */
public class DatasetEvaluationExample {
    /**
     * Runs the walkthrough against the configured Langfuse endpoint.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        LangfuseClient client = LangfuseClient.builder()
            .url("https://cloud.langfuse.com")
            .credentials("pk-lf-...", "sk-lf-...")
            .build();

        // 1. Create a dataset
        CreateDatasetRequest datasetRequest = CreateDatasetRequest.builder()
            .name("customer-qa-v1")
            .description("Customer support Q&A test cases")
            .metadata(Map.of(
                "domain", "customer-support",
                "language", "en",
                "version", "1.0"
            ))
            .build();

        Dataset dataset = client.datasets().create(datasetRequest);
        System.out.println("Created dataset: " + dataset.getName());

        // 2. Add test cases to the dataset
        // questions[i] pairs with expectedAnswers[i]; keep both arrays the same length.
        String[] questions = {
            "How do I reset my password?",
            "What is your return policy?",
            "How long does shipping take?"
        };

        String[] expectedAnswers = {
            "Click 'Forgot Password' on the login page",
            "30-day money-back guarantee on all items",
            "Standard shipping takes 5-7 business days"
        };

        for (int i = 0; i < questions.length; i++) {
            CreateDatasetItemRequest itemRequest = CreateDatasetItemRequest.builder()
                .datasetName(dataset.getName())
                .input(Map.of("question", questions[i]))
                .expectedOutput(Map.of("answer", expectedAnswers[i]))
                .metadata(Map.of("index", i))
                .status(DatasetStatus.ACTIVE)
                .build();

            DatasetItem item = client.datasetItems().create(itemRequest);
            System.out.println("Created item: " + item.getId());
        }

        // 3. Run evaluation (simulated)
        // All run items created below share this name, which is what groups them into one run.
        String runName = "eval-" + LocalDate.now();

        GetDatasetItemsRequest listRequest = GetDatasetItemsRequest.builder()
            .datasetName(dataset.getName())
            .build();

        PaginatedDatasetItems items = client.datasetItems().list(listRequest);

        System.out.println("\nRunning evaluation...");
        for (DatasetItem item : items.getData()) {
            // In real usage, you would:
            // 1. Get the input from the item
            // 2. Run your LLM application with that input
            // 3. Create a trace for the run
            // 4. Link the trace to the dataset item

            // Simulated trace ID (in real usage, from actual tracing)
            String traceId = "trace-eval-" + item.getId();

            // Create run item - Note: Staged builder requires runName() -> datasetItemId() first
            CreateDatasetRunItemRequest runItemRequest = CreateDatasetRunItemRequest.builder()
                .runName(runName)  // Required first: run name
                .datasetItemId(item.getId())  // Required second: dataset item ID
                .runDescription("Automated evaluation with GPT-4")  // Optional fields
                .traceId(traceId)
                .metadata(Map.of(
                    "model", "gpt-4",
                    "temperature", 0.7,
                    "evaluated_at", System.currentTimeMillis()
                ))
                .build();

            // create(...) returns the stored DatasetRunItem; it is not needed here.
            client.datasetRunItems().create(runItemRequest);
            System.out.println("Created run item for: " + item.getId());
        }

        // 4. Retrieve run results
        DatasetRunWithItems runResults = client.datasets()
            .getRun(dataset.getName(), runName);

        System.out.println("\nEvaluation Results:");
        System.out.println("Run: " + runResults.getName());
        System.out.println("Items evaluated: " + runResults.getDatasetRunItems().size());

        for (DatasetRunItem runItem : runResults.getDatasetRunItems()) {
            // DatasetRunItem.getTraceId() returns a plain String (the trace ID is required),
            // so there is no Optional to unwrap here.
            System.out.println("  - Item: " + runItem.getDatasetItemId() +
                             " -> Trace: " + runItem.getTraceId());
        }

        // 5. List all runs for the dataset
        PaginatedDatasetRuns runs = client.datasets().getRuns(dataset.getName());
        System.out.println("\nAll runs for dataset:");
        for (DatasetRun run : runs.getData()) {
            System.out.println("  - " + run.getName() + " (" + run.getCreatedAt() + ")");
        }
    }
}

Best Practices

  1. Version Datasets: Use versioned names (e.g., "qa-v1", "qa-v2") for dataset evolution
  2. Metadata for Context: Store rich metadata about test cases (category, difficulty, etc.)
  3. Archive Old Items: Use DatasetStatus.ARCHIVED instead of deleting items; deleting an item is irreversible and also removes all of its run items
  4. Run Naming Convention: Use consistent run names (e.g., "eval-YYYY-MM-DD-HHmm")
  5. Link to Production: Create dataset items from production traces using sourceTraceId
  6. Batch Evaluations: Process dataset items in batches for efficiency
  7. Track Metrics: Store evaluation metrics in run item metadata
  8. Compare Runs: Use multiple runs to compare different model versions or parameters

Related Documentation

  • Traces and Observations - Linking evaluation results
  • Scores - Scoring evaluation results
  • Common Types - Shared type definitions
  • Pagination - Pagination utilities

Install with Tessl CLI

npx tessl i tessl/maven-com-langfuse--langfuse-java

docs

client-configuration.md

comments-annotations.md

common-types.md

datasets.md

exceptions.md

health.md

index.md

ingestion.md

media.md

metrics.md

models.md

pagination.md

projects-organizations.md

prompts.md

scim.md

scores.md

sessions.md

traces-observations.md

tile.json