A library for summarizing data in streams for which it is infeasible to store all events
—
Algorithms for computing quantiles and percentiles in streaming data with memory-efficient approximation techniques. These data structures provide approximate quantile queries with configurable accuracy guarantees.
Common interface implemented by all quantile estimation algorithms.
/**
* Interface for quantile estimation algorithms
*/
public interface IQuantileEstimator {
/**
* Add value to the estimator
* @param value long value to add to the data stream
*/
void offer(long value);
/**
* Get quantile estimate
* @param q quantile to compute (0.0 to 1.0, e.g., 0.5 for median)
* @return estimated value at the specified quantile
*/
long getQuantile(double q);
}Q-Digest data structure for quantile estimation with configurable compression factor that determines accuracy vs memory trade-off.
/**
* Q-Digest quantile estimator
*
* Answers approximate quantile queries where actual rank of query result
* is in q-eps .. q+eps, where eps = log(sigma)/compressionFactor
* and log(sigma) is ceiling of binary log of largest value inserted.
*/
public class QDigest implements IQuantileEstimator {
/**
* Create Q-Digest with specified compression factor
* @param compressionFactor higher values give better accuracy but use more memory
*/
public QDigest(double compressionFactor);
/**
* Get current size (number of nodes in the digest)
* @return size of the digest structure
*/
public long size();
/**
* Get compression factor
* @return compression factor used
*/
public double getCompressionFactor();
/**
* Manually trigger compression of the digest
*/
public void compress();
/**
* Create union of multiple Q-Digests
* @param digests Q-Digests to union (must have same compression factor)
* @return new Q-Digest representing union of input digests
*/
public static QDigest unionOf(QDigest... digests);
/**
* Serialize Q-Digest to byte array
* @param digest digest to serialize
* @return serialized bytes
* @throws IOException if serialization fails
*/
public static byte[] serialize(QDigest digest) throws IOException;
/**
* Deserialize Q-Digest from byte array
* @param data serialized digest data
* @return deserialized Q-Digest
* @throws IOException if deserialization fails
*/
public static QDigest deserialize(byte[] data) throws IOException;
}Usage Examples:
import com.clearspring.analytics.stream.quantile.QDigest;
// Create Q-Digest with compression factor 100
QDigest qd = new QDigest(100.0);
// Add values from data stream
qd.offer(10);
qd.offer(20);
qd.offer(15);
qd.offer(30);
qd.offer(25);
// Query quantiles
long median = qd.getQuantile(0.5); // 50th percentile (median)
long p90 = qd.getQuantile(0.9); // 90th percentile
long p99 = qd.getQuantile(0.99); // 99th percentile
long min = qd.getQuantile(0.0); // minimum value
long max = qd.getQuantile(1.0); // maximum value
System.out.println("Median: " + median);
System.out.println("90th percentile: " + p90);T-Digest quantile estimator with better accuracy at extreme quantiles (near 0.0 and 1.0) compared to uniform accuracy across all quantiles.
/**
* T-Digest quantile estimator with improved accuracy at extremes
*
* Particularly good for computing accurate tail quantiles like 99th, 99.9th percentile
*/
public class TDigest implements IQuantileEstimator {
/**
* Create T-Digest with specified compression parameter
* @param compression compression parameter (higher = more accurate but more memory)
*/
public TDigest(double compression);
/**
* Get compression parameter
* @return compression parameter used
*/
public double getCompression();
/**
* Manually trigger compression of the digest
*/
public void compress();
/**
* Merge another T-Digest into this one
* @param other T-Digest to merge
*/
public void add(TDigest other);
/**
* Serialize T-Digest to byte array
* @param digest digest to serialize
* @return serialized bytes
* @throws IOException if serialization fails
*/
public static byte[] serialize(TDigest digest) throws IOException;
/**
* Deserialize T-Digest from byte array
* @param data serialized digest data
* @return deserialized T-Digest
* @throws IOException if deserialization fails
*/
public static TDigest deserialize(byte[] data) throws IOException;
}Usage Examples:
import com.clearspring.analytics.stream.quantile.TDigest;
// Create T-Digest with compression 100
TDigest td = new TDigest(100.0);
// Process response times (in milliseconds)
for (long responseTime : responseTimesStream) {
td.offer(responseTime);
}
// Query tail latencies (T-Digest excels at these)
long p95 = td.getQuantile(0.95); // 95th percentile
long p99 = td.getQuantile(0.99); // 99th percentile
long p999 = td.getQuantile(0.999); // 99.9th percentile
long p9999 = td.getQuantile(0.9999); // 99.99th percentile
System.out.println("P95 latency: " + p95 + "ms");
System.out.println("P99 latency: " + p99 + "ms");
System.out.println("P99.9 latency: " + p999 + "ms");Supporting data structure used internally by T-Digest for organizing centroids.
/**
* Internal data structure for T-Digest implementation
* Organizes centroids in a tree structure for efficient quantile queries
*/
public class GroupTree {
/**
* Create empty group tree
*/
public GroupTree();
/**
* Add weighted value to the tree
* @param x value to add
* @param w weight of the value
*/
public void add(double x, long w);
/**
* Get size of the tree (total weight)
* @return total weight of all values
*/
public long size();
/**
* Compute quantile from the tree
* @param q quantile to compute (0.0 to 1.0)
* @return estimated quantile value
*/
public double quantile(double q);
/**
* Compress the tree by merging nearby centroids
*/
public void compress();
}// For general-purpose quantile estimation
QDigest qd = new QDigest(100.0);
// Process data stream
for (long value : dataStream) {
qd.offer(value);
}
// Get common quantiles
long q25 = qd.getQuantile(0.25); // 25th percentile
long median = qd.getQuantile(0.5); // 50th percentile (median)
long q75 = qd.getQuantile(0.75); // 75th percentile
System.out.println("IQR: [" + q25 + ", " + q75 + "]");
System.out.println("Median: " + median);// T-Digest is better for extreme quantiles
TDigest latencyDigest = new TDigest(200.0);
// Process request latencies
for (long latency : requestLatencies) {
latencyDigest.offer(latency);
}
// Monitor SLA compliance (T-Digest excels at tail quantiles)
long p99 = latencyDigest.getQuantile(0.99);
long p999 = latencyDigest.getQuantile(0.999);
if (p99 > SLA_P99_THRESHOLD) {
System.out.println("P99 SLA violation: " + p99 + "ms");
}
if (p999 > SLA_P999_THRESHOLD) {
System.out.println("P99.9 SLA violation: " + p999 + "ms");
}// Create compatible digests on different nodes
QDigest digest1 = new QDigest(100.0);
QDigest digest2 = new QDigest(100.0);
// Process data on each node
for (long value : node1Data) {
digest1.offer(value);
}
for (long value : node2Data) {
digest2.offer(value);
}
// Combine digests to get global quantiles
QDigest globalDigest = QDigest.unionOf(digest1, digest2);
// Query global quantiles
long globalMedian = globalDigest.getQuantile(0.5);
long globalP95 = globalDigest.getQuantile(0.95);QDigest digest = new QDigest(100.0);
// Process current batch
for (long value : currentBatch) {
digest.offer(value);
}
// Serialize for storage
byte[] serialized = QDigest.serialize(digest);
saveToStorage(serialized);
// Later, restore and continue processing
byte[] restored = loadFromStorage();
QDigest restoredDigest = QDigest.deserialize(restored);
// Continue adding data
for (long value : nextBatch) {
restoredDigest.offer(value);
}// Different digests for different metrics
TDigest responseTimeDigest = new TDigest(100.0);
QDigest requestSizeDigest = new QDigest(50.0);
QDigest errorRateDigest = new QDigest(50.0);
// Process metrics
for (MetricEvent event : metricsStream) {
responseTimeDigest.offer(event.responseTime);
requestSizeDigest.offer(event.requestSize);
errorRateDigest.offer(event.errorCount);
}
// Generate dashboard metrics
Map<String, Long> dashboardMetrics = new HashMap<>();
// Response time percentiles (use T-Digest for tail accuracy)
dashboardMetrics.put("response_p50", responseTimeDigest.getQuantile(0.5));
dashboardMetrics.put("response_p95", responseTimeDigest.getQuantile(0.95));
dashboardMetrics.put("response_p99", responseTimeDigest.getQuantile(0.99));
// Request size percentiles
dashboardMetrics.put("request_size_p50", requestSizeDigest.getQuantile(0.5));
dashboardMetrics.put("request_size_p90", requestSizeDigest.getQuantile(0.9));
// Error rate percentiles
dashboardMetrics.put("error_rate_p95", errorRateDigest.getQuantile(0.95));Use QDigest when:
Use TDigest when:
QDigest Compression Factor:
TDigest Compression:
QDigest:
TDigest:
Both structures support efficient merging for distributed scenarios and provide serialization for persistence.
Install with Tessl CLI
npx tessl i tessl/maven-com-clearspring-analytics--stream