Elasticsearch plugin that adds matrix statistics aggregation functionality for computing statistical measures and relationships across multiple numeric fields
—
Core interface and implementation classes for accessing computed statistical measures from matrix stats aggregation results.
Primary interface for accessing statistical results from matrix stats aggregations.
/**
* Read-only accessor for the results of a MatrixStats metric aggregation.
* <p>
* Exposes the overall document count, per-field descriptive statistics
* (count, mean, variance, skewness, kurtosis) and pairwise measures
* (covariance, correlation). Fields are always addressed by name.
* <p>
* NOTE(review): the notes accompanying this listing say methods may throw for
* invalid field names; the exception type is not stated here, so confirm it
* against the implementation before relying on it.
*/
public interface MatrixStats extends Aggregation {
/**
* Return the total document count processed by this aggregation.
* @return Total number of documents
*/
long getDocCount();
/**
* Return the number of values counted for a single field. This differs from
* {@link #getDocCount()} when some documents are missing a value for the field.
* @param field The field name to get count for
* @return Number of documents with non-missing values for this field
*/
long getFieldCount(String field);
/**
* Return the field mean (average).
* @param field The field name to get mean for
* @return Arithmetic mean of the field values
*/
double getMean(String field);
/**
* Return the field variance.
* @param field The field name to get variance for
* @return Statistical variance of the field values
*/
double getVariance(String field);
/**
* Return the skewness of the field's distribution, i.e. how asymmetric it is.
* @param field The field name to get skewness for
* @return Skewness measure (positive = right-skewed, negative = left-skewed)
*/
double getSkewness(String field);
/**
* Return the kurtosis of the field's distribution.
* @param field The field name to get kurtosis for
* @return Kurtosis measure (measures tail heaviness)
*/
double getKurtosis(String field);
/**
* Return the covariance between field x and field y, i.e. how the two
* fields change together.
* @param fieldX First field name
* @param fieldY Second field name
* @return Covariance between the two fields
*/
double getCovariance(String fieldX, String fieldY);
/**
* Return the correlation coefficient of field x and field y. Correlation is
* the normalized covariance; correlating a field with itself yields 1.0.
* @param fieldX First field name
* @param fieldY Second field name
* @return Pearson correlation coefficient (-1 to 1)
*/
double getCorrelation(String fieldX, String fieldY);
}
Usage Examples:
import org.elasticsearch.search.aggregations.matrix.stats.MatrixStats;
// Extract the matrix stats result from the search response; price_analysis is
// the key it is looked up under (presumably the aggregation name - confirm)
MatrixStats stats = searchResponse.getAggregations().get("price_analysis");
// Basic statistics for individual fields
long totalDocs = stats.getDocCount(); // all documents processed by the aggregation
long priceCount = stats.getFieldCount("price"); // only documents with a non-missing price
double avgPrice = stats.getMean("price");
double priceVariability = stats.getVariance("price");
// Distribution shape analysis
double priceSkew = stats.getSkewness("price"); // Distribution asymmetry: positive = right-skewed, negative = left-skewed
double priceKurtosis = stats.getKurtosis("price"); // Tail heaviness
// Cross-field relationship analysis
double priceQuantityCovariance = stats.getCovariance("price", "quantity");
double priceQuantityCorrelation = stats.getCorrelation("price", "quantity"); // Pearson coefficient, -1 to 1
// Self-correlation always returns 1.0
double selfCorr = stats.getCorrelation("price", "price"); // Returns 1.0
Internal implementation class used for distributed computation across Elasticsearch shards.
/**
* Internal implementation of MatrixStats for shard-level computation.
* Computes distribution statistics over multiple fields.
* <p>
* An instance carries the running statistics gathered so far and, once they
* have been computed, the final results; the results object may be absent
* (null) while reductions are still intermediate.
*/
public class InternalMatrixStats extends InternalAggregation implements MatrixStats {
/**
* Constructor for per-shard statistics.
* @param name Aggregation name
* @param count Document count
* @param multiFieldStatsResults Running statistics from this shard
* @param results Final computed results (null for intermediate reductions)
* @param metadata Aggregation metadata
*/
InternalMatrixStats(
String name,
long count,
RunningStats multiFieldStatsResults,
MatrixStatsResults results,
Map<String, Object> metadata
);
// Implements all MatrixStats interface methods (accessors are not repeated in this listing)
/**
* Get the running statistics object (for internal use).
* @return Running statistics instance
*/
RunningStats getStats();
/**
* Get the computed results object (for internal use). Callers must handle
* a null return, which occurs for intermediate reductions.
* @return Final results instance, may be null for intermediate reductions
*/
MatrixStatsResults getResults();
/**
* Reduce multiple shard results into a single result.
* @param aggregations List of shard-level aggregation results
* @param reduceContext Context for the reduction operation
* @return Combined aggregation result
*/
public InternalAggregation reduce(List<InternalAggregation> aggregations, ReduceContext reduceContext);
}
Parsed version of MatrixStats for client-side usage, typically used when parsing aggregation results from JSON responses.
/**
* Parsed version of MatrixStats for client-side usage: the MatrixStats
* accessors are served from an already-serialized aggregation result rather
* than from shard-level computation.
*/
public class ParsedMatrixStats extends ParsedAggregation implements MatrixStats {
// Implements all MatrixStats interface methods (accessors are not repeated in this listing)
/**
* Create ParsedMatrixStats from XContent parser.
* @param parser XContent parser positioned at the aggregation data
* @param name Name of the aggregation
* @return Parsed MatrixStats instance
* @throws IOException declared by the signature; presumably raised when the
*         parser fails to read the content - confirm against the implementation
*/
public static ParsedMatrixStats fromXContent(XContentParser parser, String name) throws IOException;
}
Container class for computed statistical results (package-private, used internally).
/**
* Container for computed matrix statistics results.
* Descriptive stats gathered per shard, with final correlation and covariance computed on coordinating node.
* <p>
* The class is package-private; its accessors mirror the MatrixStats interface.
*/
class MatrixStatsResults implements Writeable {
/**
* Default constructor for empty results.
*/
MatrixStatsResults();
/**
* Constructor that computes results from running statistics.
* @param stats Running statistics to compute final results from
*/
MatrixStatsResults(RunningStats stats);
/**
* Return document count.
* @return Total number of documents processed
*/
public final long getDocCount();
/**
* Return the field count for the requested field.
* @param field Field name
* @return Number of non-missing values for this field
*/
public long getFieldCount(String field);
/**
* Return the mean for the requested field.
* @param field Field name
* @return Arithmetic mean of field values
*/
public double getMean(String field);
/**
* Return the variance for the requested field.
* @param field Field name
* @return Statistical variance of field values
*/
public double getVariance(String field);
/**
* Return the skewness for the requested field.
* @param field Field name
* @return Skewness measure of the distribution
*/
public double getSkewness(String field);
/**
* Return the kurtosis for the requested field.
* @param field Field name
* @return Kurtosis measure of the distribution
*/
public double getKurtosis(String field);
/**
* Return the covariance between two fields.
* @param fieldX First field name
* @param fieldY Second field name
* @return Covariance between the fields
*/
public double getCovariance(String fieldX, String fieldY);
/**
* Return the correlation coefficient between two fields.
* <p>
* Note: this accessor is declared with the boxed {@code Double} return type,
* whereas every other accessor in this class returns a primitive.
* @param fieldX First field name
* @param fieldY Second field name
* @return Pearson correlation coefficient
*/
public Double getCorrelation(String fieldX, String fieldY);
}
Internal class for accumulating statistical data during aggregation processing using a single-pass algorithm.
/**
* Running statistics computation for matrix stats aggregation.
* Implements single-pass algorithm for computing statistics across large datasets.
* Based on parallel statistical computation algorithms.
* <p>
* Instances can be written to and read back from a stream, merged with one
* another, and cloned.
*/
public class RunningStats implements Writeable, Cloneable {
/**
* Constructor for deserialization from stream.
* @param in Stream input to read from
* @throws IOException declared by the signature; presumably raised when
*         reading from the stream fails - confirm against the implementation
*/
public RunningStats(StreamInput in) throws IOException;
/**
* Serialize running statistics to stream.
* @param out Stream output to write to
* @throws IOException declared by the signature; presumably raised when
*         writing to the stream fails - confirm against the implementation
*/
public void writeTo(StreamOutput out) throws IOException;
/**
* Add a document's field values to the running statistics.
* The two arrays are parallel: each value corresponds to the field name at
* the same position (presumably they must have equal length - confirm).
* @param fieldNames Array of field names
* @param fieldVals Array of corresponding field values
*/
public void add(String[] fieldNames, double[] fieldVals);
/**
* Merge another RunningStats instance (from different shard) into this one.
* @param other RunningStats instance to merge
*/
public void merge(RunningStats other);
/**
* Create a deep copy of this RunningStats instance.
* @return Cloned RunningStats instance
*/
public RunningStats clone();
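}
Skewness: Measures the asymmetry of the distribution (positive = right-skewed, negative = left-skewed).
Kurtosis: Measures the tail heaviness of the distribution.
Covariance: Measures how two variables change together.
Correlation: Normalized covariance, ranging from -1 to 1; the correlation of a field with itself is 1.0.
Methods may throw exceptions for invalid field names.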
Install with Tessl CLI
npx tessl i tessl/maven-org-elasticsearch-plugin--aggs-matrix-stats-client