Statistical utilities, including multivariate Gaussian distributions, for probabilistic machine learning applications. These operations supply core statistical functionality — density evaluation and log-likelihood computation — that many ML algorithms depend on.
Multivariate Gaussian (Normal) distribution implementation providing probability density function calculations for high-dimensional data.
/**
* Multivariate Gaussian (Normal) distribution implementation
*/
public class MultivariateGaussian {
/**
* Constructor with mean vector and covariance matrix
* @param mean Mean vector of the distribution
* @param cov Covariance matrix of the distribution
*/
public MultivariateGaussian(DenseVector mean, DenseMatrix cov);
/**
* Compute probability density function value
* @param x Input vector
* @return Probability density at point x
*/
public double pdf(Vector x);
/**
* Compute log probability density function value
* @param x Input vector
* @return Log probability density at point x
*/
public double logpdf(Vector x);
}

Usage Examples:
import org.apache.flink.ml.common.statistics.basicstatistic.MultivariateGaussian;
import org.apache.flink.ml.common.linalg.DenseVector;
import org.apache.flink.ml.common.linalg.DenseMatrix;
// Create multivariate Gaussian distribution
// A 2-D standard normal: zero mean vector, identity covariance.
DenseVector mean = new DenseVector(new double[]{0.0, 0.0});
DenseMatrix covariance = DenseMatrix.eye(2); // Identity covariance matrix
MultivariateGaussian gaussian = new MultivariateGaussian(mean, covariance);
// Evaluate probability density
DenseVector point = new DenseVector(new double[]{1.0, 1.0});
double probability = gaussian.pdf(point);
// The log-density is preferred in likelihood computations, where plain pdf values can underflow.
double logProbability = gaussian.logpdf(point);
System.out.println("PDF at point (1,1): " + probability);
System.out.println("Log PDF at point (1,1): " + logProbability);

Common patterns for using multivariate Gaussian distributions in machine learning contexts.
Usage Examples:
// Gaussian Mixture Model component example
public class GaussianComponent {
    /** Underlying Gaussian density of this mixture component. */
    private final MultivariateGaussian gaussian;
    /** Mixing weight; strictly positive (weights across components typically sum to 1). */
    private final double weight;

    /**
     * Creates one weighted component of a Gaussian mixture.
     *
     * @param mean       component mean vector
     * @param covariance component covariance matrix
     * @param weight     mixing weight; must be finite and strictly positive, otherwise
     *                   {@code Math.log(weight)} in {@link #computeLogLikelihood} is undefined
     * @throws IllegalArgumentException if {@code weight} is not a finite positive number
     */
    public GaussianComponent(DenseVector mean, DenseMatrix covariance, double weight) {
        // !(weight > 0.0) also rejects NaN, which would otherwise poison every later result.
        if (!(weight > 0.0) || Double.isInfinite(weight)) {
            throw new IllegalArgumentException(
                "weight must be a finite positive number, got " + weight);
        }
        this.gaussian = new MultivariateGaussian(mean, covariance);
        this.weight = weight;
    }

    /**
     * Returns {@code weight * p(x)}, this component's contribution to the mixture density.
     *
     * @param x evaluation point
     * @return weighted probability density at {@code x}
     */
    public double computeWeightedProbability(Vector x) {
        return weight * gaussian.pdf(x);
    }

    /**
     * Returns {@code log(weight) + log p(x)}; staying in log space avoids the underflow
     * that multiplying small densities in linear space would cause.
     *
     * @param x evaluation point
     * @return weighted log-likelihood contribution at {@code x}
     */
    public double computeLogLikelihood(Vector x) {
        return Math.log(weight) + gaussian.logpdf(x);
    }
}
// Anomaly detection using Gaussian distribution
public class GaussianAnomalyDetector {
    /** Density model fitted to normal (non-anomalous) data. */
    private MultivariateGaussian normalDistribution;
    /** Density cutoff: points whose pdf falls below this value are flagged. */
    private double threshold;

    /**
     * Builds a detector from the parameters of the normal-data distribution.
     *
     * @param mean       mean of the normal data
     * @param covariance covariance of the normal data
     * @param threshold  pdf cutoff below which a point counts as anomalous
     */
    public GaussianAnomalyDetector(DenseVector mean, DenseMatrix covariance, double threshold) {
        this.threshold = threshold;
        this.normalDistribution = new MultivariateGaussian(mean, covariance);
    }

    /**
     * Flags a point as anomalous when its density is below the configured cutoff.
     *
     * @param point point to test
     * @return true if the point's density is under the threshold
     */
    public boolean isAnomaly(Vector point) {
        return normalDistribution.pdf(point) < threshold;
    }

    /**
     * Negative log-density: larger scores mean the point is less likely under the
     * normal-data model, hence more anomalous.
     *
     * @param point point to score
     * @return anomaly score (higher = more anomalous)
     */
    public double getAnomalyScore(Vector point) {
        return -normalDistribution.logpdf(point);
    }
}
// Usage examples
// Training-set statistics: a 2-D mean and a covariance with off-diagonal correlation (0.5).
DenseVector trainingMean = new DenseVector(new double[]{5.0, 10.0});
DenseMatrix trainingCov = new DenseMatrix(new double[][]{{2.0, 0.5}, {0.5, 3.0}});
// Anomaly detection
// Points whose density under the fitted Gaussian falls below 0.01 are flagged.
GaussianAnomalyDetector detector = new GaussianAnomalyDetector(
trainingMean, trainingCov, 0.01);
DenseVector testPoint = new DenseVector(new double[]{5.1, 9.8});
boolean isAnomalous = detector.isAnomaly(testPoint);
double anomalyScore = detector.getAnomalyScore(testPoint);
// Gaussian mixture component
// Same distribution parameters reused as a mixture component with weight 0.3.
GaussianComponent component = new GaussianComponent(trainingMean, trainingCov, 0.3);
double weightedProb = component.computeWeightedProbability(testPoint);

Advanced probability calculations and statistical analysis using multivariate Gaussian distributions.
Usage Examples:
// Maximum likelihood estimation helper
public class GaussianMLEstimator {
    /**
     * Fits a multivariate Gaussian to data using the sample mean and the unbiased
     * sample covariance (denominator {@code n - 1}).
     *
     * @param data observations; must contain at least two points (so the unbiased
     *             covariance estimate is defined) and all points must share one dimension
     * @return the fitted distribution
     * @throws IllegalArgumentException if {@code data} is null, has fewer than two points,
     *                                  or contains points of mismatched dimension
     */
    public static MultivariateGaussian estimate(List<DenseVector> data) {
        // Guard: the original code threw IndexOutOfBounds on empty input and divided
        // by zero (n - 1 == 0) for a single point.
        if (data == null || data.size() < 2) {
            throw new IllegalArgumentException(
                "At least two data points are required, got "
                    + (data == null ? 0 : data.size()));
        }
        int n = data.size();
        int dimensions = data.get(0).size();

        // Sample mean: (1/n) * sum of points.
        DenseVector mean = DenseVector.zeros(dimensions);
        for (DenseVector point : data) {
            if (point.size() != dimensions) {
                throw new IllegalArgumentException(
                    "Inconsistent point dimension: expected " + dimensions
                        + ", got " + point.size());
            }
            mean.plusEqual(point);
        }
        mean.scaleEqual(1.0 / n);

        // Unbiased sample covariance: (1/(n-1)) * sum of outer products of centered points.
        DenseMatrix covariance = DenseMatrix.zeros(dimensions, dimensions);
        for (DenseVector point : data) {
            DenseVector centered = point.minus(mean);
            covariance.plusEquals(centered.outer());
        }
        covariance.scaleEqual(1.0 / (n - 1));

        return new MultivariateGaussian(mean, covariance);
    }
}
// Probability comparison and classification
public class GaussianClassifier {
    /** Per-class likelihood models; index i models class i. */
    private final MultivariateGaussian[] classDistributions;
    /** Prior probability of each class, aligned index-for-index with classDistributions. */
    private final double[] classPriors;

    /**
     * Creates a generative classifier from per-class Gaussians and class priors.
     *
     * @param distributions one fitted Gaussian per class; must be non-empty
     * @param priors        prior probability per class; same length as {@code distributions}
     * @throws IllegalArgumentException if either array is null or empty, or lengths differ
     */
    public GaussianClassifier(MultivariateGaussian[] distributions, double[] priors) {
        // Guard: mismatched lengths would otherwise surface later as an
        // ArrayIndexOutOfBoundsException inside classify().
        if (distributions == null || priors == null
                || distributions.length == 0 || distributions.length != priors.length) {
            throw new IllegalArgumentException(
                "distributions and priors must be non-empty arrays of equal length");
        }
        this.classDistributions = distributions;
        this.classPriors = priors;
    }

    /**
     * MAP classification: argmax over classes of log(prior) + log-likelihood.
     *
     * @param point point to classify
     * @return index of the most probable class, or -1 if every class has
     *         log-posterior of negative infinity (no class assigns any density)
     */
    public int classify(Vector point) {
        double maxLogPosterior = Double.NEGATIVE_INFINITY;
        int bestClass = -1;
        for (int i = 0; i < classDistributions.length; i++) {
            double logPosterior = Math.log(classPriors[i])
                + classDistributions[i].logpdf(point);
            if (logPosterior > maxLogPosterior) {
                maxLogPosterior = logPosterior;
                bestClass = i;
            }
        }
        return bestClass;
    }

    /**
     * Posterior class probabilities computed with the log-sum-exp trick for
     * numerical stability. If every class has zero posterior density (all
     * log-posteriors are -infinity), a uniform distribution is returned instead
     * of the all-NaN array that exp(-inf - -inf) would produce.
     *
     * @param point point to evaluate
     * @return normalized posterior probability per class
     */
    public double[] getClassProbabilities(Vector point) {
        int k = classDistributions.length;
        double[] logProbs = new double[k];
        double maxLogProb = Double.NEGATIVE_INFINITY;
        // Compute log posteriors and track the maximum for the shift below.
        for (int i = 0; i < k; i++) {
            logProbs[i] = Math.log(classPriors[i]) + classDistributions[i].logpdf(point);
            maxLogProb = Math.max(maxLogProb, logProbs[i]);
        }
        double[] probs = new double[k];
        if (maxLogProb == Double.NEGATIVE_INFINITY) {
            // No class assigns any density at this point: fall back to uniform.
            java.util.Arrays.fill(probs, 1.0 / k);
            return probs;
        }
        // Shift by the max before exponentiating so the largest term is exp(0) = 1.
        double sum = 0.0;
        for (int i = 0; i < k; i++) {
            probs[i] = Math.exp(logProbs[i] - maxLogProb);
            sum += probs[i];
        }
        // Normalize so the probabilities sum to 1.
        for (int i = 0; i < k; i++) {
            probs[i] /= sum;
        }
        return probs;
    }
}
// Usage
// Per-class training data is supplied elsewhere in the application.
List<DenseVector> class1Data = getClass1TrainingData();
List<DenseVector> class2Data = getClass2TrainingData();
// Estimate distributions
// One Gaussian is fitted per class by maximum likelihood.
MultivariateGaussian dist1 = GaussianMLEstimator.estimate(class1Data);
MultivariateGaussian dist2 = GaussianMLEstimator.estimate(class2Data);
// Create classifier
MultivariateGaussian[] distributions = {dist1, dist2};
double[] priors = {0.6, 0.4}; // Class priors
GaussianClassifier classifier = new GaussianClassifier(distributions, priors);
// Classify new point
DenseVector newPoint = new DenseVector(new double[]{3.0, 7.0});
int predictedClass = classifier.classify(newPoint);
double[] classProbabilities = classifier.getClassProbabilities(newPoint);
System.out.println("Predicted class: " + predictedClass);
System.out.println("Class probabilities: " + Arrays.toString(classProbabilities));

Important numerical considerations when working with multivariate Gaussian distributions.
Usage Examples:
// Numerically stable Gaussian operations
public class NumericallyStableGaussian {
    /**
     * Tests whether a matrix is positive definite by attempting a Cholesky
     * factorization, which succeeds exactly when a symmetric matrix is positive
     * definite. (The original version was a stub that unconditionally returned
     * true, so regularization was never triggered.)
     *
     * @param matrix candidate covariance matrix; assumed symmetric — only the
     *               lower triangle is read. TODO confirm callers pass symmetric input.
     * @return true if the factorization succeeds; false for non-square matrices
     *         or any non-positive pivot
     */
    public static boolean isPositiveDefinite(DenseMatrix matrix) {
        int n = matrix.numRows();
        if (n != matrix.numCols()) {
            return false; // a non-square matrix cannot be a covariance matrix
        }
        // Standard Cholesky decomposition into a lower-triangular factor L.
        double[][] l = new double[n][n];
        for (int j = 0; j < n; j++) {
            double diag = matrix.get(j, j);
            for (int k = 0; k < j; k++) {
                diag -= l[j][k] * l[j][k];
            }
            // A non-positive (or NaN) pivot means the matrix is not positive definite.
            if (!(diag > 0.0)) {
                return false;
            }
            l[j][j] = Math.sqrt(diag);
            for (int i = j + 1; i < n; i++) {
                double s = matrix.get(i, j);
                for (int k = 0; k < j; k++) {
                    s -= l[i][k] * l[j][k];
                }
                l[i][j] = s / l[j][j];
            }
        }
        return true;
    }

    /**
     * Returns a copy of the covariance with {@code regularization} added to each
     * diagonal entry, nudging near-singular matrices toward positive definiteness.
     *
     * @param covariance     input covariance matrix (not modified)
     * @param regularization amount added to every diagonal element
     * @return regularized copy
     */
    public static DenseMatrix regularizeCovariance(DenseMatrix covariance, double regularization) {
        DenseMatrix regularized = covariance.clone();
        for (int i = 0; i < covariance.numRows(); i++) {
            regularized.add(i, i, regularization);
        }
        return regularized;
    }

    /**
     * Builds a MultivariateGaussian, regularizing the covariance first if it is
     * not positive definite.
     *
     * @param mean       distribution mean
     * @param covariance distribution covariance; regularized when necessary
     * @return a Gaussian backed by a positive-definite covariance
     */
    public static MultivariateGaussian createStableGaussian(DenseVector mean, DenseMatrix covariance) {
        // Smallest diagonal boost applied when the covariance fails the PD check.
        final double MIN_VARIANCE = 1e-6;
        DenseMatrix stableCovariance = covariance.clone();
        if (!isPositiveDefinite(stableCovariance)) {
            stableCovariance = regularizeCovariance(stableCovariance, MIN_VARIANCE);
        }
        return new MultivariateGaussian(mean, stableCovariance);
    }
}
// Safe probability computations
public class SafeProbabilityCalculator {
    /**
     * Evaluates the log-density, mapping every failure mode — a thrown exception,
     * a NaN, or an infinite result — to {@code Double.NEGATIVE_INFINITY}
     * ("impossibly unlikely").
     *
     * @param gaussian distribution to evaluate
     * @param point    evaluation point
     * @return finite log-density, or negative infinity on any numerical failure
     */
    public static double safeLogPdf(MultivariateGaussian gaussian, Vector point) {
        double value;
        try {
            value = gaussian.logpdf(point);
        } catch (Exception e) {
            // Numerical failure inside the density evaluation; treat as zero probability.
            return Double.NEGATIVE_INFINITY;
        }
        boolean usable = !Double.isNaN(value) && !Double.isInfinite(value);
        return usable ? value : Double.NEGATIVE_INFINITY;
    }

    /**
     * Density in linear space; returns 0.0 whenever the log-density is unusable.
     *
     * @param gaussian distribution to evaluate
     * @param point    evaluation point
     * @return probability density, or 0.0 on any numerical failure
     */
    public static double safePdf(MultivariateGaussian gaussian, Vector point) {
        double logValue = safeLogPdf(gaussian, point);
        if (logValue == Double.NEGATIVE_INFINITY) {
            return 0.0;
        }
        return Math.exp(logValue);
    }
}
// Usage with numerical safety
DenseVector mean = new DenseVector(new double[]{0.0, 0.0});
// Near-singular covariance, chosen deliberately to exercise the stability helpers.
DenseMatrix covariance = new DenseMatrix(new double[][]{{1e-10, 0}, {0, 1e-10}}); // Very small variance
// Create numerically stable Gaussian
// createStableGaussian regularizes the covariance when it fails the positive-definite check.
MultivariateGaussian stableGaussian = NumericallyStableGaussian.createStableGaussian(mean, covariance);
// Safe probability calculations
// safePdf / safeLogPdf map NaN, infinities, and exceptions to 0.0 / -infinity.
DenseVector testPoint = new DenseVector(new double[]{1.0, 1.0});
double safeProbability = SafeProbabilityCalculator.safePdf(stableGaussian, testPoint);
double safeLogProbability = SafeProbabilityCalculator.safeLogPdf(stableGaussian, testPoint);