or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

algorithm-operators.md · index.md · linear-algebra.md · ml-environment.md · ml-pipeline.md · statistical-operations.md · table-utilities.md

statistical-operations.mddocs/

0

# Statistical Operations

1

2

Statistical utilities including multivariate Gaussian distributions for probabilistic machine learning applications. These operations provide essential statistical functionality for ML algorithms.

3

4

## Capabilities

5

6

### MultivariateGaussian Class

7

8

Multivariate Gaussian (Normal) distribution implementation providing probability density function calculations for high-dimensional data.

9

10

```java { .api }

11

/**

12

* Multivariate Gaussian (Normal) distribution implementation

13

*/

14

public class MultivariateGaussian {

15

16

/**

17

* Constructor with mean vector and covariance matrix

18

* @param mean Mean vector of the distribution

19

* @param cov Covariance matrix of the distribution

20

*/

21

public MultivariateGaussian(DenseVector mean, DenseMatrix cov);

22

23

/**

24

* Compute probability density function value

25

* @param x Input vector

26

* @return Probability density at point x

27

*/

28

public double pdf(Vector x);

29

30

/**

31

* Compute log probability density function value

32

* @param x Input vector

33

* @return Log probability density at point x

34

*/

35

public double logpdf(Vector x);

36

}

37

```

38

39

**Usage Examples:**

40

41

```java

42

import org.apache.flink.ml.common.statistics.basicstatistic.MultivariateGaussian;

43

import org.apache.flink.ml.common.linalg.DenseVector;

44

import org.apache.flink.ml.common.linalg.DenseMatrix;

45

46

// Create multivariate Gaussian distribution

47

DenseVector mean = new DenseVector(new double[]{0.0, 0.0});

48

DenseMatrix covariance = DenseMatrix.eye(2); // Identity covariance matrix

49

50

MultivariateGaussian gaussian = new MultivariateGaussian(mean, covariance);

51

52

// Evaluate probability density

53

DenseVector point = new DenseVector(new double[]{1.0, 1.0});

54

double probability = gaussian.pdf(point);

55

double logProbability = gaussian.logpdf(point);

56

57

System.out.println("PDF at point (1,1): " + probability);

58

System.out.println("Log PDF at point (1,1): " + logProbability);

59

```

60

61

### Statistical Computation Patterns

62

63

Common patterns for using multivariate Gaussian distributions in machine learning contexts.

64

65

**Usage Examples:**

66

67

```java

68

// Gaussian Mixture Model component example

69

public class GaussianComponent {

70

private MultivariateGaussian gaussian;

71

private double weight;

72

73

public GaussianComponent(DenseVector mean, DenseMatrix covariance, double weight) {

74

this.gaussian = new MultivariateGaussian(mean, covariance);

75

this.weight = weight;

76

}

77

78

public double computeWeightedProbability(Vector x) {

79

return weight * gaussian.pdf(x);

80

}

81

82

public double computeLogLikelihood(Vector x) {

83

return Math.log(weight) + gaussian.logpdf(x);

84

}

85

}

86

87

// Anomaly detection using Gaussian distribution

88

public class GaussianAnomalyDetector {

89

private MultivariateGaussian normalDistribution;

90

private double threshold;

91

92

public GaussianAnomalyDetector(DenseVector mean, DenseMatrix covariance, double threshold) {

93

this.normalDistribution = new MultivariateGaussian(mean, covariance);

94

this.threshold = threshold;

95

}

96

97

public boolean isAnomaly(Vector point) {

98

double probability = normalDistribution.pdf(point);

99

return probability < threshold;

100

}

101

102

public double getAnomalyScore(Vector point) {

103

// Lower probability = higher anomaly score

104

return -normalDistribution.logpdf(point);

105

}

106

}

107

108

// Usage examples

109

DenseVector trainingMean = new DenseVector(new double[]{5.0, 10.0});

110

DenseMatrix trainingCov = new DenseMatrix(new double[][]{{2.0, 0.5}, {0.5, 3.0}});

111

112

// Anomaly detection

113

GaussianAnomalyDetector detector = new GaussianAnomalyDetector(

114

trainingMean, trainingCov, 0.01);

115

116

DenseVector testPoint = new DenseVector(new double[]{5.1, 9.8});

117

boolean isAnomalous = detector.isAnomaly(testPoint);

118

double anomalyScore = detector.getAnomalyScore(testPoint);

119

120

// Gaussian mixture component

121

GaussianComponent component = new GaussianComponent(trainingMean, trainingCov, 0.3);

122

double weightedProb = component.computeWeightedProbability(testPoint);

123

```

124

125

### Probability Calculations

126

127

Advanced probability calculations and statistical analysis using multivariate Gaussian distributions.

128

129

**Usage Examples:**

130

131

```java

132

// Maximum likelihood estimation helper

133

public class GaussianMLEstimator {

134

135

public static MultivariateGaussian estimate(List<DenseVector> data) {

136

int n = data.size();

137

int dimensions = data.get(0).size();

138

139

// Compute sample mean

140

DenseVector mean = DenseVector.zeros(dimensions);

141

for (DenseVector point : data) {

142

mean.plusEqual(point);

143

}

144

mean.scaleEqual(1.0 / n);

145

146

// Compute sample covariance

147

DenseMatrix covariance = DenseMatrix.zeros(dimensions, dimensions);

148

for (DenseVector point : data) {

149

DenseVector centered = point.minus(mean);

150

DenseMatrix outer = centered.outer();

151

covariance.plusEquals(outer);

152

}

153

covariance.scaleEqual(1.0 / (n - 1));

154

155

return new MultivariateGaussian(mean, covariance);

156

}

157

}

158

159

// Probability comparison and classification

160

public class GaussianClassifier {

161

private MultivariateGaussian[] classDistributions;

162

private double[] classPriors;

163

164

public GaussianClassifier(MultivariateGaussian[] distributions, double[] priors) {

165

this.classDistributions = distributions;

166

this.classPriors = priors;

167

}

168

169

public int classify(Vector point) {

170

double maxLogPosterior = Double.NEGATIVE_INFINITY;

171

int bestClass = -1;

172

173

for (int i = 0; i < classDistributions.length; i++) {

174

double logPosterior = Math.log(classPriors[i]) +

175

classDistributions[i].logpdf(point);

176

177

if (logPosterior > maxLogPosterior) {

178

maxLogPosterior = logPosterior;

179

bestClass = i;

180

}

181

}

182

183

return bestClass;

184

}

185

186

public double[] getClassProbabilities(Vector point) {

187

double[] logProbs = new double[classDistributions.length];

188

double maxLogProb = Double.NEGATIVE_INFINITY;

189

190

// Compute log probabilities

191

for (int i = 0; i < classDistributions.length; i++) {

192

logProbs[i] = Math.log(classPriors[i]) + classDistributions[i].logpdf(point);

193

maxLogProb = Math.max(maxLogProb, logProbs[i]);

194

}

195

196

// Convert to probabilities with numerical stability

197

double[] probs = new double[classDistributions.length];

198

double sum = 0.0;

199

200

for (int i = 0; i < logProbs.length; i++) {

201

probs[i] = Math.exp(logProbs[i] - maxLogProb);

202

sum += probs[i];

203

}

204

205

// Normalize

206

for (int i = 0; i < probs.length; i++) {

207

probs[i] /= sum;

208

}

209

210

return probs;

211

}

212

}

213

214

// Usage

215

List<DenseVector> class1Data = getClass1TrainingData();

216

List<DenseVector> class2Data = getClass2TrainingData();

217

218

// Estimate distributions

219

MultivariateGaussian dist1 = GaussianMLEstimator.estimate(class1Data);

220

MultivariateGaussian dist2 = GaussianMLEstimator.estimate(class2Data);

221

222

// Create classifier

223

MultivariateGaussian[] distributions = {dist1, dist2};

224

double[] priors = {0.6, 0.4}; // Class priors

225

GaussianClassifier classifier = new GaussianClassifier(distributions, priors);

226

227

// Classify new point

228

DenseVector newPoint = new DenseVector(new double[]{3.0, 7.0});

229

int predictedClass = classifier.classify(newPoint);

230

double[] classProbabilities = classifier.getClassProbabilities(newPoint);

231

232

System.out.println("Predicted class: " + predictedClass);

233

System.out.println("Class probabilities: " + Arrays.toString(classProbabilities));

234

```

235

236

### Numerical Considerations

237

238

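Numerical considerations when working with multivariate Gaussian distributions: the covariance matrix should be positive definite (regularize its diagonal when it is not), and density evaluations can yield NaN or infinite values that callers should guard against.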

239

240

**Usage Examples:**

241

242

```java

243

// Numerically stable Gaussian operations

244

public class NumericallyStableGaussian {

245

246

public static boolean isPositiveDefinite(DenseMatrix matrix) {

247

// Check if covariance matrix is positive definite

248

// Implementation would use eigenvalue decomposition or Cholesky decomposition

249

try {

250

// Attempt Cholesky decomposition

251

// If successful, matrix is positive definite

252

return true;

253

} catch (Exception e) {

254

return false;

255

}

256

}

257

258

public static DenseMatrix regularizeCovariance(DenseMatrix covariance, double regularization) {

259

// Add regularization to diagonal to ensure positive definiteness

260

DenseMatrix regularized = covariance.clone();

261

for (int i = 0; i < covariance.numRows(); i++) {

262

regularized.add(i, i, regularization);

263

}

264

return regularized;

265

}

266

267

public static MultivariateGaussian createStableGaussian(DenseVector mean, DenseMatrix covariance) {

268

// Ensure numerical stability

269

final double MIN_VARIANCE = 1e-6;

270

271

DenseMatrix stableCovariance = covariance.clone();

272

273

// Regularize if needed

274

if (!isPositiveDefinite(stableCovariance)) {

275

stableCovariance = regularizeCovariance(stableCovariance, MIN_VARIANCE);

276

}

277

278

return new MultivariateGaussian(mean, stableCovariance);

279

}

280

}

281

282

// Safe probability computations

283

public class SafeProbabilityCalculator {

284

285

public static double safeLogPdf(MultivariateGaussian gaussian, Vector point) {

286

try {

287

double logPdf = gaussian.logpdf(point);

288

289

// Check for numerical issues

290

if (Double.isNaN(logPdf) || Double.isInfinite(logPdf)) {

291

return Double.NEGATIVE_INFINITY; // Very low probability

292

}

293

294

return logPdf;

295

} catch (Exception e) {

296

// Handle numerical exceptions

297

return Double.NEGATIVE_INFINITY;

298

}

299

}

300

301

public static double safePdf(MultivariateGaussian gaussian, Vector point) {

302

double logPdf = safeLogPdf(gaussian, point);

303

return logPdf == Double.NEGATIVE_INFINITY ? 0.0 : Math.exp(logPdf);

304

}

305

}

306

307

// Usage with numerical safety

308

DenseVector mean = new DenseVector(new double[]{0.0, 0.0});

309

DenseMatrix covariance = new DenseMatrix(new double[][]{{1e-10, 0}, {0, 1e-10}}); // Very small variance

310

311

// Create numerically stable Gaussian

312

MultivariateGaussian stableGaussian = NumericallyStableGaussian.createStableGaussian(mean, covariance);

313

314

// Safe probability calculations

315

DenseVector testPoint = new DenseVector(new double[]{1.0, 1.0});

316

double safeProbability = SafeProbabilityCalculator.safePdf(stableGaussian, testPoint);

317

double safeLogProbability = SafeProbabilityCalculator.safeLogPdf(stableGaussian, testPoint);

318

```