0
# Advanced Analytics
1
2
Smile Core provides specialized algorithms for manifold learning, time series analysis, sequence modeling, association rule mining, anomaly detection, vector quantization, hyperparameter optimization, and other advanced machine learning tasks, giving you sophisticated tools for complex analytical scenarios.
3
4
## Capabilities
5
6
### Manifold Learning
7
8
Algorithms for discovering low-dimensional structure in high-dimensional data.
9
10
```java { .api }
11
/**
12
* Multi-Dimensional Scaling for manifold learning
13
*/
14
class MDS implements Serializable {
15
/** Perform classical MDS */
16
public static MDS fit(double[][] proximity);
17
18
/** Perform MDS with specified dimensions */
19
public static MDS fit(double[][] proximity, int k);
20
21
/** Perform metric MDS with stress minimization */
22
public static MDS fit(double[][] proximity, int k, boolean metric);
23
24
/** Get embedded coordinates */
25
public final double[][] coordinates;
26
27
/** Get eigenvalues */
28
public final double[] eigenvalues;
29
30
/** Get stress value */
31
public final double stress;
32
}
33
34
/**
35
* Isotonic MDS with monotonic distance constraints
36
*/
37
class IsotonicMDS implements Serializable {
38
/** Perform isotonic MDS */
39
public static IsotonicMDS fit(double[][] proximity, int k);
40
41
/** Get embedded coordinates */
42
public final double[][] coordinates;
43
44
/** Get stress value */
45
public final double stress;
46
}
47
48
/**
49
* Sammon's mapping for non-linear dimensionality reduction
50
*/
51
class SammonMapping implements Serializable {
52
/** Perform Sammon mapping */
53
public static SammonMapping fit(double[][] data, int k);
54
55
/** Perform with custom parameters */
56
public static SammonMapping fit(double[][] data, int k, double learningRate, int maxIter);
57
58
/** Get embedded coordinates */
59
public final double[][] coordinates;
60
61
/** Get final stress */
62
public final double stress;
63
}
64
65
/**
66
* Isomap for non-linear manifold learning
67
*/
68
class IsoMap implements Serializable {
69
/** Perform Isomap with k-nearest neighbors */
70
public static IsoMap fit(double[][] data, int k, int d);
71
72
/** Perform with epsilon neighborhood */
73
public static IsoMap fit(double[][] data, double epsilon, int d);
74
75
/** Get embedded coordinates */
76
public final double[][] coordinates;
77
78
/** Get geodesic distances */
79
public final double[][] distances;
80
}
81
82
/**
83
* Locally Linear Embedding
84
*/
85
class LLE implements Serializable {
86
/** Perform LLE with k neighbors */
87
public static LLE fit(double[][] data, int k, int d);
88
89
/** Get embedded coordinates */
90
public final double[][] coordinates;
91
92
/** Get reconstruction weights */
93
public final double[][] weights;
94
}
95
96
/**
97
* Laplacian Eigenmap for spectral manifold learning
98
*/
99
class LaplacianEigenmap implements Serializable {
100
/** Perform Laplacian Eigenmap */
101
public static LaplacianEigenmap fit(double[][] data, int k, int d);
102
103
/** Perform with RBF kernel */
104
public static LaplacianEigenmap fit(double[][] data, int k, int d, double sigma);
105
106
/** Get embedded coordinates */
107
public final double[][] coordinates;
108
109
/** Get eigenvalues */
110
public final double[] eigenvalues;
111
}
112
113
/**
114
* t-SNE for visualization and non-linear embedding
115
*/
116
class TSNE implements Serializable {
117
/** Perform t-SNE with default parameters */
118
public static TSNE fit(double[][] data);
119
120
/** Perform t-SNE with custom parameters */
121
public static TSNE fit(double[][] data, int d, double perplexity, double learningRate, int maxIter);
122
123
/** Get embedded coordinates */
124
public final double[][] coordinates;
125
126
/** Get final KL divergence */
127
public final double klDivergence;
128
}
129
130
/**
131
* UMAP for uniform manifold approximation
132
*/
133
class UMAP implements Serializable {
134
/** Perform UMAP with default parameters */
135
public static UMAP fit(double[][] data);
136
137
/** Perform UMAP with custom parameters */
138
public static UMAP fit(double[][] data, int d, int nNeighbors, double minDist, int nEpochs);
139
140
/** Get embedded coordinates */
141
public final double[][] coordinates;
142
143
/** Transform new data points */
144
public double[] transform(double[] x);
145
}
146
```
147
148
**Usage Example:**
149
150
```java
151
import smile.manifold.*;
152
153
// t-SNE for visualization
154
TSNE tsne = TSNE.fit(highDimData, 2, 30.0, 200.0, 1000);
155
double[][] embedding = tsne.coordinates;
156
157
// UMAP for general manifold learning
158
UMAP umap = UMAP.fit(data, 10, 15, 0.1, 200);
159
double[][] reducedData = umap.coordinates;
160
double[] newPoint = umap.transform(testSample);
161
162
// Isomap for geodesic distances
163
IsoMap isomap = IsoMap.fit(data, 10, 5); // 10 neighbors, 5 dimensions
164
double[][] manifoldCoords = isomap.coordinates;
165
```
166
167
### Time Series Analysis
168
169
Algorithms for analyzing temporal data patterns and forecasting.
170
171
```java { .api }
172
/**
173
* Time series utilities and analysis tools
174
*/
175
class TimeSeries {
176
/** Calculate autocorrelation function */
177
public static double[] autocorrelation(double[] data);
178
179
/** Calculate autocorrelation with max lag */
180
public static double[] autocorrelation(double[] data, int maxLag);
181
182
/** Calculate cross-correlation between two series */
183
public static double[] crosscorrelation(double[] x, double[] y);
184
185
/** Calculate partial autocorrelation function */
186
public static double[] pacf(double[] data, int maxLag);
187
188
/** Differencing for stationarity */
189
public static double[] difference(double[] data);
190
191
/** Seasonal differencing */
192
public static double[] seasonalDifference(double[] data, int period);
193
194
/** Moving average smoothing */
195
public static double[] movingAverage(double[] data, int window);
196
197
/** Exponential smoothing */
198
public static double[] exponentialSmoothing(double[] data, double alpha);
199
}
200
201
/**
202
* Autoregressive model for time series forecasting
203
*/
204
class AR implements Serializable {
205
/** Estimation methods */
206
enum Method { BURG, OLS, MLE }
207
208
/** Fit AR model using Burg method */
209
public static AR fit(double[] data, int p);
210
211
/** Fit AR model with specified method */
212
public static AR fit(double[] data, int p, Method method);
213
214
/** Get AR coefficients */
215
public double[] coefficients();
216
217
/** Get model order */
218
public int order();
219
220
/** Get white noise variance */
221
public double variance();
222
223
/** Forecast future values */
224
public double[] forecast(int steps);
225
226
/** One-step ahead prediction */
227
public double predict(double[] history);
228
}
229
230
/**
231
* ARMA model combining autoregressive and moving average
232
*/
233
class ARMA implements Serializable {
234
/** Fit ARMA model */
235
public static ARMA fit(double[] data, int p, int q);
236
237
/** Fit ARMA model, optionally including an intercept term */
238
public static ARMA fit(double[] data, int p, int q, boolean includeIntercept);
239
240
/** Get AR coefficients */
241
public double[] arCoefficients();
242
243
/** Get MA coefficients */
244
public double[] maCoefficients();
245
246
/** Get intercept term */
247
public double intercept();
248
249
/** Forecast future values */
250
public double[] forecast(int steps);
251
252
/** Calculate residuals */
253
public double[] residuals();
254
255
/** Get AIC (Akaike Information Criterion) */
256
public double aic();
257
}
258
259
/**
260
* Box test for time series diagnostics
261
*/
262
class BoxTest {
263
/** Test types */
264
enum Type { LJUNG_BOX, BOX_PIERCE }
265
266
/** Perform Ljung-Box test */
267
public static BoxTest ljungBox(double[] residuals, int lags);
268
269
/** Perform Box-Pierce test */
270
public static BoxTest boxPierce(double[] residuals, int lags);
271
272
/** Test statistic */
273
public final double statistic;
274
275
/** P-value */
276
public final double pvalue;
277
278
/** Degrees of freedom */
279
public final int df;
280
}
281
```
282
283
### Sequence Modeling
284
285
Algorithms for labeling and analyzing sequential data.
286
287
```java { .api }
288
/**
289
* Base interface for sequence labeling
290
* @param <T> the type of sequence elements
291
*/
292
interface SequenceLabeler<T> {
293
/** Predict labels for sequence */
294
int[] predict(T[] sequence);
295
296
/** Get label vocabulary */
297
default String[] labels();
298
}
299
300
/**
301
* Hidden Markov Model for sequence analysis
302
*/
303
class HMM implements Serializable {
304
/** Train HMM from observation sequences */
305
public static HMM fit(int[][] observations, int numStates);
306
307
/** Train with known state sequences */
308
public static HMM fit(int[][] observations, int[][] states, int numStates, int numSymbols);
309
310
/** Predict most likely state sequence (Viterbi) */
311
public int[] predict(int[] observations);
312
313
/** Calculate sequence probability (forward algorithm) */
314
public double probability(int[] observations);
315
316
/** Get transition probabilities */
317
public double[][] transitionProbabilities();
318
319
/** Get emission probabilities */
320
public double[][] emissionProbabilities();
321
322
/** Get initial state probabilities */
323
public double[] initialProbabilities();
324
}
325
326
/**
327
* HMM-based sequence labeler
328
* @param <T> the type of sequence elements
329
*/
330
class HMMLabeler<T> implements SequenceLabeler<T> {
331
/** Train HMM labeler */
332
public static <T> HMMLabeler<T> fit(T[][] sequences, int[][] labels, Function<T, Integer> encoder);
333
334
/** Predict labels for sequence */
335
public int[] predict(T[] sequence);
336
337
/** Get underlying HMM */
338
public HMM hmm();
339
}
340
341
/**
342
* Conditional Random Field for sequence labeling
343
*/
344
class CRF implements Serializable {
345
/** Train CRF from feature sequences and labels */
346
public static CRF fit(double[][][] features, int[][] labels);
347
348
/** Train with regularization */
349
public static CRF fit(double[][][] features, int[][] labels, double lambda);
350
351
/** Predict label sequence */
352
public int[] predict(double[][] features);
353
354
/** Calculate sequence probability */
355
public double probability(double[][] features, int[] labels);
356
357
/** Get feature weights */
358
public double[] weights();
359
360
/** Get number of labels */
361
public int numLabels();
362
}
363
364
/**
365
* CRF-based sequence labeler
366
* @param <T> the type of sequence elements
367
*/
368
class CRFLabeler<T> implements SequenceLabeler<T> {
369
/** Train CRF labeler with feature extractor */
370
public static <T> CRFLabeler<T> fit(T[][] sequences, int[][] labels, Function<T[], double[][]> featureExtractor);
371
372
/** Predict labels for sequence */
373
public int[] predict(T[] sequence);
374
375
/** Get underlying CRF */
376
public CRF crf();
377
}
378
379
/**
380
* Trellis for dynamic programming in sequence algorithms
381
*/
382
class Trellis {
383
/** Create trellis for sequence length and states */
384
public static Trellis of(int length, int states);
385
386
/** Forward algorithm for HMM */
387
public double forward(HMM hmm, int[] observations);
388
389
/** Backward algorithm for HMM */
390
public double backward(HMM hmm, int[] observations);
391
392
/** Viterbi algorithm for best path */
393
public int[] viterbi(HMM hmm, int[] observations);
394
}
395
```
396
397
### Association Rule Mining
398
399
Algorithms for discovering frequent patterns and association rules in transactional data.
400
401
```java { .api }
402
/**
403
* Association Rule Mining implementing Iterable<AssociationRule>
404
*/
405
class ARM implements Iterable<AssociationRule>, Serializable {
406
/** Mine association rules from transactions */
407
public static ARM fit(int[][] transactions, double minSupport, double minConfidence);
408
409
/** Mine with additional constraints */
410
public static ARM fit(int[][] transactions, double minSupport, double minConfidence, int maxRuleLength);
411
412
/** Iterate over discovered rules */
413
public Iterator<AssociationRule> iterator();
414
415
/** Get number of rules */
416
public int size();
417
418
/** Get all rules as array */
419
public AssociationRule[] rules();
420
}
421
422
/**
423
* Association rule representation
424
*/
425
class AssociationRule implements Serializable {
426
/** Rule antecedent (if part) */
427
public final int[] antecedent;
428
429
/** Rule consequent (then part) */
430
public final int[] consequent;
431
432
/** Rule support (frequency) */
433
public final double support;
434
435
/** Rule confidence */
436
public final double confidence;
437
438
/** Rule lift */
439
public final double lift;
440
441
/** Rule conviction */
442
public final double conviction;
443
444
/** Convert to string representation */
445
public String toString();
446
}
447
448
/**
449
* FP-Growth algorithm for frequent pattern mining
450
*/
451
class FPGrowth implements Iterable<ItemSet>, Serializable {
452
/** Mine frequent patterns */
453
public static FPGrowth fit(int[][] transactions, double minSupport);
454
455
/** Mine with minimum pattern length */
456
public static FPGrowth fit(int[][] transactions, double minSupport, int minLength);
457
458
/** Iterate over frequent itemsets */
459
public Iterator<ItemSet> iterator();
460
461
/** Get number of frequent itemsets */
462
public int size();
463
464
/** Get all itemsets as array */
465
public ItemSet[] itemsets();
466
}
467
468
/**
469
* Frequent Pattern Tree for FP-Growth
470
*/
471
class FPTree implements Serializable {
472
/** Build FP-tree from transactions */
473
public static FPTree of(int[][] transactions, double minSupport);
474
475
/** Add transaction to tree */
476
public void add(int[] transaction);
477
478
/** Mine patterns from tree */
479
public ItemSet[] mine(double minSupport);
480
481
/** Get header table */
482
public Map<Integer, Integer> headerTable();
483
}
484
485
/**
486
* Item set representation
487
*/
488
class ItemSet implements Serializable {
489
/** Items in the set */
490
public final int[] items;
491
492
/** Support count */
493
public final int support;
494
495
/** Support frequency */
496
public final double frequency;
497
498
/** Get itemset size */
499
public int size();
500
501
/** Check if contains item */
502
public boolean contains(int item);
503
}
504
505
/**
506
* Total Support Tree for association mining
507
*/
508
class TotalSupportTree implements Serializable {
509
/** Build total support tree */
510
public static TotalSupportTree of(int[][] transactions);
511
512
/** Add transaction */
513
public void add(int[] transaction);
514
515
/** Get total support for itemset */
516
public int support(int[] itemset);
517
}
518
```
519
520
### Anomaly Detection
521
522
Algorithms for identifying outliers and anomalous patterns in data.
523
524
```java { .api }
525
/**
526
* Isolation Forest for anomaly detection
527
*/
528
class IsolationForest implements Serializable {
529
/** Train isolation forest */
530
public static IsolationForest fit(double[][] data);
531
532
/** Train with custom parameters */
533
public static IsolationForest fit(double[][] data, int numTrees, int subsampleSize);
534
535
/** Calculate anomaly score (higher = more anomalous) */
536
public double score(double[] x);
537
538
/** Predict if sample is anomaly */
539
public boolean predict(double[] x, double threshold);
540
541
/** Get isolation trees */
542
public IsolationTree[] trees();
543
544
/** Calculate average path length for normalization */
545
public double averagePathLength(int n);
546
}
547
548
/**
549
* Individual isolation tree
550
*/
551
class IsolationTree implements Serializable {
552
/** Build isolation tree from data */
553
public static IsolationTree fit(double[][] data, int maxDepth);
554
555
/** Calculate path length for sample */
556
public double pathLength(double[] x);
557
558
/** Get tree height */
559
public int height();
560
561
/** Get number of leaves */
562
public int leaves();
563
}
564
565
/**
566
* One-class SVM for anomaly detection
567
*/
568
class SVM {
569
/** Train one-class SVM */
570
public static SVM fit(double[][] data, double nu);
571
572
/** Train with RBF kernel */
573
public static SVM fit(double[][] data, double nu, double gamma);
574
575
/** Predict if sample is normal (1) or anomaly (-1) */
576
public int predict(double[] x);
577
578
/** Calculate decision function value */
579
public double score(double[] x);
580
581
/** Get support vectors */
582
public double[][] supportVectors();
583
}
584
```
585
586
### Vector Quantization
587
588
Self-organizing algorithms for data compression and visualization.
589
590
```java { .api }
591
/**
592
* Base vector quantizer interface
593
*/
594
interface VectorQuantizer {
595
/** Quantize input vector to nearest prototype */
596
int quantize(double[] x);
597
598
/** Get prototype vectors */
599
double[][] prototypes();
600
601
/** Get quantization error */
602
double quantizationError(double[][] data);
603
}
604
605
/**
606
* Self-Organizing Map for vector quantization
607
*/
608
class SOM implements VectorQuantizer, Serializable {
609
/** Train SOM with rectangular grid */
610
public static SOM fit(double[][] data, int width, int height);
611
612
/** Train with custom parameters */
613
public static SOM fit(double[][] data, int width, int height, double learningRate, int epochs);
614
615
/** Quantize vector to best matching unit */
616
public int quantize(double[] x);
617
618
/** Get prototype at grid position */
619
public double[] prototype(int x, int y);
620
621
/** Get all prototypes */
622
public double[][] prototypes();
623
624
/** Get grid dimensions */
625
public int[] dimensions();
626
627
/** Calculate topographic error */
628
public double topographicError(double[][] data);
629
}
630
631
/**
632
* Neural Gas algorithm
633
*/
634
class NeuralGas implements VectorQuantizer, Serializable {
635
/** Train Neural Gas */
636
public static NeuralGas fit(double[][] data, int numPrototypes);
637
638
/** Train with custom parameters */
639
public static NeuralGas fit(double[][] data, int numPrototypes, double learningRate, int epochs);
640
641
/** Quantize vector */
642
public int quantize(double[] x);
643
644
/** Get prototypes */
645
public double[][] prototypes();
646
647
/** Get prototype ages */
648
public int[] ages();
649
}
650
651
/**
652
* Growing Neural Gas with dynamic topology
653
*/
654
class GrowingNeuralGas implements VectorQuantizer, Serializable {
655
/** Train Growing Neural Gas */
656
public static GrowingNeuralGas fit(double[][] data, int maxNodes);
657
658
/** Quantize vector */
659
public int quantize(double[] x);
660
661
/** Get current prototypes */
662
public double[][] prototypes();
663
664
/** Get topology edges */
665
public int[][] edges();
666
667
/** Get number of nodes */
668
public int size();
669
}
670
```
671
672
### Hyperparameter Optimization
673
674
Tools for optimizing machine learning model hyperparameters.
675
676
```java { .api }
677
/**
678
* Hyperparameter optimization utilities
679
*/
680
class Hyperparameters {
681
/** Grid search over parameter combinations */
682
public static <T> T grid(Function<Map<String, Object>, T> trainer,
683
Map<String, Object[]> paramGrid,
684
Function<T, Double> evaluator);
685
686
/** Random search over parameter distributions */
687
public static <T> T random(Function<Map<String, Object>, T> trainer,
688
Map<String, Distribution> paramDist,
689
int nIter,
690
Function<T, Double> evaluator);
691
692
/** Bayesian optimization using Gaussian processes */
693
public static <T> T bayesian(Function<Map<String, Object>, T> trainer,
694
Map<String, Double[]> bounds,
695
int nIter,
696
Function<T, Double> evaluator);
697
698
/** Tree-structured Parzen Estimator optimization */
699
public static <T> T tpe(Function<Map<String, Object>, T> trainer,
700
Map<String, Distribution> space,
701
int nIter,
702
Function<T, Double> evaluator);
703
}
704
```
705
706
**Comprehensive Usage Example:**
707
708
```java
709
import smile.manifold.TSNE;
710
import smile.timeseries.AR;
711
import smile.association.ARM;
712
import smile.anomaly.IsolationForest;
713
714
// Manifold learning for visualization
715
TSNE tsne = TSNE.fit(highDimData, 2, 30.0, 200.0, 1000);
716
double[][] visualization = tsne.coordinates;
717
718
// Time series forecasting
719
AR arModel = AR.fit(timeSeries, 5); // AR(5) model
720
double[] forecast = arModel.forecast(10); // 10-step forecast
721
722
// Association rule mining
723
ARM arm = ARM.fit(transactions, 0.01, 0.5); // 1% support, 50% confidence
724
for (AssociationRule rule : arm) {
725
System.out.println(rule.toString() + " (lift: " + rule.lift + ")");
726
}
727
728
// Anomaly detection
729
IsolationForest iforest = IsolationForest.fit(normalData, 100, 256);
730
for (double[] sample : testData) {
731
double score = iforest.score(sample);
732
boolean isAnomaly = iforest.predict(sample, 0.1); // 10% anomaly threshold
733
System.out.println("Sample score: " + score + ", Anomaly: " + isAnomaly);
734
}
735
736
// Sequence labeling with HMM
737
HMM hmm = HMM.fit(observationSequences, 5); // 5 hidden states
738
int[] predictedStates = hmm.predict(newObservations);
739
```
740
741
### Advanced Analytics Integration
742
743
These advanced analytics capabilities integrate seamlessly with Smile's core machine learning framework:
744
745
- **Preprocessing**: Use manifold learning for dimensionality reduction before classification
746
- **Feature Engineering**: Extract time series features for predictive modeling
747
- **Pattern Discovery**: Mine association rules to understand data relationships
748
- **Quality Control**: Apply anomaly detection for data cleaning and monitoring
749
- **Evaluation**: Use sequence modeling metrics for temporal prediction tasks
750
- **Optimization**: Apply hyperparameter tuning to all model types