CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/maven-com-github-haifengl--smile-core

Statistical Machine Intelligence and Learning Engine providing comprehensive machine learning algorithms for classification, regression, clustering, and feature engineering in Java

Pending
Overview
Eval results
Files

docs/advanced-analytics.md

Advanced Analytics

Specialized algorithms for manifold learning, time series analysis, sequence modeling, association rule mining, anomaly detection, and other advanced machine learning tasks. Smile Core provides sophisticated tools for complex analytical scenarios.

Capabilities

Manifold Learning

Algorithms for discovering low-dimensional structure in high-dimensional data.

/**
 * Multi-Dimensional Scaling for manifold learning.
 * Embeds objects described by a pairwise proximity matrix into a
 * lower-dimensional space that preserves those proximities.
 */
class MDS implements Serializable {
    /**
     * Perform classical MDS.
     * @param proximity symmetric pairwise proximity (dissimilarity) matrix
     */
    public static MDS fit(double[][] proximity);
    
    /**
     * Perform MDS with specified dimensions.
     * @param k the dimension of the embedding space
     */
    public static MDS fit(double[][] proximity, int k);
    
    /**
     * Perform metric MDS with stress minimization.
     * @param metric if true, perform metric MDS via stress minimization
     */
    public static MDS fit(double[][] proximity, int k, boolean metric);
    
    /** Embedded coordinates, one row per input object. */
    public final double[][] coordinates;
    
    /** Eigenvalues of the embedding. */
    public final double[] eigenvalues;
    
    /** Stress (goodness of fit) of the embedding; lower is better. */
    public final double stress;
}

/**
 * Isotonic (non-metric) MDS, which preserves only the monotonic
 * ordering of the input distances.
 */
class IsotonicMDS implements Serializable {
    /**
     * Perform isotonic MDS.
     * @param proximity symmetric pairwise proximity (dissimilarity) matrix
     * @param k the dimension of the embedding space
     */
    public static IsotonicMDS fit(double[][] proximity, int k);
    
    /** Embedded coordinates, one row per input object. */
    public final double[][] coordinates;
    
    /** Final stress (goodness of fit) of the embedding. */
    public final double stress;
}

/**
 * Sammon's mapping for non-linear dimensionality reduction.
 */
class SammonMapping implements Serializable {
    /**
     * Perform Sammon mapping with default optimization settings.
     * @param k the dimension of the embedding space
     */
    public static SammonMapping fit(double[][] data, int k);
    
    /**
     * Perform Sammon mapping with custom optimization parameters.
     * @param learningRate step size of the iterative optimization
     * @param maxIter maximum number of iterations
     */
    public static SammonMapping fit(double[][] data, int k, double learningRate, int maxIter);
    
    /** Embedded coordinates, one row per input sample. */
    public final double[][] coordinates;
    
    /** Final stress of the mapping; lower is better. */
    public final double stress;
}

/**
 * Isomap for non-linear manifold learning based on geodesic distances
 * over a neighborhood graph.
 */
class IsoMap implements Serializable {
    /**
     * Perform Isomap with a k-nearest-neighbor graph.
     * @param k number of nearest neighbors
     * @param d the dimension of the embedding space
     */
    public static IsoMap fit(double[][] data, int k, int d);
    
    /**
     * Perform Isomap with an epsilon-neighborhood graph.
     * @param epsilon neighborhood radius
     * @param d the dimension of the embedding space
     */
    public static IsoMap fit(double[][] data, double epsilon, int d);
    
    /** Embedded coordinates, one row per input sample. */
    public final double[][] coordinates;
    
    /** Geodesic distances estimated on the neighborhood graph. */
    public final double[][] distances;
}

/**
 * Locally Linear Embedding for non-linear dimensionality reduction.
 */
class LLE implements Serializable {
    /**
     * Perform LLE.
     * @param k number of nearest neighbors used for local reconstruction
     * @param d the dimension of the embedding space
     */
    public static LLE fit(double[][] data, int k, int d);
    
    /** Embedded coordinates, one row per input sample. */
    public final double[][] coordinates;
    
    /** Local linear reconstruction weights. */
    public final double[][] weights;
}

/**
 * Laplacian Eigenmap for spectral manifold learning.
 */
class LaplacianEigenmap implements Serializable {
    /**
     * Perform Laplacian Eigenmap with a k-nearest-neighbor graph.
     * @param k number of nearest neighbors
     * @param d the dimension of the embedding space
     */
    public static LaplacianEigenmap fit(double[][] data, int k, int d);
    
    /**
     * Perform Laplacian Eigenmap with an RBF kernel.
     * @param sigma bandwidth of the RBF kernel
     */
    public static LaplacianEigenmap fit(double[][] data, int k, int d, double sigma);
    
    /** Embedded coordinates, one row per input sample. */
    public final double[][] coordinates;
    
    /** Eigenvalues associated with the embedding. */
    public final double[] eigenvalues;
}

/**
 * t-SNE (t-distributed Stochastic Neighbor Embedding) for visualization
 * and non-linear embedding.
 */
class TSNE implements Serializable {
    /** Perform t-SNE with default parameters. */
    public static TSNE fit(double[][] data);
    
    /**
     * Perform t-SNE with custom parameters.
     * @param d the dimension of the embedding space (typically 2 or 3 for visualization)
     * @param perplexity effective number of neighbors considered per point
     * @param learningRate step size of the gradient optimization
     * @param maxIter maximum number of iterations
     */
    public static TSNE fit(double[][] data, int d, double perplexity, double learningRate, int maxIter);
    
    /** Embedded coordinates, one row per input sample. */
    public final double[][] coordinates;
    
    /** Final Kullback-Leibler divergence of the embedding; lower is better. */
    public final double klDivergence;
}

/**
 * UMAP (Uniform Manifold Approximation and Projection).
 * Unlike most manifold learners here, a trained UMAP can also embed
 * new, unseen points via {@link #transform}.
 */
class UMAP implements Serializable {
    /** Perform UMAP with default parameters. */
    public static UMAP fit(double[][] data);
    
    /**
     * Perform UMAP with custom parameters.
     * @param d the dimension of the embedding space
     * @param nNeighbors number of nearest neighbors (trades local vs. global structure)
     * @param minDist minimum distance between points in the embedding
     * @param nEpochs number of optimization epochs
     */
    public static UMAP fit(double[][] data, int d, int nNeighbors, double minDist, int nEpochs);
    
    /** Embedded coordinates, one row per input sample. */
    public final double[][] coordinates;
    
    /** Project a new data point into the learned embedding. */
    public double[] transform(double[] x);
}

Usage Example:

import smile.manifold.*;

// t-SNE for visualization
TSNE tsne = TSNE.fit(highDimData, 2, 30.0, 200.0, 1000);
double[][] embedding = tsne.coordinates;

// UMAP for general manifold learning
UMAP umap = UMAP.fit(data, 10, 15, 0.1, 200);
double[][] reducedData = umap.coordinates;
double[] newPoint = umap.transform(testSample);

// Isomap for geodesic distances
IsoMap isomap = IsoMap.fit(data, 10, 5); // 10 neighbors, 5 dimensions
double[][] manifoldCoords = isomap.coordinates;

Time Series Analysis

Algorithms for analyzing temporal data patterns and forecasting.

/**
 * Static utilities for time series analysis: correlation measures,
 * differencing for stationarity, and smoothing.
 */
class TimeSeries {
    /** Calculate the autocorrelation function (ACF) of a series. */
    public static double[] autocorrelation(double[] data);
    
    /**
     * Calculate the autocorrelation function up to a maximum lag.
     * @param maxLag largest lag to compute
     */
    public static double[] autocorrelation(double[] data, int maxLag);
    
    /** Calculate the cross-correlation between two series. */
    public static double[] crosscorrelation(double[] x, double[] y);
    
    /**
     * Calculate the partial autocorrelation function (PACF),
     * useful for choosing the order of an AR model.
     */
    public static double[] pacf(double[] data, int maxLag);
    
    /** First-order differencing to help achieve stationarity. */
    public static double[] difference(double[] data);
    
    /**
     * Seasonal differencing.
     * @param period the seasonal period (lag) used for differencing
     */
    public static double[] seasonalDifference(double[] data, int period);
    
    /**
     * Moving-average smoothing.
     * @param window size of the sliding window
     */
    public static double[] movingAverage(double[] data, int window);
    
    /**
     * Exponential smoothing.
     * @param alpha smoothing factor; larger values weight recent observations more
     */
    public static double[] exponentialSmoothing(double[] data, double alpha);
}

/**
 * Autoregressive AR(p) model for time series forecasting.
 */
class AR implements Serializable {
    /** Coefficient estimation methods: Burg, ordinary least squares, maximum likelihood. */
    enum Method { BURG, OLS, MLE }
    
    /**
     * Fit an AR model using the Burg method.
     * @param p the order of the model
     */
    public static AR fit(double[] data, int p);
    
    /**
     * Fit an AR model with the specified estimation method.
     * @param method the coefficient estimation method
     */
    public static AR fit(double[] data, int p, Method method);
    
    /** The fitted AR coefficients. */
    public double[] coefficients();
    
    /** The model order p. */
    public int order();
    
    /** The estimated white-noise (innovation) variance. */
    public double variance();
    
    /**
     * Forecast future values.
     * @param steps number of steps ahead to forecast
     */
    public double[] forecast(int steps);
    
    /** One-step-ahead prediction from the given recent history. */
    public double predict(double[] history);
}

/**
 * ARMA(p, q) model combining autoregressive and moving-average terms.
 */
class ARMA implements Serializable {
    /**
     * Fit an ARMA model.
     * @param p the autoregressive order
     * @param q the moving-average order
     */
    public static ARMA fit(double[] data, int p, int q);
    
    /**
     * Fit an ARMA model, optionally including an intercept term.
     * @param includeIntercept whether to estimate an intercept
     */
    public static ARMA fit(double[] data, int p, int q, boolean includeIntercept);
    
    /** The fitted AR coefficients. */
    public double[] arCoefficients();
    
    /** The fitted MA coefficients. */
    public double[] maCoefficients();
    
    /** The fitted intercept term. */
    public double intercept();
    
    /**
     * Forecast future values.
     * @param steps number of steps ahead to forecast
     */
    public double[] forecast(int steps);
    
    /** In-sample residuals of the fitted model. */
    public double[] residuals();
    
    /** Akaike Information Criterion of the fit; lower is better. */
    public double aic();
}

/**
 * Box (portmanteau) test for remaining autocorrelation in residuals,
 * commonly used as a time series model diagnostic.
 */
class BoxTest {
    /** Supported test variants. */
    enum Type { LJUNG_BOX, BOX_PIERCE }
    
    /**
     * Perform the Ljung-Box test.
     * @param lags number of autocorrelation lags included in the statistic
     */
    public static BoxTest ljungBox(double[] residuals, int lags);
    
    /** Perform the Box-Pierce test. */
    public static BoxTest boxPierce(double[] residuals, int lags);
    
    /** The test statistic. */
    public final double statistic;
    
    /** The p-value of the test. */
    public final double pvalue;
    
    /** Degrees of freedom of the test. */
    public final int df;
}

Sequence Modeling

Algorithms for labeling and analyzing sequential data.

/**
 * Base interface for sequence labeling: assigns one label per element
 * of an input sequence.
 * @param <T> the type of sequence elements
 */
interface SequenceLabeler<T> {
    /**
     * Predict labels for a sequence.
     * @param sequence the input sequence
     * @return one predicted label index per sequence element
     */
    int[] predict(T[] sequence);
    
    /**
     * Get the label vocabulary.
     * NOTE(review): previously declared {@code default} without a body,
     * which is not valid Java; declared abstract to match the spec's
     * bodiless-stub convention.
     * @return the label names
     */
    String[] labels();
}

/**
 * Discrete Hidden Markov Model for sequence analysis.
 * Observations and states are integer-encoded.
 */
class HMM implements Serializable {
    /**
     * Train an HMM from observation sequences only (unsupervised).
     * @param numStates number of hidden states
     */
    public static HMM fit(int[][] observations, int numStates);
    
    /**
     * Train an HMM from observations with known state sequences (supervised).
     * @param numSymbols size of the observation alphabet
     */
    public static HMM fit(int[][] observations, int[][] states, int numStates, int numSymbols);
    
    /** Predict the most likely hidden state sequence (Viterbi algorithm). */
    public int[] predict(int[] observations);
    
    /** Probability of an observation sequence (forward algorithm). */
    public double probability(int[] observations);
    
    /** State transition probability matrix. */
    public double[][] transitionProbabilities();
    
    /** Symbol emission probability matrix. */
    public double[][] emissionProbabilities();
    
    /** Initial state probability distribution. */
    public double[] initialProbabilities();
}

/**
 * HMM-based sequence labeler that maps arbitrary elements to discrete
 * observation symbols via an encoder function.
 * @param <T> the type of sequence elements
 */
class HMMLabeler<T> implements SequenceLabeler<T> {
    /**
     * Train an HMM labeler.
     * @param encoder maps each element to its discrete observation symbol
     */
    public static <T> HMMLabeler<T> fit(T[][] sequences, int[][] labels, Function<T, Integer> encoder);
    
    /** Predict labels for a sequence. */
    public int[] predict(T[] sequence);
    
    /** The underlying trained HMM. */
    public HMM hmm();
}

/**
 * Conditional Random Field for sequence labeling.
 */
class CRF implements Serializable {
    /**
     * Train a CRF from per-position feature vectors and label sequences.
     * @param features features[i][t] is the feature vector at position t of sequence i
     */
    public static CRF fit(double[][][] features, int[][] labels);
    
    /**
     * Train a CRF with regularization.
     * @param lambda regularization strength
     */
    public static CRF fit(double[][][] features, int[][] labels, double lambda);
    
    /** Predict the label sequence for per-position feature vectors. */
    public int[] predict(double[][] features);
    
    /** Probability of a label sequence given the features. */
    public double probability(double[][] features, int[] labels);
    
    /** The learned feature weights. */
    public double[] weights();
    
    /** The number of distinct labels. */
    public int numLabels();
}

/**
 * CRF-based sequence labeler that derives per-position features from
 * raw elements via a feature extractor.
 * @param <T> the type of sequence elements
 */
class CRFLabeler<T> implements SequenceLabeler<T> {
    /**
     * Train a CRF labeler.
     * @param featureExtractor maps a sequence to its per-position feature vectors
     */
    public static <T> CRFLabeler<T> fit(T[][] sequences, int[][] labels, Function<T[], double[][]> featureExtractor);
    
    /** Predict labels for a sequence. */
    public int[] predict(T[] sequence);
    
    /** The underlying trained CRF. */
    public CRF crf();
}

/**
 * Trellis (lattice) supporting dynamic programming over HMM state
 * sequences.
 */
class Trellis {
    /**
     * Create a trellis.
     * @param length the sequence length
     * @param states the number of hidden states
     */
    public static Trellis of(int length, int states);
    
    /** Forward algorithm: probability of the observations under the HMM. */
    public double forward(HMM hmm, int[] observations);
    
    /** Backward algorithm counterpart of the forward pass. */
    public double backward(HMM hmm, int[] observations);
    
    /** Viterbi algorithm: most likely state path for the observations. */
    public int[] viterbi(HMM hmm, int[] observations);
}

Association Rule Mining

Algorithms for discovering frequent patterns and association rules in transactional data.

/**
 * Association Rule Mining over integer-encoded transactions.
 * Iterable over the discovered {@link AssociationRule}s.
 */
class ARM implements Iterable<AssociationRule>, Serializable {
    /**
     * Mine association rules from transactions.
     * @param minSupport minimum support threshold, as a fraction of transactions
     * @param minConfidence minimum confidence threshold
     */
    public static ARM fit(int[][] transactions, double minSupport, double minConfidence);
    
    /**
     * Mine association rules with a bound on rule length.
     * @param maxRuleLength maximum number of items in a rule
     */
    public static ARM fit(int[][] transactions, double minSupport, double minConfidence, int maxRuleLength);
    
    /** Iterate over the discovered rules. */
    public Iterator<AssociationRule> iterator();
    
    /** Number of discovered rules. */
    public int size();
    
    /** All discovered rules as an array. */
    public AssociationRule[] rules();
}

/**
 * An immutable association rule (antecedent implies consequent) with
 * its interestingness measures.
 */
class AssociationRule implements Serializable {
    /** Rule antecedent (the "if" itemset). */
    public final int[] antecedent;
    
    /** Rule consequent (the "then" itemset). */
    public final int[] consequent;
    
    /** Support: frequency of the rule's full itemset among all transactions. */
    public final double support;
    
    /** Confidence: rule support relative to the antecedent's support. */
    public final double confidence;
    
    /** Lift: values above 1 indicate positive correlation between the parts. */
    public final double lift;
    
    /** Conviction of the rule. */
    public final double conviction;
    
    /** Human-readable representation of the rule. */
    public String toString();
}

/**
 * FP-Growth algorithm for frequent itemset mining.
 * Iterable over the discovered {@link ItemSet}s.
 */
class FPGrowth implements Iterable<ItemSet>, Serializable {
    /**
     * Mine frequent itemsets.
     * @param minSupport minimum support threshold
     */
    public static FPGrowth fit(int[][] transactions, double minSupport);
    
    /**
     * Mine frequent itemsets with a minimum pattern length.
     * @param minLength smallest itemset size to report
     */
    public static FPGrowth fit(int[][] transactions, double minSupport, int minLength);
    
    /** Iterate over the frequent itemsets. */
    public Iterator<ItemSet> iterator();
    
    /** Number of frequent itemsets found. */
    public int size();
    
    /** All frequent itemsets as an array. */
    public ItemSet[] itemsets();
}

/**
 * Frequent Pattern tree: the compressed transaction representation
 * used by FP-Growth.
 */
class FPTree implements Serializable {
    /** Build an FP-tree from transactions. */
    public static FPTree of(int[][] transactions, double minSupport);
    
    /** Add a single transaction to the tree. */
    public void add(int[] transaction);
    
    /** Mine frequent patterns from the tree. */
    public ItemSet[] mine(double minSupport);
    
    /** Header table; presumably maps each item to its count — confirm against implementation. */
    public Map<Integer, Integer> headerTable();
}

/**
 * An immutable itemset with its support statistics.
 */
class ItemSet implements Serializable {
    /** The items in the set. */
    public final int[] items;
    
    /** Absolute support count. */
    public final int support;
    
    /** Relative support frequency. */
    public final double frequency;
    
    /** Number of items in the set. */
    public int size();
    
    /** Whether the set contains the given item. */
    public boolean contains(int item);
}

/**
 * Total Support Tree for counting itemset support in association
 * rule mining.
 */
class TotalSupportTree implements Serializable {
    /** Build a total support tree from transactions. */
    public static TotalSupportTree of(int[][] transactions);
    
    /** Add a transaction to the tree. */
    public void add(int[] transaction);
    
    /** Total support count of the given itemset. */
    public int support(int[] itemset);
}

Anomaly Detection

Algorithms for identifying outliers and anomalous patterns in data.

/**
 * Isolation Forest for unsupervised anomaly detection: anomalies are
 * isolated with shorter average path lengths across randomized trees.
 */
class IsolationForest implements Serializable {
    /** Train an isolation forest with default parameters. */
    public static IsolationForest fit(double[][] data);
    
    /**
     * Train an isolation forest.
     * @param numTrees number of trees in the ensemble
     * @param subsampleSize number of samples used to build each tree
     */
    public static IsolationForest fit(double[][] data, int numTrees, int subsampleSize);
    
    /** Anomaly score of a sample; higher means more anomalous. */
    public double score(double[] x);
    
    /**
     * Predict whether a sample is an anomaly.
     * @param threshold score cutoff for flagging a sample as anomalous
     */
    public boolean predict(double[] x, double threshold);
    
    /** The individual isolation trees of the ensemble. */
    public IsolationTree[] trees();
    
    /** Expected average path length for n samples, used to normalize scores. */
    public double averagePathLength(int n);
}

/**
 * A single isolation tree within an isolation forest.
 */
class IsolationTree implements Serializable {
    /**
     * Build an isolation tree from data.
     * @param maxDepth maximum depth the tree may grow to
     */
    public static IsolationTree fit(double[][] data, int maxDepth);
    
    /** Path length traversed by the sample in this tree. */
    public double pathLength(double[] x);
    
    /** Height of the tree. */
    public int height();
    
    /** Number of leaf nodes. */
    public int leaves();
}

/**
 * One-class SVM for anomaly detection: learns a boundary around the
 * training data and flags points falling outside it.
 */
class SVM {
    /**
     * Train a one-class SVM.
     * @param nu upper bound on the fraction of training points treated as outliers
     */
    public static SVM fit(double[][] data, double nu);
    
    /**
     * Train a one-class SVM with an RBF kernel.
     * @param gamma bandwidth parameter of the RBF kernel
     */
    public static SVM fit(double[][] data, double nu, double gamma);
    
    /** Predict: 1 if the sample is normal, -1 if it is an anomaly. */
    public int predict(double[] x);
    
    /** Decision function value for the sample. */
    public double score(double[] x);
    
    /** The support vectors of the trained model. */
    public double[][] supportVectors();
}

Vector Quantization

Self-organizing algorithms for data compression and visualization.

/**
 * Base interface for vector quantizers: maps input vectors onto a
 * finite set of prototype (codebook) vectors.
 */
interface VectorQuantizer {
    /** Index of the prototype nearest to the input vector. */
    int quantize(double[] x);
    
    /** The prototype (codebook) vectors. */
    double[][] prototypes();
    
    /** Quantization error of the quantizer over the given data. */
    double quantizationError(double[][] data);
}

/**
 * Self-Organizing Map: a topology-preserving vector quantizer arranged
 * on a rectangular grid of units.
 */
class SOM implements VectorQuantizer, Serializable {
    /**
     * Train a SOM with a rectangular grid.
     * @param width grid width in units
     * @param height grid height in units
     */
    public static SOM fit(double[][] data, int width, int height);
    
    /**
     * Train a SOM with custom training parameters.
     * @param learningRate initial learning rate
     * @param epochs number of training epochs
     */
    public static SOM fit(double[][] data, int width, int height, double learningRate, int epochs);
    
    /** Index of the best matching unit for the input vector. */
    public int quantize(double[] x);
    
    /** Prototype vector of the unit at grid position (x, y). */
    public double[] prototype(int x, int y);
    
    /** All prototype vectors. */
    public double[][] prototypes();
    
    /** Grid dimensions (width and height). */
    public int[] dimensions();
    
    /** Topographic error of the map over the given data. */
    public double topographicError(double[][] data);
}

/**
 * Neural Gas algorithm for vector quantization.
 */
class NeuralGas implements VectorQuantizer, Serializable {
    /**
     * Train Neural Gas with default parameters.
     * @param numPrototypes number of prototype vectors to learn
     */
    public static NeuralGas fit(double[][] data, int numPrototypes);
    
    /**
     * Train Neural Gas with custom training parameters.
     * @param learningRate initial learning rate
     * @param epochs number of training epochs
     */
    public static NeuralGas fit(double[][] data, int numPrototypes, double learningRate, int epochs);
    
    /** Index of the prototype nearest to the input vector. */
    public int quantize(double[] x);
    
    /** The learned prototype vectors. */
    public double[][] prototypes();
    
    /** Ages of the prototypes. */
    public int[] ages();
}

/**
 * Growing Neural Gas: a vector quantizer that adds nodes incrementally
 * and maintains a dynamic edge topology.
 */
class GrowingNeuralGas implements VectorQuantizer, Serializable {
    /**
     * Train Growing Neural Gas.
     * @param maxNodes upper bound on the number of nodes grown
     */
    public static GrowingNeuralGas fit(double[][] data, int maxNodes);
    
    /** Index of the nearest current prototype. */
    public int quantize(double[] x);
    
    /** The current prototype vectors. */
    public double[][] prototypes();
    
    /** Edges of the learned topology. */
    public int[][] edges();
    
    /** Current number of nodes. */
    public int size();
}

Hyperparameter Optimization

Tools for optimizing machine learning model hyperparameters.

/**
 * Hyperparameter optimization strategies. Each method repeatedly calls
 * the trainer with candidate parameter maps and scores the result with
 * the evaluator; presumably the best-scoring model is returned —
 * confirm against the implementation.
 */
class Hyperparameters {
    /**
     * Exhaustive grid search over all parameter combinations.
     * @param trainer builds a model from a parameter assignment
     * @param paramGrid candidate values for each parameter name
     * @param evaluator scores a trained model
     */
    public static <T> T grid(Function<Map<String, Object>, T> trainer, 
                            Map<String, Object[]> paramGrid,
                            Function<T, Double> evaluator);
    
    /**
     * Random search drawing parameters from the given distributions.
     * @param nIter number of random configurations to try
     */
    public static <T> T random(Function<Map<String, Object>, T> trainer,
                              Map<String, Distribution> paramDist,
                              int nIter,
                              Function<T, Double> evaluator);
    
    /**
     * Bayesian optimization using Gaussian processes.
     * @param bounds lower/upper bounds for each numeric parameter
     */
    public static <T> T bayesian(Function<Map<String, Object>, T> trainer,
                                Map<String, Double[]> bounds,
                                int nIter,
                                Function<T, Double> evaluator);
    
    /**
     * Tree-structured Parzen Estimator (TPE) optimization.
     * @param space search-space distribution for each parameter
     */
    public static <T> T tpe(Function<Map<String, Object>, T> trainer,
                           Map<String, Distribution> space,
                           int nIter,
                           Function<T, Double> evaluator);
}

Comprehensive Usage Example:

import smile.manifold.TSNE;
import smile.timeseries.AR;
import smile.association.ARM;
import smile.anomaly.IsolationForest;

// Manifold learning for visualization
TSNE tsne = TSNE.fit(highDimData, 2, 30.0, 200.0, 1000);
double[][] visualization = tsne.coordinates;

// Time series forecasting
AR arModel = AR.fit(timeSeries, 5); // AR(5) model
double[] forecast = arModel.forecast(10); // 10-step forecast

// Association rule mining
ARM arm = ARM.fit(transactions, 0.01, 0.5); // 1% support, 50% confidence
for (AssociationRule rule : arm) {
    System.out.println(rule.toString() + " (lift: " + rule.lift + ")");
}

// Anomaly detection
IsolationForest iforest = IsolationForest.fit(normalData, 100, 256);
for (double[] sample : testData) {
    double score = iforest.score(sample);
    boolean isAnomaly = iforest.predict(sample, 0.1); // 10% anomaly threshold
    System.out.println("Sample score: " + score + ", Anomaly: " + isAnomaly);
}

// Sequence labeling with HMM
HMM hmm = HMM.fit(observationSequences, 5); // 5 hidden states
int[] predictedStates = hmm.predict(newObservations);

Advanced Analytics Integration

These advanced analytics capabilities integrate seamlessly with Smile's core machine learning framework:

  • Preprocessing: Use manifold learning for dimensionality reduction before classification
  • Feature Engineering: Extract time series features for predictive modeling
  • Pattern Discovery: Mine association rules to understand data relationships
  • Quality Control: Apply anomaly detection for data cleaning and monitoring
  • Evaluation: Use sequence modeling metrics for temporal prediction tasks
  • Optimization: Apply hyperparameter tuning to all model types

Install with Tessl CLI

npx tessl i tessl/maven-com-github-haifengl--smile-core

docs

advanced-analytics.md

classification.md

clustering.md

deep-learning.md

feature-engineering.md

index.md

regression.md

validation-metrics.md

tile.json