CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/maven-com-github-haifengl--smile-core

Statistical Machine Intelligence and Learning Engine providing comprehensive machine learning algorithms for classification, regression, clustering, and feature engineering in Java

Pending
Overview
Eval results
Files

docs/deep-learning.md

Deep Learning

Neural network components including multi-layer perceptrons, activation functions, optimization algorithms, and neural network building utilities. Smile Core provides foundational deep learning capabilities for classification and regression tasks.

Capabilities

Multi-Layer Perceptron

Core neural network implementation with configurable architecture and training algorithms.

/**
 * Base multi-layer perceptron for deep learning.
 * API sketch only — method bodies are omitted in this document.
 */
abstract class MultilayerPerceptron implements Classifier<double[]>, Serializable {
    /** Predict the class label for feature vector {@code x}. */
    public abstract int predict(double[] x);
    
    /** Predict the class label and fill {@code posteriori} with per-class probabilities. */
    public abstract int predict(double[] x, double[] posteriori);
    
    /** Online (stochastic) learning update with a single labeled sample. */
    public abstract void update(double[] x, int y);
    
    /** Layer sizes of the network, from input to output. */
    public abstract int[] architecture();
    
    /** Activation function of the given layer (indexing base not specified here — TODO confirm). */
    public abstract ActivationFunction activation(int layer);
    
    /** Weight matrix of the given layer. */
    public abstract double[][] getWeights(int layer);
    
    /** Set the learning rate used by subsequent updates. */
    public abstract void setLearningRate(double rate);
}

/**
 * MLP for classification tasks
 */
/**
 * MLP for classification tasks.
 * NOTE(review): declared here as implementing Classifier directly rather than
 * extending the MultilayerPerceptron base sketched above — verify against the
 * actual Smile version in use.
 */
class MLP implements Classifier<double[]> {
    /** Train an MLP classifier with the default architecture. */
    public static MLP fit(double[][] x, int[] y);
    
    /** Train with a custom hidden-layer configuration (one size per hidden layer). */
    public static MLP fit(double[][] x, int[] y, int[] hiddenLayers);
    
    /** Train with hyperparameters supplied as key/value properties. */
    public static MLP fit(double[][] x, int[] y, Properties params);
    
    /** Train with builder-pattern configuration via the supplied consumer. */
    public static MLP fit(double[][] x, int[] y, Consumer<MLPBuilder> config);
    
    /** Predict the class label for feature vector {@code x}. */
    public int predict(double[] x);
    
    /** Predict the class label and fill {@code posteriori} with per-class probabilities. */
    public int predict(double[] x, double[] posteriori);
    
    /** Online learning update with a single labeled sample. */
    public void update(double[] x, int y);
    
    /** Mini-batch training update. */
    public void update(double[][] x, int[] y);
    
    /** Current training error. */
    public double error();
    
    /** Network weights, one matrix per layer. */
    public double[][][] weights();
}

Usage Example:

import smile.classification.MLP;
import smile.base.mlp.*;

// Example assumes trainX (double[][]), trainY (int[]), testSample (double[])
// and numClasses (int) are defined elsewhere.

// Basic MLP with default architecture
MLP mlp = MLP.fit(trainX, trainY);

// Custom architecture: input -> 100 -> 50 -> output
MLP customMLP = MLP.fit(trainX, trainY, new int[]{100, 50});

// Advanced configuration via the builder consumer overload
MLP advancedMLP = MLP.fit(trainX, trainY, builder -> builder
    .layer(new HiddenLayerBuilder(100, ActivationFunction.ReLU))
    .layer(new HiddenLayerBuilder(50, ActivationFunction.ReLU))
    .outputLayer(OutputFunction.SOFTMAX)
    .learningRate(0.01)
    .momentum(0.9)
);

// Make predictions
int prediction = mlp.predict(testSample);
double[] probabilities = new double[numClasses];
// Second overload fills `probabilities` with posterior class probabilities.
int predicted = mlp.predict(testSample, probabilities);

Neural Network Layers

Building blocks for constructing neural network architectures.

/**
 * Base neural network layer
 */
/**
 * Base neural network layer.
 * API sketch only — bodies omitted.
 */
abstract class Layer implements Serializable {
    /** Number of neurons in this layer. */
    public final int n;
    
    /** Forward propagation: consume {@code input} and compute this layer's activations. */
    public abstract void forward(double[] input);
    
    /** Backward propagation: consume the downstream error signal. */
    public abstract void backward(double[] error);
    
    /** Apply accumulated gradients to the weights using the given learning rate. */
    public abstract void update(double learningRate);
    
    /** Activations produced by the most recent forward pass. */
    public abstract double[] output();
    
    /** This layer's weight matrix. */
    public abstract double[][] weights();
}

/**
 * Input layer for neural networks
 */
/**
 * Input layer for neural networks. Passes inputs through unchanged.
 * NOTE(review): overrides of the abstract backward/update/weights members
 * are not listed in this sketch — confirm against the library source.
 */
class InputLayer extends Layer {
    /** Create an input layer with the specified input dimension. */
    public InputLayer(int dimension);
    
    /** Forward pass — identity transform. */
    public void forward(double[] input);
    
    /** Output of the layer, identical to its input. */
    public double[] output();
}

/**
 * Hidden layer with activation function
 */
/**
 * Hidden (fully-connected) layer with an activation function.
 */
class HiddenLayer extends Layer {
    /** Create a hidden layer with the given neuron count and activation. */
    public HiddenLayer(int neurons, ActivationFunction activation);
    
    /** Create with a custom initial weight range (uniform init — TODO confirm distribution). */
    public HiddenLayer(int neurons, ActivationFunction activation, double weightRange);
    
    /** Forward propagation: weighted sum of inputs followed by the activation. */
    public void forward(double[] input);
    
    /** Backward propagation of the downstream error signal. */
    public void backward(double[] error);
    
    /** Update weights using plain gradient descent. */
    public void update(double learningRate);
    
    /** Update weights using gradient descent with momentum. */
    public void update(double learningRate, double momentum);
    
    /** The activation function used by this layer. */
    public ActivationFunction activation();
}

/**
 * Output layer for final predictions
 */
/**
 * Output layer producing the network's final predictions.
 */
class OutputLayer extends Layer {
    /** Create an output layer for classification with the given class count. */
    public OutputLayer(int classes, OutputFunction function);
    
    /** Create an output layer for regression (single output). */
    public OutputLayer(OutputFunction function);
    
    /** Forward propagation through the output function. */
    public void forward(double[] input);
    
    /** Backward propagation. Note: unlike Layer.backward, the argument here is the target vector. */
    public void backward(double[] target);
    
    /** Loss of the most recent forward pass against {@code target}. */
    public double loss(double[] target);
    
    /** The output function used by this layer. */
    public OutputFunction outputFunction();
}

Layer Builders

Builder pattern for constructing neural network layers.

/**
 * Abstract base for layer builders
 */
/**
 * Abstract base for layer builders. Concrete builders capture layer
 * configuration and produce a Layer once the input size is known.
 */
abstract class LayerBuilder {
    /** Build the layer for the given input size (outputs of the preceding layer). */
    public abstract Layer build(int inputSize);
}

/**
 * Builder for hidden layers
 */
/**
 * Builder for hidden layers. Setter methods return {@code this} for chaining.
 */
class HiddenLayerBuilder extends LayerBuilder {
    /** Create a builder for a hidden layer with the given size and activation. */
    public HiddenLayerBuilder(int neurons, ActivationFunction activation);
    
    /** Set the dropout rate (fraction of units dropped during training). */
    public HiddenLayerBuilder dropout(double rate);
    
    /** Set the initial weight range. */
    public HiddenLayerBuilder weightInit(double range);
    
    /** Set the L1 regularization strength. */
    public HiddenLayerBuilder l1(double lambda);
    
    /** Set the L2 regularization strength. */
    public HiddenLayerBuilder l2(double lambda);
    
    /** Build the hidden layer for the given input size. */
    public Layer build(int inputSize);
}

/**
 * Builder for output layers
 */
/**
 * Builder for output layers, with static factories for common task types.
 */
class OutputLayerBuilder extends LayerBuilder {
    /** Create a builder for an output layer with the given size and output function. */
    public OutputLayerBuilder(int neurons, OutputFunction function);
    
    /** Factory: output layer for binary classification (sigmoid — TODO confirm). */
    public static OutputLayerBuilder binary();
    
    /** Factory: output layer for multi-class classification (softmax — TODO confirm). */
    public static OutputLayerBuilder multiclass(int classes);
    
    /** Factory: output layer for regression (linear — TODO confirm). */
    public static OutputLayerBuilder regression();
    
    /** Build the output layer for the given input size. */
    public Layer build(int inputSize);
}

Activation Functions

Various activation functions for neural network layers.

/**
 * Base activation function interface
 */
/**
 * Base activation function interface. Implementations supply the scalar
 * function and its derivative; the vector form defaults to element-wise
 * application.
 */
interface ActivationFunction extends Serializable {
    /** Apply the activation function to a scalar. */
    double apply(double x);
    
    /** Derivative of the activation function at {@code x}. */
    double derivative(double x);
    
    /** Apply the activation element-wise to {@code x}, modifying it in place. */
    default void apply(double[] x) {
        for (int i = 0; i < x.length; i++) {
            x[i] = apply(x[i]);
        }
    }
    
    // Predefined shared instances. These are constants, not factory methods —
    // interface fields are implicitly public static final.
    /** Rectified Linear Unit */
    static ActivationFunction ReLU = new ReLU();
    
    /** Leaky ReLU */
    static ActivationFunction LeakyReLU = new LeakyReLU();
    
    /** Sigmoid function */
    static ActivationFunction Sigmoid = new Sigmoid();
    
    /** Hyperbolic tangent */
    static ActivationFunction Tanh = new Tanh();
    
    /** Linear activation (identity) */
    static ActivationFunction Linear = new Linear();
}

/**
 * Rectified Linear Unit activation
 */
/**
 * Rectified Linear Unit activation: f(x) = max(0, x).
 */
class ReLU implements ActivationFunction {
    /** Apply ReLU: max(0, x). */
    public double apply(double x);
    
    /** ReLU derivative: 1 for x &gt; 0, otherwise 0. */
    public double derivative(double x);
}

/**
 * Leaky ReLU activation
 */
/**
 * Leaky ReLU activation: f(x) = x for x &gt;= 0, alpha * x otherwise.
 */
class LeakyReLU implements ActivationFunction {
    /** Create leaky ReLU with the default negative slope 0.01. */
    public LeakyReLU();
    
    /** Create leaky ReLU with a custom negative slope {@code alpha}. */
    public LeakyReLU(double alpha);
    
    /** Apply leaky ReLU. */
    public double apply(double x);
    
    /** Leaky ReLU derivative: 1 for x &gt;= 0, alpha otherwise. */
    public double derivative(double x);
}

/**
 * Sigmoid activation function
 */
/**
 * Sigmoid (logistic) activation, mapping the real line to (0, 1).
 */
class Sigmoid implements ActivationFunction {
    /** Apply sigmoid: 1 / (1 + exp(-x)). */
    public double apply(double x);
    
    /** Sigmoid derivative. */
    public double derivative(double x);
}

/**
 * Hyperbolic tangent activation
 */
/**
 * Hyperbolic tangent activation, mapping the real line to (-1, 1).
 */
class Tanh implements ActivationFunction {
    /** Apply tanh. */
    public double apply(double x);
    
    /** Tanh derivative. */
    public double derivative(double x);
}

/**
 * Softmax activation for multi-class output
 */
/**
 * Softmax activation for multi-class output layers.
 * NOTE(review): only vector-form methods are listed; the scalar
 * apply(double)/derivative(double) required by ActivationFunction are not
 * shown, so this sketch does not satisfy that interface as written — confirm
 * the actual type hierarchy in the library.
 */
class Softmax implements ActivationFunction {
    /** Apply softmax to the vector in place (values become a probability distribution). */
    public void apply(double[] x);
    
    /** Jacobian matrix of softmax at {@code x}. */
    public double[][] derivative(double[] x);
}

Output Functions

Output layer functions for different types of neural network tasks.

/**
 * Output function types for neural networks
 */
/**
 * Output function types for neural network output layers.
 */
enum OutputFunction {
    /** Linear (identity) output for regression. */
    LINEAR,
    
    /** Sigmoid output for binary classification. */
    SIGMOID,
    
    /** Softmax output for multi-class classification. */
    SOFTMAX;
    
    /** Apply the output function to the layer activations in place. */
    public void apply(double[] output);
    
    /** Loss of {@code output} against {@code target}. */
    public double loss(double[] output, double[] target);
    
    /** Error gradient of the loss with respect to the output. */
    public double[] gradient(double[] output, double[] target);
}

Cost Functions

Loss functions for training neural networks.

/**
 * Cost function types for neural network training
 */
/**
 * Cost (loss) function types for neural network training.
 */
enum Cost {
    /** Mean squared error — typical for regression. */
    MEAN_SQUARED_ERROR,
    
    /** Cross entropy — typical for classification. */
    CROSS_ENTROPY,
    
    /** Sparse cross entropy — for large-vocabulary targets. */
    SPARSE_CROSS_ENTROPY;
    
    /** Loss of {@code output} against {@code target}. */
    public double loss(double[] output, double[] target);
    
    /** Error gradient of the loss with respect to the output. */
    public double[] gradient(double[] output, double[] target);
}

Optimizers

Optimization algorithms for training neural networks.

/**
 * Base optimizer interface
 */
/**
 * Base optimizer interface. Implementations update parameters in place
 * from gradients and may keep internal state (momentum, moment estimates).
 */
interface Optimizer extends Serializable {
    /** Update {@code parameters} in place using {@code gradients} and the optimizer's own rate. */
    void update(double[] parameters, double[] gradients);
    
    /** Update {@code parameters} in place using an explicit learning rate. */
    void update(double[] parameters, double[] gradients, double learningRate);
    
    /** Reset any accumulated optimizer state. */
    void reset();
}

/**
 * Stochastic Gradient Descent optimizer
 */
/**
 * Stochastic Gradient Descent optimizer, optionally with momentum.
 */
class SGD implements Optimizer {
    /** Create SGD with the given learning rate (no momentum). */
    public SGD(double learningRate);
    
    /** Create SGD with the given learning rate and momentum factor. */
    public SGD(double learningRate, double momentum);
    
    /** Update {@code parameters} in place using {@code gradients}. */
    public void update(double[] parameters, double[] gradients);
    
    /** Current learning rate. */
    public double learningRate();
    
    /** Set the learning rate used by subsequent updates. */
    public void setLearningRate(double rate);
}

/**
 * Adam optimizer with adaptive learning rates
 */
/**
 * Adam optimizer with adaptive per-parameter learning rates.
 */
class Adam implements Optimizer {
    /** Create Adam with default hyperparameters (values not specified here — TODO confirm). */
    public Adam();
    
    /** Create Adam with explicit rate, first/second moment decay rates, and epsilon. */
    public Adam(double learningRate, double beta1, double beta2, double epsilon);
    
    /** Update {@code parameters} in place using {@code gradients}. */
    public void update(double[] parameters, double[] gradients);
    
    /** Reset the first and second moment estimates. */
    public void reset();
}

/**
 * RMSProp optimizer
 */
/**
 * RMSProp optimizer with a decaying average of squared gradients.
 */
class RMSProp implements Optimizer {
    /** Create RMSProp with default hyperparameters (values not specified here — TODO confirm). */
    public RMSProp();
    
    /** Create RMSProp with an explicit learning rate and decay rate. */
    public RMSProp(double learningRate, double decay);
    
    /** Update {@code parameters} in place using {@code gradients}. */
    public void update(double[] parameters, double[] gradients);
    
    /** Reset the accumulated squared-gradient averages. */
    public void reset();
}

MLP Regression

Multi-layer perceptron for regression tasks.

/**
 * MLP for regression tasks
 */
/**
 * MLP for regression tasks.
 */
class MLPRegression implements Regression<double[]> {
    /** Train an MLP regression model with the default architecture. */
    public static MLPRegression fit(double[][] x, double[] y);
    
    /** Train with a custom hidden-layer configuration (one size per hidden layer). */
    public static MLPRegression fit(double[][] x, double[] y, int[] hiddenLayers);
    
    /** Train with hyperparameters supplied as key/value properties. */
    public static MLPRegression fit(double[][] x, double[] y, Properties params);
    
    /** Predict the target value for feature vector {@code x}. */
    public double predict(double[] x);
    
    /** Online learning update with a single labeled sample. */
    public void update(double[] x, double y);
    
    /** Root-mean-squared error on the training data. */
    public double rmse();
    
    /** Network weights, one matrix per layer. */
    public double[][][] weights();
}

Advanced Neural Network Components

Additional components for building sophisticated neural networks.

/**
 * Dropout layer for regularization
 */
/**
 * Dropout layer for regularization: randomly zeroes units during training.
 * NOTE(review): the abstract forward(double[]) from Layer is not listed here —
 * only forward(double[], boolean) — confirm the override against the source.
 */
class DropoutLayer extends Layer {
    /** Create a dropout layer dropping the given fraction of units. */
    public DropoutLayer(double dropoutRate);
    
    /** Forward pass; applies dropout only when {@code training} is true. */
    public void forward(double[] input, boolean training);
    
    /** Toggle training mode for subsequent forward passes. */
    public void setTraining(boolean training);
    
    /** The configured dropout rate. */
    public double dropoutRate();
}

/**
 * Batch normalization layer
 */
/**
 * Batch normalization layer: normalizes activations, then applies a learned
 * scale (gamma) and shift (beta).
 */
class BatchNormLayer extends Layer {
    /** Create a batch normalization layer over the given number of features. */
    public BatchNormLayer(int features);
    
    /** Forward pass with batch normalization applied. */
    public void forward(double[] input);
    
    /** Update the running mean/variance statistics from a mini-batch. */
    public void updateStatistics(double[][] batch);
    
    /** Learned scale parameters (gamma). */
    public double[] gamma();
    
    /** Learned shift parameters (beta). */
    public double[] beta();
}

/**
 * Neural network builder for complex architectures
 */
/**
 * Fluent builder for assembling networks layer by layer.
 * Start with {@link #input}, chain layer methods, finish with {@link #build}.
 */
class NetworkBuilder {
    /** Start building a network with the given input dimension. */
    public static NetworkBuilder input(int dimension);
    
    /** Append a hidden layer. */
    public NetworkBuilder hidden(int neurons, ActivationFunction activation);
    
    /** Append a dropout layer with the given rate. */
    public NetworkBuilder dropout(double rate);
    
    /** Append a batch normalization layer. */
    public NetworkBuilder batchNorm();
    
    /** Set the output layer (size and output function). */
    public NetworkBuilder output(int neurons, OutputFunction function);
    
    /** Build the configured network. */
    public MLP build();
}

Advanced Usage Example:

import smile.base.mlp.*;
import smile.deep.activation.*;
import smile.deep.optimizer.*;

// Example assumes trainX/trainY, validX/validY and the helper methods
// shuffleData, getBatch and evaluate are defined elsewhere.

// Build complex neural network
MLP network = NetworkBuilder.input(784) // 28x28 images
    .hidden(512, ActivationFunction.ReLU)
    .dropout(0.5)
    .batchNorm()
    .hidden(256, ActivationFunction.ReLU)
    .dropout(0.3)
    .hidden(128, ActivationFunction.ReLU)
    .output(10, OutputFunction.SOFTMAX) // 10 classes
    .build();

// Custom training loop with Adam optimizer
Adam optimizer = new Adam(0.001, 0.9, 0.999, 1e-8);
int epochs = 100;
int batchSize = 32;

for (int epoch = 0; epoch < epochs; epoch++) {
    // Shuffle training data
    shuffleData(trainX, trainY);
    
    double epochLoss = 0.0;
    for (int i = 0; i < trainX.length; i += batchSize) {
        // Get batch
        double[][] batchX = getBatch(trainX, i, batchSize);
        int[] batchY = getBatch(trainY, i, batchSize);
        
        // Forward and backward pass.
        // NOTE(review): train(double[][], int[], Optimizer) is not part of the
        // MLP API documented above — verify against the actual Smile version.
        double batchLoss = network.train(batchX, batchY, optimizer);
        epochLoss += batchLoss;
    }
    
    // Validation
    double accuracy = evaluate(network, validX, validY);
    System.out.println("Epoch " + epoch + ", Loss: " + epochLoss + ", Accuracy: " + accuracy);
}

Training Configuration

Common parameters for neural network training:

  • learningRate: Learning rate for gradient descent (default: 0.01)
  • momentum: Momentum factor for SGD (default: 0.0)
  • weightDecay: L2 regularization strength (default: 0.0)
  • epochs: Number of training epochs
  • batchSize: Mini-batch size for training
  • dropout: Dropout rate for regularization
  • earlyStop: Early stopping patience
  • validation: Validation split ratio

Best Practices

Guidelines for effective neural network training:

  1. Data Preprocessing: Normalize inputs to [0,1] or standardize to mean=0, std=1
  2. Architecture: Start simple, add complexity gradually
  3. Activation Functions: Use ReLU for hidden layers, appropriate output function
  4. Regularization: Apply dropout and weight decay to prevent overfitting
  5. Learning Rate: Start with 0.01, adjust based on training dynamics
  6. Batch Size: Use powers of 2 (32, 64, 128) for efficiency
  7. Monitoring: Track both training and validation metrics

Install with Tessl CLI

npx tessl i tessl/maven-com-github-haifengl--smile-core

docs

advanced-analytics.md

classification.md

clustering.md

deep-learning.md

feature-engineering.md

index.md

regression.md

validation-metrics.md

tile.json