Tessl Tile for maven/com.github.haifengl/smile-core@3.1.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

advanced-analytics.md classification.md clustering.md deep-learning.md feature-engineering.md index.md regression.md validation-metrics.md

docs/deep-learning.md

0
# Deep Learning
1

2
Neural network components including multi-layer perceptrons, activation functions, optimization algorithms, and neural network building utilities. Smile Core provides foundational deep learning capabilities for classification and regression tasks.
3

4
## Capabilities
5

6
### Multi-Layer Perceptron
7

8
Core neural network implementation with configurable architecture and training algorithms.
9

10
```java { .api }
11
/**
12
 * Base multi-layer perceptron for deep learning
13
 */
14
abstract class MultilayerPerceptron implements Classifier<double[]>, Serializable {
15
    /** Predict class label */
16
    public abstract int predict(double[] x);
17
    
18
    /** Predict with class probabilities */
19
    public abstract int predict(double[] x, double[] posteriori);
20
    
21
    /** Online learning update */
22
    public abstract void update(double[] x, int y);
23
    
24
    /** Get network architecture */
25
    public abstract int[] architecture();
26
    
27
    /** Get activation function for layer */
28
    public abstract ActivationFunction activation(int layer);
29
    
30
    /** Get network weights for specific layer */
31
    public abstract double[][] getWeights(int layer);
32
    
33
    /** Set learning rate */
34
    public abstract void setLearningRate(double rate);
35
}
36

37
/**
38
 * MLP for classification tasks
39
 */
40
class MLP implements Classifier<double[]> {
41
    /** Train MLP classifier with default architecture */
42
    public static MLP fit(double[][] x, int[] y);
43
    
44
    /** Train with custom hidden layer configuration */
45
    public static MLP fit(double[][] x, int[] y, int[] hiddenLayers);
46
    
47
    /** Train with full configuration */
48
    public static MLP fit(double[][] x, int[] y, Properties params);
49
    
50
    /** Train with builder pattern */
51
    public static MLP fit(double[][] x, int[] y, Consumer<MLPBuilder> config);
52
    
53
    /** Predict class label */
54
    public int predict(double[] x);
55
    
56
    /** Predict with probabilities */
57
    public int predict(double[] x, double[] posteriori);
58
    
59
    /** Online learning update */
60
    public void update(double[] x, int y);
61
    
62
    /** Batch training update */
63
    public void update(double[][] x, int[] y);
64
    
65
    /** Get training error */
66
    public double error();
67
    
68
    /** Get network weights */
69
    public double[][][] weights();
70
}
71
```
72

73
**Usage Example:**
74

75
```java
76
import smile.classification.MLP;
77
import smile.base.mlp.*;
78

79
// Basic MLP with default architecture
80
MLP mlp = MLP.fit(trainX, trainY);
81

82
// Custom architecture: input -> 100 -> 50 -> output
83
MLP customMLP = MLP.fit(trainX, trainY, new int[]{100, 50});
84

85
// Advanced configuration
86
MLP advancedMLP = MLP.fit(trainX, trainY, builder -> builder
87
    .layer(new HiddenLayerBuilder(100, ActivationFunction.ReLU))
88
    .layer(new HiddenLayerBuilder(50, ActivationFunction.ReLU))
89
    .outputLayer(OutputFunction.SOFTMAX)
90
    .learningRate(0.01)
91
    .momentum(0.9)
92
);
93

94
// Make predictions
95
int prediction = mlp.predict(testSample);
96
double[] probabilities = new double[numClasses];
97
int predicted = mlp.predict(testSample, probabilities);
98
```
99

100
### Neural Network Layers
101

102
Building blocks for constructing neural network architectures.
103

104
```java { .api }
105
/**
106
 * Base neural network layer
107
 */
108
abstract class Layer implements Serializable {
109
    /** Number of neurons in layer */
110
    public final int n;
111
    
112
    /** Forward propagation through layer */
113
    public abstract void forward(double[] input);
114
    
115
    /** Backward propagation through layer */
116
    public abstract void backward(double[] error);
117
    
118
    /** Update layer weights */
119
    public abstract void update(double learningRate);
120
    
121
    /** Get layer output */
122
    public abstract double[] output();
123
    
124
    /** Get layer weights */
125
    public abstract double[][] weights();
126
}
127

128
/**
129
 * Input layer for neural networks
130
 */
131
class InputLayer extends Layer {
132
    /** Create input layer with specified dimension */
133
    public InputLayer(int dimension);
134
    
135
    /** Forward pass (identity) */
136
    public void forward(double[] input);
137
    
138
    /** Get output (same as input) */
139
    public double[] output();
140
}
141

142
/**
143
 * Hidden layer with activation function
144
 */
145
class HiddenLayer extends Layer {
146
    /** Create hidden layer */
147
    public HiddenLayer(int neurons, ActivationFunction activation);
148
    
149
    /** Create with custom weight initialization */
150
    public HiddenLayer(int neurons, ActivationFunction activation, double weightRange);
151
    
152
    /** Forward propagation */
153
    public void forward(double[] input);
154
    
155
    /** Backward propagation */
156
    public void backward(double[] error);
157
    
158
    /** Update weights using gradient descent */
159
    public void update(double learningRate);
160
    
161
    /** Update with momentum */
162
    public void update(double learningRate, double momentum);
163
    
164
    /** Get activation function */
165
    public ActivationFunction activation();
166
}
167

168
/**
169
 * Output layer for final predictions
170
 */
171
class OutputLayer extends Layer {
172
    /** Create output layer for classification */
173
    public OutputLayer(int classes, OutputFunction function);
174
    
175
    /** Create output layer for regression */
176
    public OutputLayer(OutputFunction function);
177
    
178
    /** Forward propagation */
179
    public void forward(double[] input);
180
    
181
    /** Backward propagation */
182
    public void backward(double[] target);
183
    
184
    /** Calculate loss for training sample */
185
    public double loss(double[] target);
186
    
187
    /** Get output function */
188
    public OutputFunction outputFunction();
189
}
190
```
191

192
### Layer Builders
193

194
Builder pattern for constructing neural network layers.
195

196
```java { .api }
197
/**
198
 * Abstract base for layer builders
199
 */
200
abstract class LayerBuilder {
201
    /** Build the layer */
202
    public abstract Layer build(int inputSize);
203
}
204

205
/**
206
 * Builder for hidden layers
207
 */
208
class HiddenLayerBuilder extends LayerBuilder {
209
    /** Create hidden layer builder */
210
    public HiddenLayerBuilder(int neurons, ActivationFunction activation);
211
    
212
    /** Set dropout rate */
213
    public HiddenLayerBuilder dropout(double rate);
214
    
215
    /** Set weight initialization range */
216
    public HiddenLayerBuilder weightInit(double range);
217
    
218
    /** Set L1 regularization */
219
    public HiddenLayerBuilder l1(double lambda);
220
    
221
    /** Set L2 regularization */
222
    public HiddenLayerBuilder l2(double lambda);
223
    
224
    /** Build the hidden layer */
225
    public Layer build(int inputSize);
226
}
227

228
/**
229
 * Builder for output layers
230
 */
231
class OutputLayerBuilder extends LayerBuilder {
232
    /** Create output layer builder */
233
    public OutputLayerBuilder(int neurons, OutputFunction function);
234
    
235
    /** Create for binary classification */
236
    public static OutputLayerBuilder binary();
237
    
238
    /** Create for multi-class classification */
239
    public static OutputLayerBuilder multiclass(int classes);
240
    
241
    /** Create for regression */
242
    public static OutputLayerBuilder regression();
243
    
244
    /** Build the output layer */
245
    public Layer build(int inputSize);
246
}
247
```
248

249
### Activation Functions
250

251
Various activation functions for neural network layers.
252

253
```java { .api }
254
/**
255
 * Base activation function interface
256
 */
257
interface ActivationFunction extends Serializable {
258
    /** Apply activation function */
259
    double apply(double x);
260
    
261
    /** Compute derivative of activation function */
262
    double derivative(double x);
263
    
264
    /** Apply to vector (in-place) */
265
    default void apply(double[] x) {
266
        for (int i = 0; i < x.length; i++) {
267
            x[i] = apply(x[i]);
268
        }
269
    }
270
    
271
    // Predefined shared instances of common activations
272
    /** Rectified Linear Unit */
273
    static ActivationFunction ReLU = new ReLU();
274
    
275
    /** Leaky ReLU */
276
    static ActivationFunction LeakyReLU = new LeakyReLU();
277
    
278
    /** Sigmoid function */
279
    static ActivationFunction Sigmoid = new Sigmoid();
280
    
281
    /** Hyperbolic tangent */
282
    static ActivationFunction Tanh = new Tanh();
283
    
284
    /** Linear activation (identity) */
285
    static ActivationFunction Linear = new Linear();
286
}
287

288
/**
289
 * Rectified Linear Unit activation
290
 */
291
class ReLU implements ActivationFunction {
292
    /** Apply ReLU: max(0, x) */
293
    public double apply(double x);
294
    
295
    /** ReLU derivative */
296
    public double derivative(double x);
297
}
298

299
/**
300
 * Leaky ReLU activation
301
 */
302
class LeakyReLU implements ActivationFunction {
303
    /** Create leaky ReLU with default slope 0.01 */
304
    public LeakyReLU();
305
    
306
    /** Create with custom negative slope */
307
    public LeakyReLU(double alpha);
308
    
309
    /** Apply leaky ReLU */
310
    public double apply(double x);
311
    
312
    /** Leaky ReLU derivative */
313
    public double derivative(double x);
314
}
315

316
/**
317
 * Sigmoid activation function
318
 */
319
class Sigmoid implements ActivationFunction {
320
    /** Apply sigmoid: 1 / (1 + exp(-x)) */
321
    public double apply(double x);
322
    
323
    /** Sigmoid derivative */
324
    public double derivative(double x);
325
}
326

327
/**
328
 * Hyperbolic tangent activation
329
 */
330
class Tanh implements ActivationFunction {
331
    /** Apply tanh */
332
    public double apply(double x);
333
    
334
    /** Tanh derivative */
335
    public double derivative(double x);
336
}
337

338
/**
339
 * Softmax activation for multi-class output
340
 */
341
class Softmax implements ActivationFunction {
342
    /** Apply softmax to vector */
343
    public void apply(double[] x);
344
    
345
    /** Softmax derivative matrix */
346
    public double[][] derivative(double[] x);
347
}
348
```
349

350
### Output Functions
351

352
Output layer functions for different types of neural network tasks.
353

354
```java { .api }
355
/**
356
 * Output function types for neural networks
357
 */
358
enum OutputFunction {
359
    /** Linear output for regression */
360
    LINEAR,
361
    
362
    /** Sigmoid output for binary classification */
363
    SIGMOID,
364
    
365
    /** Softmax output for multi-class classification */
366
    SOFTMAX;
367
    
368
    /** Apply output function to layer activations */
369
    public void apply(double[] output);
370
    
371
    /** Calculate loss for target values */
372
    public double loss(double[] output, double[] target);
373
    
374
    /** Calculate error gradient */
375
    public double[] gradient(double[] output, double[] target);
376
}
377
```
378

379
### Cost Functions
380

381
Loss functions for training neural networks.
382

383
```java { .api }
384
/**
385
 * Cost function types for neural network training
386
 */
387
enum Cost {
388
    /** Mean squared error for regression */
389
    MEAN_SQUARED_ERROR,
390
    
391
    /** Cross entropy for classification */
392
    CROSS_ENTROPY,
393
    
394
    /** Sparse cross entropy for large vocabulary */
395
    SPARSE_CROSS_ENTROPY;
396
    
397
    /** Calculate loss value */
398
    public double loss(double[] output, double[] target);
399
    
400
    /** Calculate error gradient */
401
    public double[] gradient(double[] output, double[] target);
402
}
403
```
404

405
### Optimizers
406

407
Optimization algorithms for training neural networks.
408

409
```java { .api }
410
/**
411
 * Base optimizer interface
412
 */
413
interface Optimizer extends Serializable {
414
    /** Update parameters using gradients */
415
    void update(double[] parameters, double[] gradients);
416
    
417
    /** Update with learning rate */
418
    void update(double[] parameters, double[] gradients, double learningRate);
419
    
420
    /** Reset optimizer state */
421
    void reset();
422
}
423

424
/**
425
 * Stochastic Gradient Descent optimizer
426
 */
427
class SGD implements Optimizer {
428
    /** Create SGD with learning rate */
429
    public SGD(double learningRate);
430
    
431
    /** Create SGD with momentum */
432
    public SGD(double learningRate, double momentum);
433
    
434
    /** Update parameters */
435
    public void update(double[] parameters, double[] gradients);
436
    
437
    /** Get learning rate */
438
    public double learningRate();
439
    
440
    /** Set learning rate */
441
    public void setLearningRate(double rate);
442
}
443

444
/**
445
 * Adam optimizer with adaptive learning rates
446
 */
447
class Adam implements Optimizer {
448
    /** Create Adam with default parameters */
449
    public Adam();
450
    
451
    /** Create Adam with custom parameters */
452
    public Adam(double learningRate, double beta1, double beta2, double epsilon);
453
    
454
    /** Update parameters */
455
    public void update(double[] parameters, double[] gradients);
456
    
457
    /** Reset momentum estimates */
458
    public void reset();
459
}
460

461
/**
462
 * RMSProp optimizer
463
 */
464
class RMSProp implements Optimizer {
465
    /** Create RMSProp with default parameters */
466
    public RMSProp();
467
    
468
    /** Create RMSProp with custom decay rate */
469
    public RMSProp(double learningRate, double decay);
470
    
471
    /** Update parameters */
472
    public void update(double[] parameters, double[] gradients);
473
    
474
    /** Reset accumulated gradients */
475
    public void reset();
476
}
477
```
478

479
### MLP Regression
480

481
Multi-layer perceptron for regression tasks.
482

483
```java { .api }
484
/**
485
 * MLP for regression tasks
486
 */
487
class MLPRegression implements Regression<double[]> {
488
    /** Train MLP regression with default architecture */
489
    public static MLPRegression fit(double[][] x, double[] y);
490
    
491
    /** Train with custom hidden layers */
492
    public static MLPRegression fit(double[][] x, double[] y, int[] hiddenLayers);
493
    
494
    /** Train with full configuration */
495
    public static MLPRegression fit(double[][] x, double[] y, Properties params);
496
    
497
    /** Predict target value */
498
    public double predict(double[] x);
499
    
500
    /** Online learning update */
501
    public void update(double[] x, double y);
502
    
503
    /** Get training RMSE */
504
    public double rmse();
505
    
506
    /** Get network weights */
507
    public double[][][] weights();
508
}
509
```
510

511
### Advanced Neural Network Components
512

513
Additional components for building sophisticated neural networks.
514

515
```java { .api }
516
/**
517
 * Dropout layer for regularization
518
 */
519
class DropoutLayer extends Layer {
520
    /** Create dropout layer with specified rate */
521
    public DropoutLayer(double dropoutRate);
522
    
523
    /** Forward pass with dropout (training mode) */
524
    public void forward(double[] input, boolean training);
525
    
526
    /** Set training mode */
527
    public void setTraining(boolean training);
528
    
529
    /** Get dropout rate */
530
    public double dropoutRate();
531
}
532

533
/**
534
 * Batch normalization layer
535
 */
536
class BatchNormLayer extends Layer {
537
    /** Create batch normalization layer */
538
    public BatchNormLayer(int features);
539
    
540
    /** Forward pass with batch normalization */
541
    public void forward(double[] input);
542
    
543
    /** Update running statistics */
544
    public void updateStatistics(double[][] batch);
545
    
546
    /** Get learned scale parameters */
547
    public double[] gamma();
548
    
549
    /** Get learned shift parameters */
550
    public double[] beta();
551
}
552

553
/**
554
 * Neural network builder for complex architectures
555
 */
556
class NetworkBuilder {
557
    /** Start building network */
558
    public static NetworkBuilder input(int dimension);
559
    
560
    /** Add hidden layer */
561
    public NetworkBuilder hidden(int neurons, ActivationFunction activation);
562
    
563
    /** Add dropout layer */
564
    public NetworkBuilder dropout(double rate);
565
    
566
    /** Add batch normalization */
567
    public NetworkBuilder batchNorm();
568
    
569
    /** Set output layer */
570
    public NetworkBuilder output(int neurons, OutputFunction function);
571
    
572
    /** Build the network */
573
    public MLP build();
574
}
575
```
576

577
**Advanced Usage Example:**
578

579
```java
580
import smile.base.mlp.*;
581
import smile.deep.activation.*;
582
import smile.deep.optimizer.*;
583

584
// Build complex neural network
585
MLP network = NetworkBuilder.input(784) // 28x28 images
586
    .hidden(512, ActivationFunction.ReLU)
587
    .dropout(0.5)
588
    .batchNorm()
589
    .hidden(256, ActivationFunction.ReLU)
590
    .dropout(0.3)
591
    .hidden(128, ActivationFunction.ReLU)
592
    .output(10, OutputFunction.SOFTMAX) // 10 classes
593
    .build();
594

595
// Custom training loop with Adam optimizer
596
Adam optimizer = new Adam(0.001, 0.9, 0.999, 1e-8);
597
int epochs = 100;
598
int batchSize = 32;
599

600
for (int epoch = 0; epoch < epochs; epoch++) {
601
    // Shuffle training data
602
    shuffleData(trainX, trainY);
603
    
604
    double epochLoss = 0.0;
605
    for (int i = 0; i < trainX.length; i += batchSize) {
606
        // Get batch
607
        double[][] batchX = getBatch(trainX, i, batchSize);
608
        int[] batchY = getBatch(trainY, i, batchSize);
609
        
610
        // Forward and backward pass
611
        double batchLoss = network.train(batchX, batchY, optimizer);
612
        epochLoss += batchLoss;
613
    }
614
    
615
    // Validation
616
    double accuracy = evaluate(network, validX, validY);
617
    System.out.println("Epoch " + epoch + ", Loss: " + epochLoss + ", Accuracy: " + accuracy);
618
}
619
```
620

621
### Training Configuration
622

623
Common parameters for neural network training:
624

625
- **learningRate**: Learning rate for gradient descent (default: 0.01)
626
- **momentum**: Momentum factor for SGD (default: 0.0)
627
- **weightDecay**: L2 regularization strength (default: 0.0)
628
- **epochs**: Number of training epochs
629
- **batchSize**: Mini-batch size for training
630
- **dropout**: Dropout rate for regularization
631
- **earlyStop**: Early stopping patience
632
- **validation**: Validation split ratio
633

634
### Best Practices
635

636
Guidelines for effective neural network training:
637

638
1. **Data Preprocessing**: Normalize inputs to [0,1] or standardize to mean=0, std=1
639
2. **Architecture**: Start simple, add complexity gradually
640
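3. **Activation Functions**: Use ReLU for hidden layers and an output function that matches the task (linear for regression, sigmoid for binary classification, softmax for multi-class classification)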
641
4. **Regularization**: Apply dropout and weight decay to prevent overfitting
642
5. **Learning Rate**: Start with 0.01, adjust based on training dynamics
643
6. **Batch Size**: Use powers of 2 (32, 64, 128) for efficiency
644
7. **Monitoring**: Track both training and validation metrics

Version

Tile

Files

docs/deep-learning.md

docs/deep-learning.md