0
# Deep Learning
1
2
Smile Core's neural network components include multi-layer perceptrons, activation functions, optimization algorithms, and network-building utilities. Together, they provide foundational deep learning capabilities for classification and regression tasks.
3
4
## Capabilities
5
6
### Multi-Layer Perceptron
7
8
Core neural network implementation with configurable architecture and training algorithms.
9
10
```java { .api }
11
/**
12
* Base multi-layer perceptron for deep learning
13
*/
14
abstract class MultilayerPerceptron implements Classifier<double[]>, Serializable {
15
/** Predict class label */
16
public abstract int predict(double[] x);
17
18
/** Predict with class probabilities */
19
public abstract int predict(double[] x, double[] posteriori);
20
21
/** Online learning update */
22
public abstract void update(double[] x, int y);
23
24
/** Get network architecture */
25
public abstract int[] architecture();
26
27
/** Get activation function for layer */
28
public abstract ActivationFunction activation(int layer);
29
30
/** Get network weights for specific layer */
31
public abstract double[][] getWeights(int layer);
32
33
/** Set learning rate */
34
public abstract void setLearningRate(double rate);
35
}
36
37
/**
38
* MLP for classification tasks
39
*/
40
class MLP implements Classifier<double[]> {
41
/** Train MLP classifier with default architecture */
42
public static MLP fit(double[][] x, int[] y);
43
44
/** Train with custom hidden layer configuration */
45
public static MLP fit(double[][] x, int[] y, int[] hiddenLayers);
46
47
/** Train with full configuration */
48
public static MLP fit(double[][] x, int[] y, Properties params);
49
50
/** Train with builder pattern */
51
public static MLP fit(double[][] x, int[] y, Consumer<MLPBuilder> config);
52
53
/** Predict class label */
54
public int predict(double[] x);
55
56
/** Predict with probabilities */
57
public int predict(double[] x, double[] posteriori);
58
59
/** Online learning update */
60
public void update(double[] x, int y);
61
62
/** Batch training update */
63
public void update(double[][] x, int[] y);
64
65
/** Get training error */
66
public double error();
67
68
/** Get network weights */
69
public double[][][] weights();
70
}
71
```
72
73
**Usage Example:**
74
75
```java
76
import smile.classification.MLP;
77
import smile.base.mlp.*;
78
79
// Basic MLP with default architecture
80
MLP mlp = MLP.fit(trainX, trainY);
81
82
// Custom architecture: input -> 100 -> 50 -> output
83
MLP customMLP = MLP.fit(trainX, trainY, new int[]{100, 50});
84
85
// Advanced configuration
86
MLP advancedMLP = MLP.fit(trainX, trainY, builder -> builder
87
.layer(new HiddenLayerBuilder(100, ActivationFunction.ReLU))
88
.layer(new HiddenLayerBuilder(50, ActivationFunction.ReLU))
89
.outputLayer(OutputFunction.SOFTMAX)
90
.learningRate(0.01)
91
.momentum(0.9)
92
);
93
94
// Make predictions
95
int prediction = mlp.predict(testSample);
96
double[] probabilities = new double[numClasses];
97
int predicted = mlp.predict(testSample, probabilities);
98
```
99
100
### Neural Network Layers
101
102
Building blocks for constructing neural network architectures.
103
104
```java { .api }
105
/**
106
* Base neural network layer
107
*/
108
abstract class Layer implements Serializable {
109
/** Number of neurons in layer */
110
public final int n;
111
112
/** Forward propagation through layer */
113
public abstract void forward(double[] input);
114
115
/** Backward propagation through layer */
116
public abstract void backward(double[] error);
117
118
/** Update layer weights */
119
public abstract void update(double learningRate);
120
121
/** Get layer output */
122
public abstract double[] output();
123
124
/** Get layer weights */
125
public abstract double[][] weights();
126
}
127
128
/**
129
* Input layer for neural networks
130
*/
131
class InputLayer extends Layer {
132
/** Create input layer with specified dimension */
133
public InputLayer(int dimension);
134
135
/** Forward pass (identity) */
136
public void forward(double[] input);
137
138
/** Get output (same as input) */
139
public double[] output();
140
}
141
142
/**
143
* Hidden layer with activation function
144
*/
145
class HiddenLayer extends Layer {
146
/** Create hidden layer */
147
public HiddenLayer(int neurons, ActivationFunction activation);
148
149
/** Create with custom weight initialization */
150
public HiddenLayer(int neurons, ActivationFunction activation, double weightRange);
151
152
/** Forward propagation */
153
public void forward(double[] input);
154
155
/** Backward propagation */
156
public void backward(double[] error);
157
158
/** Update weights using gradient descent */
159
public void update(double learningRate);
160
161
/** Update with momentum */
162
public void update(double learningRate, double momentum);
163
164
/** Get activation function */
165
public ActivationFunction activation();
166
}
167
168
/**
169
* Output layer for final predictions
170
*/
171
class OutputLayer extends Layer {
172
/** Create output layer for classification */
173
public OutputLayer(int classes, OutputFunction function);
174
175
/** Create output layer for regression */
176
public OutputLayer(OutputFunction function);
177
178
/** Forward propagation */
179
public void forward(double[] input);
180
181
/** Backward propagation */
182
public void backward(double[] target);
183
184
/** Calculate loss for training sample */
185
public double loss(double[] target);
186
187
/** Get output function */
188
public OutputFunction outputFunction();
189
}
190
```
191
192
### Layer Builders
193
194
Builder pattern for constructing neural network layers.
195
196
```java { .api }
197
/**
198
* Abstract base for layer builders
199
*/
200
abstract class LayerBuilder {
201
/** Build the layer */
202
public abstract Layer build(int inputSize);
203
}
204
205
/**
206
* Builder for hidden layers
207
*/
208
class HiddenLayerBuilder extends LayerBuilder {
209
/** Create hidden layer builder */
210
public HiddenLayerBuilder(int neurons, ActivationFunction activation);
211
212
/** Set dropout rate */
213
public HiddenLayerBuilder dropout(double rate);
214
215
/** Set weight initialization range */
216
public HiddenLayerBuilder weightInit(double range);
217
218
/** Set L1 regularization */
219
public HiddenLayerBuilder l1(double lambda);
220
221
/** Set L2 regularization */
222
public HiddenLayerBuilder l2(double lambda);
223
224
/** Build the hidden layer */
225
public Layer build(int inputSize);
226
}
227
228
/**
229
* Builder for output layers
230
*/
231
class OutputLayerBuilder extends LayerBuilder {
232
/** Create output layer builder */
233
public OutputLayerBuilder(int neurons, OutputFunction function);
234
235
/** Create for binary classification */
236
public static OutputLayerBuilder binary();
237
238
/** Create for multi-class classification */
239
public static OutputLayerBuilder multiclass(int classes);
240
241
/** Create for regression */
242
public static OutputLayerBuilder regression();
243
244
/** Build the output layer */
245
public Layer build(int inputSize);
246
}
247
```
248
249
### Activation Functions
250
251
Various activation functions for neural network layers.
252
253
```java { .api }
254
/**
255
* Base activation function interface
256
*/
257
interface ActivationFunction extends Serializable {
258
/** Apply activation function */
259
double apply(double x);
260
261
/** Compute derivative of activation function */
262
double derivative(double x);
263
264
/** Apply to vector (in-place) */
265
default void apply(double[] x) {
266
for (int i = 0; i < x.length; i++) {
267
x[i] = apply(x[i]);
268
}
269
}
270
271
// Predefined shared instances of common activations
272
/** Rectified Linear Unit */
273
static ActivationFunction ReLU = new ReLU();
274
275
/** Leaky ReLU */
276
static ActivationFunction LeakyReLU = new LeakyReLU();
277
278
/** Sigmoid function */
279
static ActivationFunction Sigmoid = new Sigmoid();
280
281
/** Hyperbolic tangent */
282
static ActivationFunction Tanh = new Tanh();
283
284
/** Linear activation (identity) */
285
static ActivationFunction Linear = new Linear();
286
}
287
288
/**
289
* Rectified Linear Unit activation
290
*/
291
class ReLU implements ActivationFunction {
292
/** Apply ReLU: max(0, x) */
293
public double apply(double x);
294
295
/** ReLU derivative */
296
public double derivative(double x);
297
}
298
299
/**
300
* Leaky ReLU activation
301
*/
302
class LeakyReLU implements ActivationFunction {
303
/** Create leaky ReLU with default slope 0.01 */
304
public LeakyReLU();
305
306
/** Create with custom negative slope */
307
public LeakyReLU(double alpha);
308
309
/** Apply leaky ReLU */
310
public double apply(double x);
311
312
/** Leaky ReLU derivative */
313
public double derivative(double x);
314
}
315
316
/**
317
* Sigmoid activation function
318
*/
319
class Sigmoid implements ActivationFunction {
320
/** Apply sigmoid: 1 / (1 + exp(-x)) */
321
public double apply(double x);
322
323
/** Sigmoid derivative */
324
public double derivative(double x);
325
}
326
327
/**
328
* Hyperbolic tangent activation
329
*/
330
class Tanh implements ActivationFunction {
331
/** Apply tanh */
332
public double apply(double x);
333
334
/** Tanh derivative */
335
public double derivative(double x);
336
}
337
338
/**
339
* Softmax activation for multi-class output
340
*/
341
class Softmax implements ActivationFunction {
342
/** Apply softmax to vector */
343
public void apply(double[] x);
344
345
/** Softmax derivative matrix */
346
public double[][] derivative(double[] x);
347
}
348
```
349
350
### Output Functions
351
352
Output layer functions for different types of neural network tasks.
353
354
```java { .api }
355
/**
356
* Output function types for neural networks
357
*/
358
enum OutputFunction {
359
/** Linear output for regression */
360
LINEAR,
361
362
/** Sigmoid output for binary classification */
363
SIGMOID,
364
365
/** Softmax output for multi-class classification */
366
SOFTMAX;
367
368
/** Apply output function to layer activations */
369
public void apply(double[] output);
370
371
/** Calculate loss for target values */
372
public double loss(double[] output, double[] target);
373
374
/** Calculate error gradient */
375
public double[] gradient(double[] output, double[] target);
376
}
377
```
378
379
### Cost Functions
380
381
Loss functions for training neural networks.
382
383
```java { .api }
384
/**
385
* Cost function types for neural network training
386
*/
387
enum Cost {
388
/** Mean squared error for regression */
389
MEAN_SQUARED_ERROR,
390
391
/** Cross entropy for classification */
392
CROSS_ENTROPY,
393
394
/** Sparse cross entropy for large vocabulary */
395
SPARSE_CROSS_ENTROPY;
396
397
/** Calculate loss value */
398
public double loss(double[] output, double[] target);
399
400
/** Calculate error gradient */
401
public double[] gradient(double[] output, double[] target);
402
}
403
```
404
405
### Optimizers
406
407
Optimization algorithms for training neural networks.
408
409
```java { .api }
410
/**
411
* Base optimizer interface
412
*/
413
interface Optimizer extends Serializable {
414
/** Update parameters using gradients */
415
void update(double[] parameters, double[] gradients);
416
417
/** Update with learning rate */
418
void update(double[] parameters, double[] gradients, double learningRate);
419
420
/** Reset optimizer state */
421
void reset();
422
}
423
424
/**
425
* Stochastic Gradient Descent optimizer
426
*/
427
class SGD implements Optimizer {
428
/** Create SGD with learning rate */
429
public SGD(double learningRate);
430
431
/** Create SGD with momentum */
432
public SGD(double learningRate, double momentum);
433
434
/** Update parameters */
435
public void update(double[] parameters, double[] gradients);
436
437
/** Get learning rate */
438
public double learningRate();
439
440
/** Set learning rate */
441
public void setLearningRate(double rate);
442
}
443
444
/**
445
* Adam optimizer with adaptive learning rates
446
*/
447
class Adam implements Optimizer {
448
/** Create Adam with default parameters */
449
public Adam();
450
451
/** Create Adam with custom parameters */
452
public Adam(double learningRate, double beta1, double beta2, double epsilon);
453
454
/** Update parameters */
455
public void update(double[] parameters, double[] gradients);
456
457
/** Reset momentum estimates */
458
public void reset();
459
}
460
461
/**
462
* RMSProp optimizer
463
*/
464
class RMSProp implements Optimizer {
465
/** Create RMSProp with default parameters */
466
public RMSProp();
467
468
/** Create RMSProp with custom decay rate */
469
public RMSProp(double learningRate, double decay);
470
471
/** Update parameters */
472
public void update(double[] parameters, double[] gradients);
473
474
/** Reset accumulated gradients */
475
public void reset();
476
}
477
```
478
479
### MLP Regression
480
481
Multi-layer perceptron for regression tasks.
482
483
```java { .api }
484
/**
485
* MLP for regression tasks
486
*/
487
class MLPRegression implements Regression<double[]> {
488
/** Train MLP regression with default architecture */
489
public static MLPRegression fit(double[][] x, double[] y);
490
491
/** Train with custom hidden layers */
492
public static MLPRegression fit(double[][] x, double[] y, int[] hiddenLayers);
493
494
/** Train with full configuration */
495
public static MLPRegression fit(double[][] x, double[] y, Properties params);
496
497
/** Predict target value */
498
public double predict(double[] x);
499
500
/** Online learning update */
501
public void update(double[] x, double y);
502
503
/** Get training RMSE */
504
public double rmse();
505
506
/** Get network weights */
507
public double[][][] weights();
508
}
509
```
510
511
### Advanced Neural Network Components
512
513
Additional components for building sophisticated neural networks.
514
515
```java { .api }
516
/**
517
* Dropout layer for regularization
518
*/
519
class DropoutLayer extends Layer {
520
/** Create dropout layer with specified rate */
521
public DropoutLayer(double dropoutRate);
522
523
/** Forward pass with dropout (training mode) */
524
public void forward(double[] input, boolean training);
525
526
/** Set training mode */
527
public void setTraining(boolean training);
528
529
/** Get dropout rate */
530
public double dropoutRate();
531
}
532
533
/**
534
* Batch normalization layer
535
*/
536
class BatchNormLayer extends Layer {
537
/** Create batch normalization layer */
538
public BatchNormLayer(int features);
539
540
/** Forward pass with batch normalization */
541
public void forward(double[] input);
542
543
/** Update running statistics */
544
public void updateStatistics(double[][] batch);
545
546
/** Get learned scale parameters */
547
public double[] gamma();
548
549
/** Get learned shift parameters */
550
public double[] beta();
551
}
552
553
/**
554
* Neural network builder for complex architectures
555
*/
556
class NetworkBuilder {
557
/** Start building network */
558
public static NetworkBuilder input(int dimension);
559
560
/** Add hidden layer */
561
public NetworkBuilder hidden(int neurons, ActivationFunction activation);
562
563
/** Add dropout layer */
564
public NetworkBuilder dropout(double rate);
565
566
/** Add batch normalization */
567
public NetworkBuilder batchNorm();
568
569
/** Set output layer */
570
public NetworkBuilder output(int neurons, OutputFunction function);
571
572
/** Build the network */
573
public MLP build();
574
}
575
```
576
577
**Advanced Usage Example:**
578
579
```java
580
import smile.base.mlp.*;
581
import smile.deep.activation.*;
582
import smile.deep.optimizer.*;
583
584
// Build complex neural network
585
MLP network = NetworkBuilder.input(784) // 28x28 images
586
.hidden(512, ActivationFunction.ReLU)
587
.dropout(0.5)
588
.batchNorm()
589
.hidden(256, ActivationFunction.ReLU)
590
.dropout(0.3)
591
.hidden(128, ActivationFunction.ReLU)
592
.output(10, OutputFunction.SOFTMAX) // 10 classes
593
.build();
594
595
// Custom training loop with Adam optimizer
596
Adam optimizer = new Adam(0.001, 0.9, 0.999, 1e-8);
597
int epochs = 100;
598
int batchSize = 32;
599
600
for (int epoch = 0; epoch < epochs; epoch++) {
601
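// Shuffle training data (shuffleData, getBatch, and evaluate are assumed to be user-supplied helpers; they are not part of the API listed above)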
602
shuffleData(trainX, trainY);
603
604
double epochLoss = 0.0;
605
for (int i = 0; i < trainX.length; i += batchSize) {
606
// Get batch
607
double[][] batchX = getBatch(trainX, i, batchSize);
608
int[] batchY = getBatch(trainY, i, batchSize);
609
610
// Forward and backward pass
611
double batchLoss = network.train(batchX, batchY, optimizer);
612
epochLoss += batchLoss;
613
}
614
615
// Validation
616
double accuracy = evaluate(network, validX, validY);
617
System.out.println("Epoch " + epoch + ", Loss: " + epochLoss + ", Accuracy: " + accuracy);
618
}
619
```
620
621
### Training Configuration
622
623
Common parameters for neural network training:
624
625
- **learningRate**: Learning rate for gradient descent (default: 0.01)
626
- **momentum**: Momentum factor for SGD (default: 0.0)
627
- **weightDecay**: L2 regularization strength (default: 0.0)
628
- **epochs**: Number of training epochs
629
- **batchSize**: Mini-batch size for training
630
- **dropout**: Dropout rate for regularization
631
- **earlyStop**: Early stopping patience
632
- **validation**: Validation split ratio
633
634
### Best Practices
635
636
Guidelines for effective neural network training:
637
638
1. **Data Preprocessing**: Normalize inputs to [0,1] or standardize to mean=0, std=1
639
2. **Architecture**: Start simple, add complexity gradually
640
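3. **Activation Functions**: Use ReLU for hidden layers, and choose the output function that matches the task (`LINEAR` for regression, `SIGMOID` for binary classification, `SOFTMAX` for multi-class classification)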
641
4. **Regularization**: Apply dropout and weight decay to prevent overfitting
642
5. **Learning Rate**: Start with 0.01, adjust based on training dynamics
643
6. **Batch Size**: Use powers of 2 (32, 64, 128) for efficiency
644
7. **Monitoring**: Track both training and validation metrics