or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

advanced-analytics.md classification.md clustering.md deep-learning.md feature-engineering.md index.md regression.md validation-metrics.md

deep-learning.md docs/

0

# Deep Learning

1

2

Neural network components including multi-layer perceptrons, activation functions, optimization algorithms, and neural network building utilities. Smile Core provides foundational deep learning capabilities for classification and regression tasks.

3

4

## Capabilities

5

6

### Multi-Layer Perceptron

7

8

Core neural network implementation with configurable architecture and training algorithms.

9

10

```java { .api }

11

/**

12

* Base multi-layer perceptron for deep learning

13

*/

14

abstract class MultilayerPerceptron implements Classifier<double[]>, Serializable {

15

/** Predict class label */

16

public abstract int predict(double[] x);

17

18

/** Predict with class probabilities */

19

public abstract int predict(double[] x, double[] posteriori);

20

21

/** Online learning update */

22

public abstract void update(double[] x, int y);

23

24

/** Get network architecture */

25

public abstract int[] architecture();

26

27

/** Get activation function for layer */

28

public abstract ActivationFunction activation(int layer);

29

30

/** Get network weights for specific layer */

31

public abstract double[][] getWeights(int layer);

32

33

/** Set learning rate */

34

public abstract void setLearningRate(double rate);

35

}

36

37

/**

38

* MLP for classification tasks

39

*/

40

class MLP implements Classifier<double[]> {

41

/** Train MLP classifier with default architecture */

42

public static MLP fit(double[][] x, int[] y);

43

44

/** Train with custom hidden layer configuration */

45

public static MLP fit(double[][] x, int[] y, int[] hiddenLayers);

46

47

/** Train with full configuration */

48

public static MLP fit(double[][] x, int[] y, Properties params);

49

50

/** Train with builder pattern */

51

public static MLP fit(double[][] x, int[] y, Consumer<MLPBuilder> config);

52

53

/** Predict class label */

54

public int predict(double[] x);

55

56

/** Predict with probabilities */

57

public int predict(double[] x, double[] posteriori);

58

59

/** Online learning update */

60

public void update(double[] x, int y);

61

62

/** Batch training update */

63

public void update(double[][] x, int[] y);

64

65

/** Get training error */

66

public double error();

67

68

/** Get network weights */

69

public double[][][] weights();

70

}

71

```

72

73

**Usage Example:**

74

75

```java

76

import smile.classification.MLP;

77

import smile.base.mlp.*;

78

79

// Basic MLP with default architecture

80

MLP mlp = MLP.fit(trainX, trainY);

81

82

// Custom architecture: input -> 100 -> 50 -> output

83

MLP customMLP = MLP.fit(trainX, trainY, new int[]{100, 50});

84

85

// Advanced configuration

86

MLP advancedMLP = MLP.fit(trainX, trainY, builder -> builder

87

.layer(new HiddenLayerBuilder(100, ActivationFunction.ReLU))

88

.layer(new HiddenLayerBuilder(50, ActivationFunction.ReLU))

89

.outputLayer(OutputFunction.SOFTMAX)

90

.learningRate(0.01)

91

.momentum(0.9)

92

);

93

94

// Make predictions

95

int prediction = mlp.predict(testSample);

96

double[] probabilities = new double[numClasses];

97

int predicted = mlp.predict(testSample, probabilities);

98

```

99

100

### Neural Network Layers

101

102

Building blocks for constructing neural network architectures.

103

104

```java { .api }

105

/**

106

* Base neural network layer

107

*/

108

abstract class Layer implements Serializable {

109

/** Number of neurons in layer */

110

public final int n;

111

112

/** Forward propagation through layer */

113

public abstract void forward(double[] input);

114

115

/** Backward propagation through layer */

116

public abstract void backward(double[] error);

117

118

/** Update layer weights */

119

public abstract void update(double learningRate);

120

121

/** Get layer output */

122

public abstract double[] output();

123

124

/** Get layer weights */

125

public abstract double[][] weights();

126

}

127

128

/**

129

* Input layer for neural networks

130

*/

131

class InputLayer extends Layer {

132

/** Create input layer with specified dimension */

133

public InputLayer(int dimension);

134

135

/** Forward pass (identity) */

136

public void forward(double[] input);

137

138

/** Get output (same as input) */

139

public double[] output();

140

}

141

142

/**

143

* Hidden layer with activation function

144

*/

145

class HiddenLayer extends Layer {

146

/** Create hidden layer */

147

public HiddenLayer(int neurons, ActivationFunction activation);

148

149

/** Create with custom weight initialization */

150

public HiddenLayer(int neurons, ActivationFunction activation, double weightRange);

151

152

/** Forward propagation */

153

public void forward(double[] input);

154

155

/** Backward propagation */

156

public void backward(double[] error);

157

158

/** Update weights using gradient descent */

159

public void update(double learningRate);

160

161

/** Update with momentum */

162

public void update(double learningRate, double momentum);

163

164

/** Get activation function */

165

public ActivationFunction activation();

166

}

167

168

/**

169

* Output layer for final predictions

170

*/

171

class OutputLayer extends Layer {

172

/** Create output layer for classification */

173

public OutputLayer(int classes, OutputFunction function);

174

175

/** Create output layer for regression */

176

public OutputLayer(OutputFunction function);

177

178

/** Forward propagation */

179

public void forward(double[] input);

180

181

/** Backward propagation */

182

public void backward(double[] target);

183

184

/** Calculate loss for training sample */

185

public double loss(double[] target);

186

187

/** Get output function */

188

public OutputFunction outputFunction();

189

}

190

```

191

192

### Layer Builders

193

194

Builder pattern for constructing neural network layers.

195

196

```java { .api }

197

/**

198

* Abstract base for layer builders

199

*/

200

abstract class LayerBuilder {

201

/** Build the layer */

202

public abstract Layer build(int inputSize);

203

}

204

205

/**

206

* Builder for hidden layers

207

*/

208

class HiddenLayerBuilder extends LayerBuilder {

209

/** Create hidden layer builder */

210

public HiddenLayerBuilder(int neurons, ActivationFunction activation);

211

212

/** Set dropout rate */

213

public HiddenLayerBuilder dropout(double rate);

214

215

/** Set weight initialization range */

216

public HiddenLayerBuilder weightInit(double range);

217

218

/** Set L1 regularization */

219

public HiddenLayerBuilder l1(double lambda);

220

221

/** Set L2 regularization */

222

public HiddenLayerBuilder l2(double lambda);

223

224

/** Build the hidden layer */

225

public Layer build(int inputSize);

226

}

227

228

/**

229

* Builder for output layers

230

*/

231

class OutputLayerBuilder extends LayerBuilder {

232

/** Create output layer builder */

233

public OutputLayerBuilder(int neurons, OutputFunction function);

234

235

/** Create for binary classification */

236

public static OutputLayerBuilder binary();

237

238

/** Create for multi-class classification */

239

public static OutputLayerBuilder multiclass(int classes);

240

241

/** Create for regression */

242

public static OutputLayerBuilder regression();

243

244

/** Build the output layer */

245

public Layer build(int inputSize);

246

}

247

```

248

249

### Activation Functions

250

251

Various activation functions for neural network layers.

252

253

```java { .api }

254

/**

255

* Base activation function interface

256

*/

257

interface ActivationFunction extends Serializable {

258

/** Apply activation function */

259

double apply(double x);

260

261

/** Compute derivative of activation function */

262

double derivative(double x);

263

264

/** Apply to vector (in-place) */

265

default void apply(double[] x) {

266

for (int i = 0; i < x.length; i++) {

267

x[i] = apply(x[i]);

268

}

269

}

270

271

// Static factory methods for common activations

272

/** Rectified Linear Unit */

273

static ActivationFunction ReLU = new ReLU();

274

275

/** Leaky ReLU */

276

static ActivationFunction LeakyReLU = new LeakyReLU();

277

278

/** Sigmoid function */

279

static ActivationFunction Sigmoid = new Sigmoid();

280

281

/** Hyperbolic tangent */

282

static ActivationFunction Tanh = new Tanh();

283

284

/** Linear activation (identity) */

285

static ActivationFunction Linear = new Linear();

286

}

287

288

/**

289

* Rectified Linear Unit activation

290

*/

291

class ReLU implements ActivationFunction {

292

/** Apply ReLU: max(0, x) */

293

public double apply(double x);

294

295

/** ReLU derivative */

296

public double derivative(double x);

297

}

298

299

/**

300

* Leaky ReLU activation

301

*/

302

class LeakyReLU implements ActivationFunction {

303

/** Create leaky ReLU with default slope 0.01 */

304

public LeakyReLU();

305

306

/** Create with custom negative slope */

307

public LeakyReLU(double alpha);

308

309

/** Apply leaky ReLU */

310

public double apply(double x);

311

312

/** Leaky ReLU derivative */

313

public double derivative(double x);

314

}

315

316

/**

317

* Sigmoid activation function

318

*/

319

class Sigmoid implements ActivationFunction {

320

/** Apply sigmoid: 1 / (1 + exp(-x)) */

321

public double apply(double x);

322

323

/** Sigmoid derivative */

324

public double derivative(double x);

325

}

326

327

/**

328

* Hyperbolic tangent activation

329

*/

330

class Tanh implements ActivationFunction {

331

/** Apply tanh */

332

public double apply(double x);

333

334

/** Tanh derivative */

335

public double derivative(double x);

336

}

337

338

/**

339

* Softmax activation for multi-class output

340

*/

341

class Softmax implements ActivationFunction {

342

/** Apply softmax to vector */

343

public void apply(double[] x);

344

345

/** Softmax derivative matrix */

346

public double[][] derivative(double[] x);

347

}

348

```

349

350

### Output Functions

351

352

Output layer functions for different types of neural network tasks.

353

354

```java { .api }

355

/**

356

* Output function types for neural networks

357

*/

358

enum OutputFunction {

359

/** Linear output for regression */

360

LINEAR,

361

362

/** Sigmoid output for binary classification */

363

SIGMOID,

364

365

/** Softmax output for multi-class classification */

366

SOFTMAX;

367

368

/** Apply output function to layer activations */

369

public void apply(double[] output);

370

371

/** Calculate loss for target values */

372

public double loss(double[] output, double[] target);

373

374

/** Calculate error gradient */

375

public double[] gradient(double[] output, double[] target);

376

}

377

```

378

379

### Cost Functions

380

381

Loss functions for training neural networks.

382

383

```java { .api }

384

/**

385

* Cost function types for neural network training

386

*/

387

enum Cost {

388

/** Mean squared error for regression */

389

MEAN_SQUARED_ERROR,

390

391

/** Cross entropy for classification */

392

CROSS_ENTROPY,

393

394

/** Sparse cross entropy for large vocabulary */

395

SPARSE_CROSS_ENTROPY;

396

397

/** Calculate loss value */

398

public double loss(double[] output, double[] target);

399

400

/** Calculate error gradient */

401

public double[] gradient(double[] output, double[] target);

402

}

403

```

404

405

### Optimizers

406

407

Optimization algorithms for training neural networks.

408

409

```java { .api }

410

/**

411

* Base optimizer interface

412

*/

413

interface Optimizer extends Serializable {

414

/** Update parameters using gradients */

415

void update(double[] parameters, double[] gradients);

416

417

/** Update with learning rate */

418

void update(double[] parameters, double[] gradients, double learningRate);

419

420

/** Reset optimizer state */

421

void reset();

422

}

423

424

/**

425

* Stochastic Gradient Descent optimizer

426

*/

427

class SGD implements Optimizer {

428

/** Create SGD with learning rate */

429

public SGD(double learningRate);

430

431

/** Create SGD with momentum */

432

public SGD(double learningRate, double momentum);

433

434

/** Update parameters */

435

public void update(double[] parameters, double[] gradients);

436

437

/** Get learning rate */

438

public double learningRate();

439

440

/** Set learning rate */

441

public void setLearningRate(double rate);

442

}

443

444

/**

445

* Adam optimizer with adaptive learning rates

446

*/

447

class Adam implements Optimizer {

448

/** Create Adam with default parameters */

449

public Adam();

450

451

/** Create Adam with custom parameters */

452

public Adam(double learningRate, double beta1, double beta2, double epsilon);

453

454

/** Update parameters */

455

public void update(double[] parameters, double[] gradients);

456

457

/** Reset first and second moment estimates */

458

public void reset();

459

}

460

461

/**

462

* RMSProp optimizer

463

*/

464

class RMSProp implements Optimizer {

465

/** Create RMSProp with default parameters */

466

public RMSProp();

467

468

/** Create RMSProp with custom decay rate */

469

public RMSProp(double learningRate, double decay);

470

471

/** Update parameters */

472

public void update(double[] parameters, double[] gradients);

473

474

/** Reset accumulated gradients */

475

public void reset();

476

}

477

```

478

479

### MLP Regression

480

481

Multi-layer perceptron for regression tasks.

482

483

```java { .api }

484

/**

485

* MLP for regression tasks

486

*/

487

class MLPRegression implements Regression<double[]> {

488

/** Train MLP regression with default architecture */

489

public static MLPRegression fit(double[][] x, double[] y);

490

491

/** Train with custom hidden layers */

492

public static MLPRegression fit(double[][] x, double[] y, int[] hiddenLayers);

493

494

/** Train with full configuration */

495

public static MLPRegression fit(double[][] x, double[] y, Properties params);

496

497

/** Predict target value */

498

public double predict(double[] x);

499

500

/** Online learning update */

501

public void update(double[] x, double y);

502

503

/** Get training RMSE */

504

public double rmse();

505

506

/** Get network weights */

507

public double[][][] weights();

508

}

509

```

510

511

### Advanced Neural Network Components

512

513

Additional components for building sophisticated neural networks.

514

515

```java { .api }

516

/**

517

* Dropout layer for regularization

518

*/

519

class DropoutLayer extends Layer {

520

/** Create dropout layer with specified rate */

521

public DropoutLayer(double dropoutRate);

522

523

/** Forward pass with dropout (training mode) */

524

public void forward(double[] input, boolean training);

525

526

/** Set training mode */

527

public void setTraining(boolean training);

528

529

/** Get dropout rate */

530

public double dropoutRate();

531

}

532

533

/**

534

* Batch normalization layer

535

*/

536

class BatchNormLayer extends Layer {

537

/** Create batch normalization layer */

538

public BatchNormLayer(int features);

539

540

/** Forward pass with batch normalization */

541

public void forward(double[] input);

542

543

/** Update running statistics */

544

public void updateStatistics(double[][] batch);

545

546

/** Get learned scale parameters */

547

public double[] gamma();

548

549

/** Get learned shift parameters */

550

public double[] beta();

551

}

552

553

/**

554

* Neural network builder for complex architectures

555

*/

556

class NetworkBuilder {

557

/** Start building network */

558

public static NetworkBuilder input(int dimension);

559

560

/** Add hidden layer */

561

public NetworkBuilder hidden(int neurons, ActivationFunction activation);

562

563

/** Add dropout layer */

564

public NetworkBuilder dropout(double rate);

565

566

/** Add batch normalization */

567

public NetworkBuilder batchNorm();

568

569

/** Set output layer */

570

public NetworkBuilder output(int neurons, OutputFunction function);

571

572

/** Build the network */

573

public MLP build();

574

}

575

```

576

577

**Advanced Usage Example:**

578

579

```java

580

import smile.base.mlp.*;

581

import smile.deep.activation.*;

582

import smile.deep.optimizer.*;

583

584

// Build complex neural network

585

MLP network = NetworkBuilder.input(784) // 28x28 images

586

.hidden(512, ActivationFunction.ReLU)

587

.dropout(0.5)

588

.batchNorm()

589

.hidden(256, ActivationFunction.ReLU)

590

.dropout(0.3)

591

.hidden(128, ActivationFunction.ReLU)

592

.output(10, OutputFunction.SOFTMAX) // 10 classes

593

.build();

594

595

// Custom training loop with Adam optimizer

596

Adam optimizer = new Adam(0.001, 0.9, 0.999, 1e-8);

597

int epochs = 100;

598

int batchSize = 32;

599

600

for (int epoch = 0; epoch < epochs; epoch++) {

601

// Shuffle training data

602

shuffleData(trainX, trainY);

603

604

double epochLoss = 0.0;

605

for (int i = 0; i < trainX.length; i += batchSize) {

606

// Get batch

607

double[][] batchX = getBatch(trainX, i, batchSize);

608

int[] batchY = getBatch(trainY, i, batchSize);

609

610

// Forward and backward pass

611

double batchLoss = network.train(batchX, batchY, optimizer);

612

epochLoss += batchLoss;

613

}

614

615

// Validation

616

double accuracy = evaluate(network, validX, validY);

617

System.out.println("Epoch " + epoch + ", Loss: " + epochLoss + ", Accuracy: " + accuracy);

618

}

619

```

620

621

### Training Configuration

622

623

Common parameters for neural network training:

624

625

- **learningRate**: Learning rate for gradient descent (default: 0.01)

626

- **momentum**: Momentum factor for SGD (default: 0.0)

627

- **weightDecay**: L2 regularization strength (default: 0.0)

628

- **epochs**: Number of training epochs

629

- **batchSize**: Mini-batch size for training

630

- **dropout**: Dropout rate for regularization

631

- **earlyStop**: Early stopping patience

632

- **validation**: Validation split ratio

633

634

### Best Practices

635

636

Guidelines for effective neural network training:

637

638

1. **Data Preprocessing**: Normalize inputs to [0,1] or standardize to mean=0, std=1

639

2. **Architecture**: Start simple, add complexity gradually

640

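3. **Activation Functions**: Use ReLU for hidden layers, and choose the output function that matches the task (LINEAR for regression, SIGMOID for binary classification, SOFTMAX for multi-class classification)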

641

4. **Regularization**: Apply dropout and weight decay to prevent overfitting

642

5. **Learning Rate**: Start with 0.01, adjust based on training dynamics

643

6. **Batch Size**: Use powers of 2 (32, 64, 128) for efficiency

644

7. **Monitoring**: Track both training and validation metrics