or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

advanced-analytics.md classification.md clustering.md deep-learning.md feature-engineering.md index.md regression.md validation-metrics.md

docs/advanced-analytics.md

0

# Advanced Analytics

1

2

Specialized algorithms for manifold learning, time series analysis, sequence modeling, association rule mining, anomaly detection, and other advanced machine learning tasks. Smile Core provides sophisticated tools for complex analytical scenarios.

3

4

## Capabilities

5

6

### Manifold Learning

7

8

Algorithms for discovering low-dimensional structure in high-dimensional data.

9

10

```java { .api }

11

/**

12

* Multi-Dimensional Scaling for manifold learning

13

*/

14

class MDS implements Serializable {

15

/** Perform classical MDS */

16

public static MDS fit(double[][] proximity);

17

18

/** Perform MDS with specified dimensions */

19

public static MDS fit(double[][] proximity, int k);

20

21

/** Perform metric MDS with stress minimization */

22

public static MDS fit(double[][] proximity, int k, boolean metric);

23

24

/** Get embedded coordinates */

25

public final double[][] coordinates;

26

27

/** Get eigenvalues */

28

public final double[] eigenvalues;

29

30

/** Get stress value */

31

public final double stress;

32

}

33

34

/**

35

* Isotonic MDS with monotonic distance constraints

36

*/

37

class IsotonicMDS implements Serializable {

38

/** Perform isotonic MDS */

39

public static IsotonicMDS fit(double[][] proximity, int k);

40

41

/** Get embedded coordinates */

42

public final double[][] coordinates;

43

44

/** Get stress value */

45

public final double stress;

46

}

47

48

/**

49

* Sammon's mapping for non-linear dimensionality reduction

50

*/

51

class SammonMapping implements Serializable {

52

/** Perform Sammon mapping */

53

public static SammonMapping fit(double[][] data, int k);

54

55

/** Perform with custom parameters */

56

public static SammonMapping fit(double[][] data, int k, double learningRate, int maxIter);

57

58

/** Get embedded coordinates */

59

public final double[][] coordinates;

60

61

/** Get final stress */

62

public final double stress;

63

}

64

65

/**

66

* Isomap for non-linear manifold learning

67

*/

68

class IsoMap implements Serializable {

69

/** Perform Isomap with k-nearest neighbors */

70

public static IsoMap fit(double[][] data, int k, int d);

71

72

/** Perform with epsilon neighborhood */

73

public static IsoMap fit(double[][] data, double epsilon, int d);

74

75

/** Get embedded coordinates */

76

public final double[][] coordinates;

77

78

/** Get geodesic distances */

79

public final double[][] distances;

80

}

81

82

/**

83

* Locally Linear Embedding

84

*/

85

class LLE implements Serializable {

86

/** Perform LLE with k neighbors */

87

public static LLE fit(double[][] data, int k, int d);

88

89

/** Get embedded coordinates */

90

public final double[][] coordinates;

91

92

/** Get reconstruction weights */

93

public final double[][] weights;

94

}

95

96

/**

97

* Laplacian Eigenmap for spectral manifold learning

98

*/

99

class LaplacianEigenmap implements Serializable {

100

/** Perform Laplacian Eigenmap */

101

public static LaplacianEigenmap fit(double[][] data, int k, int d);

102

103

/** Perform with RBF kernel */

104

public static LaplacianEigenmap fit(double[][] data, int k, int d, double sigma);

105

106

/** Get embedded coordinates */

107

public final double[][] coordinates;

108

109

/** Get eigenvalues */

110

public final double[] eigenvalues;

111

}

112

113

/**

114

* t-SNE for visualization and non-linear embedding

115

*/

116

class TSNE implements Serializable {

117

/** Perform t-SNE with default parameters */

118

public static TSNE fit(double[][] data);

119

120

/** Perform t-SNE with custom parameters */

121

public static TSNE fit(double[][] data, int d, double perplexity, double learningRate, int maxIter);

122

123

/** Get embedded coordinates */

124

public final double[][] coordinates;

125

126

/** Get final KL divergence */

127

public final double klDivergence;

128

}

129

130

/**

131

* UMAP for uniform manifold approximation

132

*/

133

class UMAP implements Serializable {

134

/** Perform UMAP with default parameters */

135

public static UMAP fit(double[][] data);

136

137

/** Perform UMAP with custom parameters */

138

public static UMAP fit(double[][] data, int d, int nNeighbors, double minDist, int nEpochs);

139

140

/** Get embedded coordinates */

141

public final double[][] coordinates;

142

143

/** Transform new data points */

144

public double[] transform(double[] x);

145

}

146

```

147

148

**Usage Example:**

149

150

```java

151

import smile.manifold.*;

152

153

// t-SNE for visualization

154

TSNE tsne = TSNE.fit(highDimData, 2, 30.0, 200.0, 1000);

155

double[][] embedding = tsne.coordinates;

156

157

// UMAP for general manifold learning

158

UMAP umap = UMAP.fit(data, 10, 15, 0.1, 200);

159

double[][] reducedData = umap.coordinates;

160

double[] newPoint = umap.transform(testSample);

161

162

// Isomap for geodesic distances

163

IsoMap isomap = IsoMap.fit(data, 10, 5); // 10 neighbors, 5 dimensions

164

double[][] manifoldCoords = isomap.coordinates;

165

```

166

167

### Time Series Analysis

168

169

Algorithms for analyzing temporal data patterns and forecasting.

170

171

```java { .api }

172

/**

173

* Time series utilities and analysis tools

174

*/

175

class TimeSeries {

176

/** Calculate autocorrelation function */

177

public static double[] autocorrelation(double[] data);

178

179

/** Calculate autocorrelation with max lag */

180

public static double[] autocorrelation(double[] data, int maxLag);

181

182

/** Calculate cross-correlation between two series */

183

public static double[] crosscorrelation(double[] x, double[] y);

184

185

/** Calculate partial autocorrelation function */

186

public static double[] pacf(double[] data, int maxLag);

187

188

/** Differencing for stationarity */

189

public static double[] difference(double[] data);

190

191

/** Seasonal differencing */

192

public static double[] seasonalDifference(double[] data, int period);

193

194

/** Moving average smoothing */

195

public static double[] movingAverage(double[] data, int window);

196

197

/** Exponential smoothing */

198

public static double[] exponentialSmoothing(double[] data, double alpha);

199

}

200

201

/**

202

* Autoregressive model for time series forecasting

203

*/

204

class AR implements Serializable {

205

/** Estimation methods */

206

enum Method { BURG, OLS, MLE }

207

208

/** Fit AR model using Burg method */

209

public static AR fit(double[] data, int p);

210

211

/** Fit AR model with specified method */

212

public static AR fit(double[] data, int p, Method method);

213

214

/** Get AR coefficients */

215

public double[] coefficients();

216

217

/** Get model order */

218

public int order();

219

220

/** Get white noise variance */

221

public double variance();

222

223

/** Forecast future values */

224

public double[] forecast(int steps);

225

226

/** One-step ahead prediction */

227

public double predict(double[] history);

228

}

229

230

/**

231

* ARMA model combining autoregressive and moving average

232

*/

233

class ARMA implements Serializable {

234

/** Fit ARMA model */

235

public static ARMA fit(double[] data, int p, int q);

236

237

/** Fit with custom initialization */

238

public static ARMA fit(double[] data, int p, int q, boolean includeIntercept);

239

240

/** Get AR coefficients */

241

public double[] arCoefficients();

242

243

/** Get MA coefficients */

244

public double[] maCoefficients();

245

246

/** Get intercept term */

247

public double intercept();

248

249

/** Forecast future values */

250

public double[] forecast(int steps);

251

252

/** Calculate residuals */

253

public double[] residuals();

254

255

/** Get AIC (Akaike Information Criterion) */

256

public double aic();

257

}

258

259

/**

260

* Box test for time series diagnostics

261

*/

262

class BoxTest {

263

/** Test types */

264

enum Type { LJUNG_BOX, BOX_PIERCE }

265

266

/** Perform Ljung-Box test */

267

public static BoxTest ljungBox(double[] residuals, int lags);

268

269

/** Perform Box-Pierce test */

270

public static BoxTest boxPierce(double[] residuals, int lags);

271

272

/** Test statistic */

273

public final double statistic;

274

275

/** P-value */

276

public final double pvalue;

277

278

/** Degrees of freedom */

279

public final int df;

280

}

281

```

282

283

### Sequence Modeling

284

285

Algorithms for labeling and analyzing sequential data.

286

287

```java { .api }

288

/**

289

* Base interface for sequence labeling

290

* @param <T> the type of sequence elements

291

*/

292

interface SequenceLabeler<T> {

293

/** Predict labels for sequence */

294

int[] predict(T[] sequence);

295

296

/** Get label vocabulary */

297

default String[] labels();

298

}

299

300

/**

301

* Hidden Markov Model for sequence analysis

302

*/

303

class HMM implements Serializable {

304

/** Train HMM from observation sequences */

305

public static HMM fit(int[][] observations, int numStates);

306

307

/** Train with known state sequences */

308

public static HMM fit(int[][] observations, int[][] states, int numStates, int numSymbols);

309

310

/** Predict most likely state sequence (Viterbi) */

311

public int[] predict(int[] observations);

312

313

/** Calculate sequence probability (forward algorithm) */

314

public double probability(int[] observations);

315

316

/** Get transition probabilities */

317

public double[][] transitionProbabilities();

318

319

/** Get emission probabilities */

320

public double[][] emissionProbabilities();

321

322

/** Get initial state probabilities */

323

public double[] initialProbabilities();

324

}

325

326

/**

327

* HMM-based sequence labeler

328

* @param <T> the type of sequence elements

329

*/

330

class HMMLabeler<T> implements SequenceLabeler<T> {

331

/** Train HMM labeler */

332

public static <T> HMMLabeler<T> fit(T[][] sequences, int[][] labels, Function<T, Integer> encoder);

333

334

/** Predict labels for sequence */

335

public int[] predict(T[] sequence);

336

337

/** Get underlying HMM */

338

public HMM hmm();

339

}

340

341

/**

342

* Conditional Random Field for sequence labeling

343

*/

344

class CRF implements Serializable {

345

/** Train CRF from feature sequences and labels */

346

public static CRF fit(double[][][] features, int[][] labels);

347

348

/** Train with regularization */

349

public static CRF fit(double[][][] features, int[][] labels, double lambda);

350

351

/** Predict label sequence */

352

public int[] predict(double[][] features);

353

354

/** Calculate sequence probability */

355

public double probability(double[][] features, int[] labels);

356

357

/** Get feature weights */

358

public double[] weights();

359

360

/** Get number of labels */

361

public int numLabels();

362

}

363

364

/**

365

* CRF-based sequence labeler

366

* @param <T> the type of sequence elements

367

*/

368

class CRFLabeler<T> implements SequenceLabeler<T> {

369

/** Train CRF labeler with feature extractor */

370

public static <T> CRFLabeler<T> fit(T[][] sequences, int[][] labels, Function<T[], double[][]> featureExtractor);

371

372

/** Predict labels for sequence */

373

public int[] predict(T[] sequence);

374

375

/** Get underlying CRF */

376

public CRF crf();

377

}

378

379

/**

380

* Trellis for dynamic programming in sequence algorithms

381

*/

382

class Trellis {

383

/** Create trellis for sequence length and states */

384

public static Trellis of(int length, int states);

385

386

/** Forward algorithm for HMM */

387

public double forward(HMM hmm, int[] observations);

388

389

/** Backward algorithm for HMM */

390

public double backward(HMM hmm, int[] observations);

391

392

/** Viterbi algorithm for best path */

393

public int[] viterbi(HMM hmm, int[] observations);

394

}

395

```

396

397

### Association Rule Mining

398

399

Algorithms for discovering frequent patterns and association rules in transactional data.

400

401

```java { .api }

402

/**

403

* Association rule mining; iterating over an instance yields the discovered AssociationRule objects

404

*/

405

class ARM implements Iterable<AssociationRule>, Serializable {

406

/** Mine association rules from transactions */

407

public static ARM fit(int[][] transactions, double minSupport, double minConfidence);

408

409

/** Mine with additional constraints */

410

public static ARM fit(int[][] transactions, double minSupport, double minConfidence, int maxRuleLength);

411

412

/** Iterate over discovered rules */

413

public Iterator<AssociationRule> iterator();

414

415

/** Get number of rules */

416

public int size();

417

418

/** Get all rules as array */

419

public AssociationRule[] rules();

420

}

421

422

/**

423

* Association rule representation

424

*/

425

class AssociationRule implements Serializable {

426

/** Rule antecedent (if part) */

427

public final int[] antecedent;

428

429

/** Rule consequent (then part) */

430

public final int[] consequent;

431

432

/** Rule support (frequency) */

433

public final double support;

434

435

/** Rule confidence */

436

public final double confidence;

437

438

/** Rule lift */

439

public final double lift;

440

441

/** Rule conviction */

442

public final double conviction;

443

444

/** Convert to string representation */

445

public String toString();

446

}

447

448

/**

449

* FP-Growth algorithm for frequent pattern mining

450

*/

451

class FPGrowth implements Iterable<ItemSet>, Serializable {

452

/** Mine frequent patterns */

453

public static FPGrowth fit(int[][] transactions, double minSupport);

454

455

/** Mine with minimum pattern length */

456

public static FPGrowth fit(int[][] transactions, double minSupport, int minLength);

457

458

/** Iterate over frequent itemsets */

459

public Iterator<ItemSet> iterator();

460

461

/** Get number of frequent itemsets */

462

public int size();

463

464

/** Get all itemsets as array */

465

public ItemSet[] itemsets();

466

}

467

468

/**

469

* Frequent Pattern Tree for FP-Growth

470

*/

471

class FPTree implements Serializable {

472

/** Build FP-tree from transactions */

473

public static FPTree of(int[][] transactions, double minSupport);

474

475

/** Add transaction to tree */

476

public void add(int[] transaction);

477

478

/** Mine patterns from tree */

479

public ItemSet[] mine(double minSupport);

480

481

/** Get header table */

482

public Map<Integer, Integer> headerTable();

483

}

484

485

/**

486

* Item set representation

487

*/

488

class ItemSet implements Serializable {

489

/** Items in the set */

490

public final int[] items;

491

492

/** Support count */

493

public final int support;

494

495

/** Support frequency */

496

public final double frequency;

497

498

/** Get itemset size */

499

public int size();

500

501

/** Check if contains item */

502

public boolean contains(int item);

503

}

504

505

/**

506

* Total Support Tree for association mining

507

*/

508

class TotalSupportTree implements Serializable {

509

/** Build total support tree */

510

public static TotalSupportTree of(int[][] transactions);

511

512

/** Add transaction */

513

public void add(int[] transaction);

514

515

/** Get total support for itemset */

516

public int support(int[] itemset);

517

}

518

```

519

520

### Anomaly Detection

521

522

Algorithms for identifying outliers and anomalous patterns in data.

523

524

```java { .api }

525

/**

526

* Isolation Forest for anomaly detection

527

*/

528

class IsolationForest implements Serializable {

529

/** Train isolation forest */

530

public static IsolationForest fit(double[][] data);

531

532

/** Train with custom parameters */

533

public static IsolationForest fit(double[][] data, int numTrees, int subsampleSize);

534

535

/** Calculate anomaly score (higher = more anomalous) */

536

public double score(double[] x);

537

538

/** Predict if sample is anomaly */

539

public boolean predict(double[] x, double threshold);

540

541

/** Get isolation trees */

542

public IsolationTree[] trees();

543

544

/** Calculate average path length for normalization */

545

public double averagePathLength(int n);

546

}

547

548

/**

549

* Individual isolation tree

550

*/

551

class IsolationTree implements Serializable {

552

/** Build isolation tree from data */

553

public static IsolationTree fit(double[][] data, int maxDepth);

554

555

/** Calculate path length for sample */

556

public double pathLength(double[] x);

557

558

/** Get tree height */

559

public int height();

560

561

/** Get number of leaves */

562

public int leaves();

563

}

564

565

/**

566

* One-class SVM for anomaly detection

567

*/

568

class SVM {

569

/** Train one-class SVM */

570

public static SVM fit(double[][] data, double nu);

571

572

/** Train with RBF kernel */

573

public static SVM fit(double[][] data, double nu, double gamma);

574

575

/** Predict if sample is normal (1) or anomaly (-1) */

576

public int predict(double[] x);

577

578

/** Calculate decision function value */

579

public double score(double[] x);

580

581

/** Get support vectors */

582

public double[][] supportVectors();

583

}

584

```

585

586

### Vector Quantization

587

588

Self-organizing algorithms for data compression and visualization.

589

590

```java { .api }

591

/**

592

* Base vector quantizer interface

593

*/

594

interface VectorQuantizer {

595

/** Quantize input vector to nearest prototype */

596

int quantize(double[] x);

597

598

/** Get prototype vectors */

599

double[][] prototypes();

600

601

/** Get quantization error */

602

double quantizationError(double[][] data);

603

}

604

605

/**

606

* Self-Organizing Map for vector quantization

607

*/

608

class SOM implements VectorQuantizer, Serializable {

609

/** Train SOM with rectangular grid */

610

public static SOM fit(double[][] data, int width, int height);

611

612

/** Train with custom parameters */

613

public static SOM fit(double[][] data, int width, int height, double learningRate, int epochs);

614

615

/** Quantize vector to best matching unit */

616

public int quantize(double[] x);

617

618

/** Get prototype at grid position */

619

public double[] prototype(int x, int y);

620

621

/** Get all prototypes */

622

public double[][] prototypes();

623

624

/** Get grid dimensions */

625

public int[] dimensions();

626

627

/** Calculate topographic error */

628

public double topographicError(double[][] data);

629

}

630

631

/**

632

* Neural Gas algorithm

633

*/

634

class NeuralGas implements VectorQuantizer, Serializable {

635

/** Train Neural Gas */

636

public static NeuralGas fit(double[][] data, int numPrototypes);

637

638

/** Train with custom parameters */

639

public static NeuralGas fit(double[][] data, int numPrototypes, double learningRate, int epochs);

640

641

/** Quantize vector */

642

public int quantize(double[] x);

643

644

/** Get prototypes */

645

public double[][] prototypes();

646

647

/** Get prototype ages */

648

public int[] ages();

649

}

650

651

/**

652

* Growing Neural Gas with dynamic topology

653

*/

654

class GrowingNeuralGas implements VectorQuantizer, Serializable {

655

/** Train Growing Neural Gas */

656

public static GrowingNeuralGas fit(double[][] data, int maxNodes);

657

658

/** Quantize vector */

659

public int quantize(double[] x);

660

661

/** Get current prototypes */

662

public double[][] prototypes();

663

664

/** Get topology edges */

665

public int[][] edges();

666

667

/** Get number of nodes */

668

public int size();

669

}

670

```

671

672

### Hyperparameter Optimization

673

674

Tools for optimizing machine learning model hyperparameters.

675

676

```java { .api }

677

/**

678

* Hyperparameter optimization utilities

679

*/

680

class Hyperparameters {

681

/** Grid search over parameter combinations */

682

public static <T> T grid(Function<Map<String, Object>, T> trainer,

683

Map<String, Object[]> paramGrid,

684

Function<T, Double> evaluator);

685

686

/** Random search over parameter distributions */

687

public static <T> T random(Function<Map<String, Object>, T> trainer,

688

Map<String, Distribution> paramDist,

689

int nIter,

690

Function<T, Double> evaluator);

691

692

/** Bayesian optimization using Gaussian processes */

693

public static <T> T bayesian(Function<Map<String, Object>, T> trainer,

694

Map<String, Double[]> bounds,

695

int nIter,

696

Function<T, Double> evaluator);

697

698

/** Tree-structured Parzen Estimator optimization */

699

public static <T> T tpe(Function<Map<String, Object>, T> trainer,

700

Map<String, Distribution> space,

701

int nIter,

702

Function<T, Double> evaluator);

703

}

704

```

705

706

**Comprehensive Usage Example:**

707

708

```java

709

import smile.manifold.TSNE;

710

import smile.timeseries.AR;

711

import smile.association.ARM;
import smile.association.AssociationRule;

712

import smile.anomaly.IsolationForest;
import smile.sequence.HMM;

713

714

// Manifold learning for visualization

715

TSNE tsne = TSNE.fit(highDimData, 2, 30.0, 200.0, 1000);

716

double[][] visualization = tsne.coordinates;

717

718

// Time series forecasting

719

AR arModel = AR.fit(timeSeries, 5); // AR(5) model

720

double[] forecast = arModel.forecast(10); // 10-step forecast

721

722

// Association rule mining

723

ARM arm = ARM.fit(transactions, 0.01, 0.5); // 1% support, 50% confidence

724

for (AssociationRule rule : arm) {

725

System.out.println(rule.toString() + " (lift: " + rule.lift + ")");

726

}

727

728

// Anomaly detection

729

IsolationForest iforest = IsolationForest.fit(normalData, 100, 256);

730

for (double[] sample : testData) {

731

double score = iforest.score(sample);

732

boolean isAnomaly = iforest.predict(sample, 0.1); // 10% anomaly threshold

733

System.out.println("Sample score: " + score + ", Anomaly: " + isAnomaly);

734

}

735

736

// Sequence labeling with HMM

737

HMM hmm = HMM.fit(observationSequences, 5); // 5 hidden states

738

int[] predictedStates = hmm.predict(newObservations);

739

```

740

741

### Advanced Analytics Integration

742

743

These advanced analytics capabilities integrate seamlessly with Smile's core machine learning framework:

744

745

- **Preprocessing**: Use manifold learning for dimensionality reduction before classification

746

- **Feature Engineering**: Extract time series features for predictive modeling

747

- **Pattern Discovery**: Mine association rules to understand data relationships

748

- **Quality Control**: Apply anomaly detection for data cleaning and monitoring

749

- **Evaluation**: Use sequence modeling metrics for temporal prediction tasks

750

- **Optimization**: Apply hyperparameter tuning to all model types