or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

classification.md · clustering.md · evaluation-tuning.md · feature-engineering.md · index.md · linear-algebra.md · pipeline-components.md · recommendation.md · regression.md

docs/linear-algebra.md

0

# Linear Algebra

1

2

Local vector and matrix data structures with BLAS/LAPACK-style operations, plus distributed matrix types (from the legacy RDD-based API) for large-scale numerical computations across cluster nodes.

3

4

## Capabilities

5

6

### Vector Operations

7

8

Core vector data structures and operations for representing feature vectors and model parameters.

9

10

```scala { .api }

11

/**

12

* Abstract base class for vectors

13

*/

14

abstract class Vector extends Serializable {

15

def size: Int

16

def apply(i: Int): Double

17

def copy: Vector

18

def foreachActive(f: (Int, Double) => Unit): Unit

19

def numActives: Int

20

def numNonzeros: Int

21

def toArray: Array[Double]

22

def toSparse: SparseVector

23

def toDense: DenseVector

24

def compressed: Vector

25

def argmax: Int

26

def dot(v: Vector): Double

27

def equals(other: Any): Boolean

28

def hashCode(): Int

29

def toString: String

30

}

31

32

/**

33

* Dense vector implementation storing all values

34

*/

35

class DenseVector(val values: Array[Double]) extends Vector {

36

def size: Int = values.length

37

def apply(i: Int): Double = values(i)

38

def copy: DenseVector = new DenseVector(values.clone())

39

def update(i: Int, value: Double): Unit = values(i) = value

40

def dot(other: Vector): Double

41

def norm(p: Double): Double

42

}

43

44

/**

45

* Sparse vector implementation storing only non-zero values

46

*/

47

class SparseVector(

48

override val size: Int,

49

val indices: Array[Int],

50

val values: Array[Double]

51

) extends Vector {

52

def apply(i: Int): Double = {

53

val idx = java.util.Arrays.binarySearch(indices, i)

54

if (idx >= 0) values(idx) else 0.0

55

}

56

def copy: SparseVector = new SparseVector(size, indices.clone(), values.clone())

57

def dot(other: Vector): Double

58

def norm(p: Double): Double

59

}

60

61

/**

62

* Vector factory methods and utilities

63

*/

64

object Vectors {

65

def dense(firstValue: Double, otherValues: Double*): DenseVector

66

def dense(values: Array[Double]): DenseVector

67

def sparse(size: Int, elements: Seq[(Int, Double)]): SparseVector

68

def sparse(size: Int, indices: Array[Int], values: Array[Double]): SparseVector

69

def zeros(size: Int): DenseVector

70

def norm(vector: Vector, p: Double): Double

71

def sqdist(v1: Vector, v2: Vector): Double

72

def fromML(v: org.apache.spark.mllib.linalg.Vector): Vector

73

def fromBreeze(bv: breeze.linalg.Vector[Double]): Vector

74

}

75

```

76

77

**Usage Example:**

78

79

```scala

80

import org.apache.spark.ml.linalg.{Vector, Vectors}

81

82

// Create dense vector

83

val denseVec = Vectors.dense(1.0, 2.0, 3.0, 4.0)

84

println(s"Dense vector: $denseVec")

85

86

// Create sparse vector

87

val sparseVec = Vectors.sparse(4, Array(0, 2, 3), Array(1.0, 3.0, 5.0)) // same size as denseVec so sqdist below is valid

88

println(s"Sparse vector: $sparseVec")

89

90

// Vector operations

91

val norm = Vectors.norm(denseVec, 2.0)

92

println(s"L2 norm: $norm")

93

94

val distance = Vectors.sqdist(denseVec, sparseVec.toDense)

95

println(s"Squared distance: $distance")

96

```

97

98

### Matrix Operations

99

100

Matrix data structures and operations for representing datasets and model parameters.

101

102

```scala { .api }

103

/**

104

* Abstract base class for matrices

105

*/

106

abstract class Matrix extends Serializable {

107

def numRows: Int

108

def numCols: Int

109

def apply(i: Int, j: Int): Double

110

def copy: Matrix

111

def foreachActive(f: (Int, Int, Double) => Unit): Unit

112

def numActives: Int

113

def numNonzeros: Int

114

def toArray: Array[Double]

115

def isTransposed: Boolean

116

def asML: org.apache.spark.mllib.linalg.Matrix

117

def toSparse: SparseMatrix

118

def toDense: DenseMatrix

119

def transpose: Matrix

120

def multiply(y: DenseVector): DenseVector

121

def multiply(y: DenseMatrix): DenseMatrix

122

def equals(other: Any): Boolean

123

def hashCode(): Int

124

def toString: String

125

}

126

127

/**

128

* Dense matrix implementation storing all values in column-major order (row-major when isTransposed is true)

129

*/

130

class DenseMatrix(

131

val numRows: Int,

132

val numCols: Int,

133

val values: Array[Double],

134

val isTransposed: Boolean = false

135

) extends Matrix {

136

def apply(i: Int, j: Int): Double = {

137

if (isTransposed) values(i * numCols + j)

138

else values(i + j * numRows)

139

}

140

def copy: DenseMatrix = new DenseMatrix(numRows, numCols, values.clone(), isTransposed)

141

def update(i: Int, j: Int, value: Double): Unit = {

142

if (isTransposed) values(i * numCols + j) = value

143

else values(i + j * numRows) = value

144

}

145

}

146

147

/**

148

* Sparse matrix implementation storing only non-zero values in compressed sparse column format

149

*/

150

class SparseMatrix(

151

val numRows: Int,

152

val numCols: Int,

153

val colPtrs: Array[Int],

154

val rowIndices: Array[Int],

155

val values: Array[Double],

156

val isTransposed: Boolean = false

157

) extends Matrix {

158

def apply(i: Int, j: Int): Double = {

159

val startIdx = colPtrs(j)

160

val endIdx = colPtrs(j + 1)

161

val idx = java.util.Arrays.binarySearch(rowIndices, startIdx, endIdx, i)

162

if (idx >= 0) values(idx) else 0.0

163

}

164

def copy: SparseMatrix = new SparseMatrix(

165

numRows, numCols, colPtrs.clone(), rowIndices.clone(), values.clone(), isTransposed

166

)

167

}

168

169

/**

170

* Matrix factory methods and utilities

171

*/

172

object Matrices {

173

def dense(numRows: Int, numCols: Int, values: Array[Double]): DenseMatrix

174

def sparse(numRows: Int, numCols: Int, entries: Seq[(Int, Int, Double)]): SparseMatrix

175

def sparse(

176

numRows: Int,

177

numCols: Int,

178

colPtrs: Array[Int],

179

rowIndices: Array[Int],

180

values: Array[Double]

181

): SparseMatrix

182

def eye(n: Int): DenseMatrix

183

def zeros(numRows: Int, numCols: Int): DenseMatrix

184

def ones(numRows: Int, numCols: Int): DenseMatrix

185

def diag(vector: Vector): DenseMatrix

186

def rand(numRows: Int, numCols: Int, rng: Random): DenseMatrix

187

def randn(numRows: Int, numCols: Int, rng: Random): DenseMatrix

188

def horzcat(matrices: Array[Matrix]): Matrix

189

def vertcat(matrices: Array[Matrix]): Matrix

190

def fromML(m: org.apache.spark.mllib.linalg.Matrix): Matrix

191

def fromBreeze(bm: breeze.linalg.Matrix[Double]): Matrix

192

}

193

```

194

195

**Usage Example:**

196

197

```scala

198

import org.apache.spark.ml.linalg.{Matrix, Matrices, Vectors}

199

200

// Create dense matrix

201

val denseMatrix = Matrices.dense(3, 2, Array(1.0, 3.0, 5.0, 2.0, 4.0, 6.0))

202

println(s"Dense matrix:\n$denseMatrix")

203

204

// Create sparse matrix

205

val sparseMatrix = Matrices.sparse(3, 2, Seq((0, 0, 9.0), (2, 1, 6.0)))

206

println(s"Sparse matrix:\n$sparseMatrix")

207

208

// Matrix operations

209

val identity = Matrices.eye(3)

210

val vector = Vectors.dense(1.0, 2.0, 3.0)

211

val result = identity.multiply(vector.toDense)

212

println(s"Matrix-vector multiplication: $result")

213

```

214

215

### Advanced Linear Algebra Operations

216

217

Extended operations for complex numerical computations and transformations.

218

219

```scala { .api }

220

/**

221

* BLAS (Basic Linear Algebra Subprograms) operations

222

*/

223

object BLAS {

224

/**

225

* Vector dot product: x^T * y

226

*/

227

def dot(x: Vector, y: Vector): Double

228

229

/**

230

* Vector L2 norm: ||x||_2

231

*/

232

def nrm2(x: Vector): Double

233

234

/**

235

* In-place scalar-vector multiplication: x := a * x

236

*/

237

def scal(a: Double, x: Vector): Unit

238

239

/**

240

* Vector addition: y := a * x + y

241

*/

242

def axpy(a: Double, x: Vector, y: Vector): Unit

243

244

/**

245

* Matrix-vector multiplication: y := alpha * A * x + beta * y

246

*/

247

def gemv(

248

alpha: Double, A: Matrix, x: Vector, beta: Double, y: Vector

249

): Unit

250

251

/**

252

* Matrix-matrix multiplication: C := alpha * A * B + beta * C

253

*/

254

def gemm(

255

alpha: Double, A: Matrix, B: Matrix, beta: Double, C: Matrix

256

): Unit

257

258

/**

259

* Symmetric matrix-vector multiplication: y := alpha * A * x + beta * y, where A is symmetric

260

*/

261

def symv(

262

alpha: Double, A: Matrix, x: Vector, beta: Double, y: Vector

263

): Unit

264

265

/**

266

* Rank-1 update: A := alpha * x * y^T + A

267

*/

268

def ger(alpha: Double, x: Vector, y: Vector, A: Matrix): Unit

269

270

/**

271

* Symmetric rank-1 update: A := alpha * x * x^T + A

272

*/

273

def syr(alpha: Double, x: Vector, A: Matrix): Unit

274

}

275

276

/**

277

* LAPACK (Linear Algebra Package) operations

278

*/

279

object LAPACK {

280

/**

281

* Cholesky decomposition

282

*/

283

def potrf(A: DenseMatrix): Int

284

285

/**

286

* Solve linear system using Cholesky decomposition

287

*/

288

def potrs(A: DenseMatrix, B: DenseMatrix): Int

289

290

/**

291

* QR decomposition

292

*/

293

def geqrf(A: DenseMatrix, tau: Array[Double]): Int

294

295

/**

296

* Singular Value Decomposition

297

*/

298

def gesvd(

299

A: DenseMatrix,

300

U: DenseMatrix,

301

s: Array[Double],

302

Vt: DenseMatrix

303

): Int

304

305

/**

306

* Eigenvalue decomposition

307

*/

308

def syev(

309

A: DenseMatrix,

310

w: Array[Double]

311

): Int

312

}

313

```

314

315

### Vector and Matrix Conversions

316

317

Utilities for converting between different vector and matrix representations.

318

319

```scala { .api }

320

/**

321

* Conversion utilities between MLlib and ML linear algebra types

322

*/

323

object LinearAlgebraUtils {

324

/**

325

* Convert ML vector to MLlib vector

326

*/

327

def toMLlib(v: org.apache.spark.ml.linalg.Vector): org.apache.spark.mllib.linalg.Vector

328

329

/**

330

* Convert MLlib vector to ML vector

331

*/

332

def fromMLlib(v: org.apache.spark.mllib.linalg.Vector): org.apache.spark.ml.linalg.Vector

333

334

/**

335

* Convert ML matrix to MLlib matrix

336

*/

337

def toMLlib(m: org.apache.spark.ml.linalg.Matrix): org.apache.spark.mllib.linalg.Matrix

338

339

/**

340

* Convert MLlib matrix to ML matrix

341

*/

342

def fromMLlib(m: org.apache.spark.mllib.linalg.Matrix): org.apache.spark.ml.linalg.Matrix

343

344

/**

345

* Convert Breeze vector to ML vector

346

*/

347

def fromBreeze(bv: breeze.linalg.Vector[Double]): Vector

348

349

/**

350

* Convert ML vector to Breeze vector

351

*/

352

def toBreeze(v: Vector): breeze.linalg.Vector[Double]

353

354

/**

355

* Convert Breeze matrix to ML matrix

356

*/

357

def fromBreeze(bm: breeze.linalg.Matrix[Double]): Matrix

358

359

/**

360

* Convert ML matrix to Breeze matrix

361

*/

362

def toBreeze(m: Matrix): breeze.linalg.Matrix[Double]

363

}

364

```

365

366

### Distributed Linear Algebra (Legacy MLlib)

367

368

Large-scale distributed matrix operations from the legacy RDD-based API.

369

370

```scala { .api }

371

/**

372

* Base class for distributed matrices

373

*/

374

abstract class org.apache.spark.mllib.linalg.distributed.DistributedMatrix {

375

def numRows(): Long

376

def numCols(): Long

377

}

378

379

/**

380

* Row-oriented distributed matrix

381

*/

382

class org.apache.spark.mllib.linalg.distributed.RowMatrix(

383

val rows: RDD[org.apache.spark.mllib.linalg.Vector]

384

) extends DistributedMatrix {

385

def computeColumnSummaryStatistics(): MultivariateStatisticalSummary

386

def computeCovariance(): org.apache.spark.mllib.linalg.Matrix

387

def computeGramianMatrix(): org.apache.spark.mllib.linalg.Matrix

388

def computePrincipalComponents(k: Int): org.apache.spark.mllib.linalg.Matrix

389

def computeSVD(

390

k: Int,

391

computeU: Boolean = false,

392

rCond: Double = 1e-9

393

): SingularValueDecomposition[RowMatrix, org.apache.spark.mllib.linalg.Matrix]

394

def multiply(B: org.apache.spark.mllib.linalg.Matrix): RowMatrix

395

def columnSimilarities(): CoordinateMatrix

396

}

397

398

/**

399

* Indexed row matrix for matrices with meaningful row indices

400

*/

401

class org.apache.spark.mllib.linalg.distributed.IndexedRowMatrix(

402

val rows: RDD[IndexedRow]

403

) extends DistributedMatrix {

404

def toRowMatrix(): RowMatrix

405

def toCoordinateMatrix(): CoordinateMatrix

406

def toBlockMatrix(): BlockMatrix

407

def multiply(B: org.apache.spark.mllib.linalg.Matrix): IndexedRowMatrix

408

def computeGramianMatrix(): org.apache.spark.mllib.linalg.Matrix

409

}

410

411

/**

412

* Coordinate matrix for matrices stored as (row, col, value) triplets

413

*/

414

class org.apache.spark.mllib.linalg.distributed.CoordinateMatrix(

415

val entries: RDD[MatrixEntry]

416

) extends DistributedMatrix {

417

def toRowMatrix(): RowMatrix

418

def toIndexedRowMatrix(): IndexedRowMatrix

419

def toBlockMatrix(): BlockMatrix

420

def transpose(): CoordinateMatrix

421

}

422

423

/**

424

* Block matrix for matrices partitioned into blocks

425

*/

426

class org.apache.spark.mllib.linalg.distributed.BlockMatrix(

427

val blocks: RDD[((Int, Int), org.apache.spark.mllib.linalg.Matrix)],

428

val rowsPerBlock: Int,

429

val colsPerBlock: Int

430

) extends DistributedMatrix {

431

def add(other: BlockMatrix): BlockMatrix

432

def subtract(other: BlockMatrix): BlockMatrix

433

def multiply(other: BlockMatrix): BlockMatrix

434

def transpose: BlockMatrix

435

def toLocalMatrix(): org.apache.spark.mllib.linalg.Matrix

436

def toIndexedRowMatrix(): IndexedRowMatrix

437

def toCoordinateMatrix(): CoordinateMatrix

438

}

439

```

440

441

### Statistical Summary

442

443

Statistical operations on vectors and matrices for data analysis.

444

445

```scala { .api }

446

/**

447

* Multivariate statistical summary

448

*/

449

trait MultivariateStatisticalSummary {

450

def mean: org.apache.spark.mllib.linalg.Vector

451

def variance: org.apache.spark.mllib.linalg.Vector

452

def count: Long

453

def numNonzeros: org.apache.spark.mllib.linalg.Vector

454

def max: org.apache.spark.mllib.linalg.Vector

455

def min: org.apache.spark.mllib.linalg.Vector

456

def normL1: org.apache.spark.mllib.linalg.Vector

457

def normL2: org.apache.spark.mllib.linalg.Vector

458

}

459

460

/**

461

* Online multivariate summarizer for streaming statistics

462

*/

463

class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary {

464

def add(sample: org.apache.spark.mllib.linalg.Vector): this.type

465

def add(sample: org.apache.spark.mllib.linalg.Vector, weight: Double): this.type

466

def merge(other: MultivariateOnlineSummarizer): this.type

467

}

468

```

469

470

## Types

471

472

```scala { .api }

473

// Core linear algebra imports

474

import org.apache.spark.ml.linalg._

475

476

// Vector types

477

import org.apache.spark.ml.linalg.{Vector, DenseVector, SparseVector, Vectors}

478

479

// Matrix types

480

import org.apache.spark.ml.linalg.{Matrix, DenseMatrix, SparseMatrix, Matrices}

481

482

// BLAS and LAPACK operations

483

import org.apache.spark.ml.linalg.{BLAS, LAPACK}

484

485

// Legacy distributed linear algebra (from mllib)

486

import org.apache.spark.mllib.linalg.distributed._

487

import org.apache.spark.mllib.stat.{MultivariateStatisticalSummary, MultivariateOnlineSummarizer}

488

489

// Conversion utilities

490

import org.apache.spark.mllib.linalg.{Vector => OldVector, Matrix => OldMatrix}

491

import breeze.linalg.{Vector => BreezeVector, Matrix => BreezeMatrix}

492

493

// Supporting types

494

case class IndexedRow(index: Long, vector: org.apache.spark.mllib.linalg.Vector)

495

case class MatrixEntry(i: Long, j: Long, value: Double)

496

case class SingularValueDecomposition[RowType, MatrixType](

497

U: RowType,

498

s: org.apache.spark.mllib.linalg.Vector,

499

V: MatrixType

500

)

501

```