Tessl Tile for maven/org.apache.spark/spark-mllib_2.13@4.0.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

classification.md clustering.md evaluation-tuning.md feature-engineering.md index.md linear-algebra.md pipeline-components.md recommendation.md regression.md

docs/linear-algebra.md

0
# Linear Algebra
1

2
Local vector and matrix data structures with their linear algebra operations, plus the legacy RDD-based distributed matrices for large-scale numerical computations across cluster nodes.
3

4
## Capabilities
5

6
### Vector Operations
7

8
Core vector data structures and operations for representing feature vectors and model parameters.
9

10
```scala { .api }
11
/**
12
 * Abstract base class for vectors
13
 */
14
abstract class Vector extends Serializable {
15
  def size: Int
16
  def apply(i: Int): Double
17
  def copy: Vector
18
  def foreachActive(f: (Int, Double) => Unit): Unit
19
  def numActives: Int
20
  def numNonzeros: Int
21
  def toArray: Array[Double]
22
  def toSparse: SparseVector
23
  def toDense: DenseVector
24
  def compressed: Vector
25
  def argmax: Int
26
  def dot(v: Vector): Double
27
  def equals(other: Any): Boolean
28
  def hashCode(): Int
29
  def toString: String
30
}
31

32
/**
33
 * Dense vector implementation storing all values
34
 */
35
class DenseVector(val values: Array[Double]) extends Vector {
36
  def size: Int = values.length
37
  def apply(i: Int): Double = values(i)
38
  def copy: DenseVector = new DenseVector(values.clone())
39
  def update(i: Int, value: Double): Unit = values(i) = value
40
  def dot(other: Vector): Double
41
  def norm(p: Double): Double
42
}
43

44
/**
45
 * Sparse vector implementation storing only non-zero values
46
 */
47
class SparseVector(
48
  override val size: Int,
49
  val indices: Array[Int],
50
  val values: Array[Double]
51
) extends Vector {
52
  def apply(i: Int): Double = {
53
    val idx = java.util.Arrays.binarySearch(indices, i)
54
    if (idx >= 0) values(idx) else 0.0
55
  }
56
  def copy: SparseVector = new SparseVector(size, indices.clone(), values.clone())
57
  def dot(other: Vector): Double
58
  def norm(p: Double): Double
59
}
60

61
/**
62
 * Vector factory methods and utilities
63
 */
64
object Vectors {
65
  def dense(firstValue: Double, otherValues: Double*): DenseVector
66
  def dense(values: Array[Double]): DenseVector
67
  def sparse(size: Int, elements: Seq[(Int, Double)]): SparseVector
68
  def sparse(size: Int, indices: Array[Int], values: Array[Double]): SparseVector
69
  def zeros(size: Int): DenseVector
70
  def norm(vector: Vector, p: Double): Double
71
  def sqdist(v1: Vector, v2: Vector): Double
72
  def fromML(v: org.apache.spark.mllib.linalg.Vector): Vector
73
  def fromBreeze(bv: breeze.linalg.Vector[Double]): Vector
74
}
75
```
76

77
**Usage Example:**
78

79
```scala
80
import org.apache.spark.ml.linalg.{Vector, Vectors}
81

82
// Create dense vector
83
val denseVec = Vectors.dense(1.0, 2.0, 3.0, 4.0)
84
println(s"Dense vector: $denseVec")
85

86
// Create sparse vector
87
val sparseVec = Vectors.sparse(4, Array(0, 2, 3), Array(1.0, 3.0, 5.0)) // size 4, same as denseVec
88
println(s"Sparse vector: $sparseVec")
89

90
// Vector operations
91
val norm = Vectors.norm(denseVec, 2.0)
92
println(s"L2 norm: $norm")
93

94
val distance = Vectors.sqdist(denseVec, sparseVec.toDense)
95
println(s"Squared distance: $distance")
96
```
97

98
### Matrix Operations
99

100
Matrix data structures and operations for representing datasets and model parameters.
101

102
```scala { .api }
103
/**
104
 * Abstract base class for matrices
105
 */
106
abstract class Matrix extends Serializable {
107
  def numRows: Int
108
  def numCols: Int
109
  def apply(i: Int, j: Int): Double
110
  def copy: Matrix
111
  def foreachActive(f: (Int, Int, Double) => Unit): Unit
112
  def numActives: Int
113
  def numNonzeros: Int
114
  def toArray: Array[Double]
115
  def isTransposed: Boolean
116
  def asML: org.apache.spark.mllib.linalg.Matrix
117
  def toSparse: SparseMatrix
118
  def toDense: DenseMatrix
119
  def transpose: Matrix
120
  def multiply(y: DenseVector): DenseVector
121
  def multiply(y: DenseMatrix): DenseMatrix
122
  def equals(other: Any): Boolean
123
  def hashCode(): Int
124
  def toString: String
125
}
126

127
/**
128
 * Dense matrix implementation storing all values in column-major order
129
 */
130
class DenseMatrix(
131
  val numRows: Int,
132
  val numCols: Int,
133
  val values: Array[Double],
134
  val isTransposed: Boolean = false
135
) extends Matrix {
136
  def apply(i: Int, j: Int): Double = {
137
    if (isTransposed) values(i * numCols + j)
138
    else values(i + j * numRows)
139
  }
140
  def copy: DenseMatrix = new DenseMatrix(numRows, numCols, values.clone(), isTransposed)
141
  def update(i: Int, j: Int, value: Double): Unit = {
142
    if (isTransposed) values(i * numCols + j) = value
143
    else values(i + j * numRows) = value
144
  }
145
}
146

147
/**
148
 * Sparse matrix implementation storing only non-zero values in compressed sparse column format
149
 */
150
class SparseMatrix(
151
  val numRows: Int, 
152
  val numCols: Int,
153
  val colPtrs: Array[Int],
154
  val rowIndices: Array[Int],
155
  val values: Array[Double],
156
  val isTransposed: Boolean = false
157
) extends Matrix {
158
  def apply(i: Int, j: Int): Double = {
159
    val startIdx = colPtrs(j)
160
    val endIdx = colPtrs(j + 1)
161
    val idx = java.util.Arrays.binarySearch(rowIndices, startIdx, endIdx, i)
162
    if (idx >= 0) values(idx) else 0.0
163
  }
164
  def copy: SparseMatrix = new SparseMatrix(
165
    numRows, numCols, colPtrs.clone(), rowIndices.clone(), values.clone(), isTransposed
166
  )
167
}
168

169
/**
170
 * Matrix factory methods and utilities
171
 */
172
object Matrices {
173
  def dense(numRows: Int, numCols: Int, values: Array[Double]): DenseMatrix
174
  def sparse(numRows: Int, numCols: Int, entries: Seq[(Int, Int, Double)]): SparseMatrix
175
  def sparse(
176
    numRows: Int,
177
    numCols: Int, 
178
    colPtrs: Array[Int],
179
    rowIndices: Array[Int],
180
    values: Array[Double]
181
  ): SparseMatrix
182
  def eye(n: Int): DenseMatrix
183
  def zeros(numRows: Int, numCols: Int): DenseMatrix
184
  def ones(numRows: Int, numCols: Int): DenseMatrix
185
  def diag(vector: Vector): DenseMatrix
186
  def rand(numRows: Int, numCols: Int, rng: Random): DenseMatrix
187
  def randn(numRows: Int, numCols: Int, rng: Random): DenseMatrix
188
  def horzcat(matrices: Array[Matrix]): Matrix
189
  def vertcat(matrices: Array[Matrix]): Matrix
190
  def fromML(m: org.apache.spark.mllib.linalg.Matrix): Matrix
191
  def fromBreeze(bm: breeze.linalg.Matrix[Double]): Matrix
192
}
193
```
194

195
**Usage Example:**
196

197
```scala
198
import org.apache.spark.ml.linalg.{Matrix, Matrices, Vectors}
199

200
// Create dense matrix
201
val denseMatrix = Matrices.dense(3, 2, Array(1.0, 3.0, 5.0, 2.0, 4.0, 6.0))
202
println(s"Dense matrix:\n$denseMatrix")
203

204
// Create sparse matrix  
205
val sparseMatrix = Matrices.sparse(3, 2, Array(0, 1, 2), Array(0, 2), Array(9.0, 6.0))
206
println(s"Sparse matrix:\n$sparseMatrix")
207

208
// Matrix operations
209
val identity = Matrices.eye(3)
210
val vector = Vectors.dense(1.0, 2.0, 3.0)
211
val result = identity.multiply(vector.toDense)
212
println(s"Matrix-vector multiplication: $result")
213
```
214

215
### Advanced Linear Algebra Operations
216

217
Extended operations for complex numerical computations and transformations.
218

219
```scala { .api }
220
/**
221
 * BLAS (Basic Linear Algebra Subprograms) operations
222
 */
223
object BLAS {
224
  /**
225
   * Vector dot product: x^T * y
226
   */
227
  def dot(x: Vector, y: Vector): Double
228
  
229
  /**
230
   * Vector L2 norm: ||x||_2
231
   */
232
  def nrm2(x: Vector): Double
233
  
234
  /**
235
   * Scalar-vector multiplication: a * x
236
   */
237
  def scal(a: Double, x: Vector): Unit
238
  
239
  /**
240
   * Vector addition: y := a * x + y
241
   */
242
  def axpy(a: Double, x: Vector, y: Vector): Unit
243
  
244
  /**
245
   * Matrix-vector multiplication: y := alpha * A * x + beta * y
246
   */
247
  def gemv(
248
    alpha: Double, A: Matrix, x: Vector, beta: Double, y: Vector
249
  ): Unit
250
  
251
  /**
252
   * Matrix-matrix multiplication: C := alpha * A * B + beta * C
253
   */
254
  def gemm(
255
    alpha: Double, A: Matrix, B: Matrix, beta: Double, C: Matrix
256
  ): Unit
257
  
258
  /**
259
   * Symmetric matrix-vector multiplication
260
   */
261
  def symv(
262
    alpha: Double, A: Matrix, x: Vector, beta: Double, y: Vector
263
  ): Unit
264
  
265
  /**
266
   * Rank-1 update: A := alpha * x * y^T + A
267
   */
268
  def ger(alpha: Double, x: Vector, y: Vector, A: Matrix): Unit
269
  
270
  /**
271
   * Symmetric rank-1 update: A := alpha * x * x^T + A
272
   */
273
  def syr(alpha: Double, x: Vector, A: Matrix): Unit
274
}
275

276
/**
277
 * LAPACK (Linear Algebra Package) operations
278
 */
279
object LAPACK {
280
  /**
281
   * Cholesky decomposition
282
   */
283
  def potrf(A: DenseMatrix): Int
284
  
285
  /**
286
   * Solve linear system using Cholesky decomposition
287
   */
288
  def potrs(A: DenseMatrix, B: DenseMatrix): Int
289
  
290
  /**
291
   * QR decomposition
292
   */
293
  def geqrf(A: DenseMatrix, tau: Array[Double]): Int
294
  
295
  /**
296
   * Singular Value Decomposition
297
   */
298
  def gesvd(
299
    A: DenseMatrix,
300
    U: DenseMatrix,
301
    s: Array[Double],
302
    Vt: DenseMatrix
303
  ): Int
304
  
305
  /**
306
   * Eigenvalue decomposition
307
   */
308
  def syev(
309
    A: DenseMatrix,
310
    w: Array[Double]
311
  ): Int
312
}
313
```
314

315
### Vector and Matrix Conversions
316

317
Utilities for converting between different vector and matrix representations.
318

319
```scala { .api }
320
/**
321
 * Conversion utilities between MLlib and ML linear algebra types
322
 */
323
object LinearAlgebraUtils {
324
  /**
325
   * Convert ML vector to MLlib vector
326
   */
327
  def toMLlib(v: org.apache.spark.ml.linalg.Vector): org.apache.spark.mllib.linalg.Vector
328
  
329
  /**
330
   * Convert MLlib vector to ML vector
331
   */
332
  def fromMLlib(v: org.apache.spark.mllib.linalg.Vector): org.apache.spark.ml.linalg.Vector
333
  
334
  /**
335
   * Convert ML matrix to MLlib matrix
336
   */
337
  def toMLlib(m: org.apache.spark.ml.linalg.Matrix): org.apache.spark.mllib.linalg.Matrix
338
  
339
  /**
340
   * Convert MLlib matrix to ML matrix
341
   */
342
  def fromMLlib(m: org.apache.spark.mllib.linalg.Matrix): org.apache.spark.ml.linalg.Matrix
343
  
344
  /**
345
   * Convert Breeze vector to ML vector
346
   */
347
  def fromBreeze(bv: breeze.linalg.Vector[Double]): Vector
348
  
349
  /**
350
   * Convert ML vector to Breeze vector
351
   */
352
  def toBreeze(v: Vector): breeze.linalg.Vector[Double]
353
  
354
  /**
355
   * Convert Breeze matrix to ML matrix
356
   */
357
  def fromBreeze(bm: breeze.linalg.Matrix[Double]): Matrix
358
  
359
  /**
360
   * Convert ML matrix to Breeze matrix
361
   */
362
  def toBreeze(m: Matrix): breeze.linalg.Matrix[Double]
363
}
364
```
365

366
### Distributed Linear Algebra (Legacy MLlib)
367

368
Large-scale distributed matrix operations from the legacy RDD-based API.
369

370
```scala { .api }
371
/**
372
 * Base class for distributed matrices
373
 */
374
abstract class org.apache.spark.mllib.linalg.distributed.DistributedMatrix {
375
  def numRows(): Long
376
  def numCols(): Long
377
}
378

379
/**
380
 * Row-oriented distributed matrix
381
 */
382
class org.apache.spark.mllib.linalg.distributed.RowMatrix(
383
  val rows: RDD[org.apache.spark.mllib.linalg.Vector]
384
) extends DistributedMatrix {
385
  def computeColumnSummaryStatistics(): MultivariateStatisticalSummary
386
  def computeCovariance(): org.apache.spark.mllib.linalg.Matrix
387
  def computeGramianMatrix(): org.apache.spark.mllib.linalg.Matrix
388
  def computePrincipalComponents(k: Int): org.apache.spark.mllib.linalg.Matrix
389
  def computeSVD(
390
    k: Int,
391
    computeU: Boolean = false,
392
    rCond: Double = 1e-9
393
  ): SingularValueDecomposition[RowMatrix, org.apache.spark.mllib.linalg.Matrix]
394
  def multiply(B: org.apache.spark.mllib.linalg.Matrix): RowMatrix
395
  def columnSimilarities(): CoordinateMatrix
396
}
397

398
/**
399
 * Indexed row matrix for matrices with meaningful row indices
400
 */
401
class org.apache.spark.mllib.linalg.distributed.IndexedRowMatrix(
402
  val rows: RDD[IndexedRow]
403
) extends DistributedMatrix {
404
  def toRowMatrix(): RowMatrix
405
  def toCoordinateMatrix(): CoordinateMatrix
406
  def toBlockMatrix(): BlockMatrix
407
  def multiply(B: org.apache.spark.mllib.linalg.Matrix): IndexedRowMatrix
408
  def computeGramianMatrix(): org.apache.spark.mllib.linalg.Matrix
409
}
410

411
/**
412
 * Coordinate matrix for matrices stored as (row, col, value) triplets
413
 */
414
class org.apache.spark.mllib.linalg.distributed.CoordinateMatrix(
415
  val entries: RDD[MatrixEntry]
416
) extends DistributedMatrix {
417
  def toRowMatrix(): RowMatrix
418
  def toIndexedRowMatrix(): IndexedRowMatrix
419
  def toBlockMatrix(): BlockMatrix
420
  def transpose(): CoordinateMatrix
421
}
422

423
/**
424
 * Block matrix for matrices partitioned into blocks
425
 */
426
class org.apache.spark.mllib.linalg.distributed.BlockMatrix(
427
  val blocks: RDD[((Int, Int), org.apache.spark.mllib.linalg.Matrix)],
428
  val rowsPerBlock: Int,
429
  val colsPerBlock: Int
430
) extends DistributedMatrix {
431
  def add(other: BlockMatrix): BlockMatrix
432
  def subtract(other: BlockMatrix): BlockMatrix
433
  def multiply(other: BlockMatrix): BlockMatrix
434
  def transpose: BlockMatrix
435
  def toLocalMatrix(): org.apache.spark.mllib.linalg.Matrix
436
  def toIndexedRowMatrix(): IndexedRowMatrix
437
  def toCoordinateMatrix(): CoordinateMatrix
438
}
439
```
440

441
### Statistical Summary
442

443
Statistical operations on vectors and matrices for data analysis.
444

445
```scala { .api }
446
/**
447
 * Multivariate statistical summary
448
 */
449
trait MultivariateStatisticalSummary {
450
  def mean: org.apache.spark.mllib.linalg.Vector
451
  def variance: org.apache.spark.mllib.linalg.Vector
452
  def count: Long
453
  def numNonzeros: org.apache.spark.mllib.linalg.Vector
454
  def max: org.apache.spark.mllib.linalg.Vector
455
  def min: org.apache.spark.mllib.linalg.Vector
456
  def normL1: org.apache.spark.mllib.linalg.Vector
457
  def normL2: org.apache.spark.mllib.linalg.Vector
458
}
459

460
/**
461
 * Online multivariate summarizer for streaming statistics
462
 */
463
class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary {
464
  def add(sample: org.apache.spark.mllib.linalg.Vector): this.type
465
  def add(sample: org.apache.spark.mllib.linalg.Vector, weight: Double): this.type
466
  def merge(other: MultivariateOnlineSummarizer): this.type
467
}
468
```
469

470
## Types
471

472
```scala { .api }
473
// Core linear algebra imports
474
import org.apache.spark.ml.linalg._
475

476
// Vector types
477
import org.apache.spark.ml.linalg.{Vector, DenseVector, SparseVector, Vectors}
478

479
// Matrix types
480
import org.apache.spark.ml.linalg.{Matrix, DenseMatrix, SparseMatrix, Matrices}
481

482
// BLAS and LAPACK operations
483
import org.apache.spark.ml.linalg.{BLAS, LAPACK}
484

485
// Legacy distributed linear algebra (from mllib)
486
import org.apache.spark.mllib.linalg.distributed._
487
import org.apache.spark.mllib.stat.{MultivariateStatisticalSummary, MultivariateOnlineSummarizer}
488

489
// Conversion utilities
490
import org.apache.spark.mllib.linalg.{Vector => OldVector, Matrix => OldMatrix}
491
import breeze.linalg.{Vector => BreezeVector, Matrix => BreezeMatrix}
492

493
// Supporting types
494
case class IndexedRow(index: Long, vector: org.apache.spark.mllib.linalg.Vector)
495
case class MatrixEntry(i: Long, j: Long, value: Double)
496
case class SingularValueDecomposition[RowType, MatrixType](
497
  U: RowType,
498
  s: org.apache.spark.mllib.linalg.Vector,
499
  V: MatrixType
500
)
501
```

Version

Tile

Files

docs/linear-algebra.md

docs/linear-algebra.md