0
# Matrix Operations
1
2
Matrix data structures and operations for linear algebra computations in machine learning applications. Provides both dense and sparse implementations with format conversion capabilities and optimized operations.
3
4
## Capabilities
5
6
### Matrix Creation
7
8
Factory methods for creating different types of matrices.
9
10
```scala { .api }
11
object Matrices {
12
/**
13
* Creates a dense matrix in column-major format
14
* @param numRows Number of rows
15
* @param numCols Number of columns
16
* @param values Matrix entries in column major order
17
* @return Dense matrix instance
18
*/
19
def dense(numRows: Int, numCols: Int, values: Array[Double]): Matrix
20
21
/**
22
* Creates a sparse matrix in Compressed Sparse Column (CSC) format
23
* @param numRows Number of rows
24
* @param numCols Number of columns
25
* @param colPtrs Column pointers (length numCols + 1)
26
* @param rowIndices Row indices of non-zero entries
27
* @param values Non-zero matrix entries
28
* @return Sparse matrix instance
29
*/
30
def sparse(
31
numRows: Int,
32
numCols: Int,
33
colPtrs: Array[Int],
34
rowIndices: Array[Int],
35
values: Array[Double]
36
): Matrix
37
38
/**
39
* Creates a zero matrix
40
* @param numRows Number of rows
41
* @param numCols Number of columns
42
* @return Dense zero matrix
43
*/
44
def zeros(numRows: Int, numCols: Int): Matrix
45
46
/**
47
* Creates a matrix of ones
48
* @param numRows Number of rows
49
* @param numCols Number of columns
50
* @return Dense matrix filled with ones
51
*/
52
def ones(numRows: Int, numCols: Int): Matrix
53
54
/**
55
* Creates a dense identity matrix
56
* @param n Size (n x n)
57
* @return Dense identity matrix
58
*/
59
def eye(n: Int): Matrix
60
61
/**
62
* Creates a sparse identity matrix
63
* @param n Size (n x n)
64
* @return Sparse identity matrix
65
*/
66
def speye(n: Int): Matrix
67
68
/**
69
* Creates a diagonal matrix from a vector
70
* @param vector Values for the diagonal
71
* @return Square matrix with vector values on diagonal
72
*/
73
def diag(vector: Vector): Matrix
74
75
/**
76
* Creates a matrix from a sequence of vectors (as rows)
77
* @param vectors Sequence of vectors to use as matrix rows
78
* @return Matrix with vectors as rows
79
*/
80
private[ml] def fromVectors(vectors: Seq[Vector]): Matrix
81
82
/**
83
* Creates a uniform random matrix
84
* @param numRows Number of rows
85
* @param numCols Number of columns
86
* @param rng Random number generator
87
* @return Matrix with uniform random values in [0, 1)
88
*/
89
def rand(numRows: Int, numCols: Int, rng: java.util.Random): Matrix
90
91
/**
92
* Creates a Gaussian random matrix
93
* @param numRows Number of rows
94
* @param numCols Number of columns
95
* @param rng Random number generator
96
* @return Matrix with N(0,1) random values
97
*/
98
def randn(numRows: Int, numCols: Int, rng: java.util.Random): Matrix
99
100
/**
101
* Creates a sparse uniform random matrix
102
* @param numRows Number of rows
103
* @param numCols Number of columns
104
* @param density Fraction of non-zero elements (0.0 to 1.0)
105
* @param rng Random number generator
106
* @return Sparse matrix with uniform random values
107
*/
108
def sprand(numRows: Int, numCols: Int, density: Double, rng: java.util.Random): Matrix
109
110
/**
111
* Creates a sparse Gaussian random matrix
112
* @param numRows Number of rows
113
* @param numCols Number of columns
114
* @param density Fraction of non-zero elements (0.0 to 1.0)
115
* @param rng Random number generator
116
* @return Sparse matrix with N(0,1) random values
117
*/
118
def sprandn(numRows: Int, numCols: Int, density: Double, rng: java.util.Random): Matrix
119
}
120
```
121
122
**Usage Examples:**
123
124
```scala
125
import org.apache.spark.ml.linalg._
126
import java.util.Random
127
128
// Dense matrices
129
val dense = Matrices.dense(3, 2, Array(1.0, 2.0, 3.0, 4.0, 5.0, 6.0))
130
val zeros = Matrices.zeros(5, 5)
131
val ones = Matrices.ones(3, 3)
132
133
// Identity matrices
134
val denseId = Matrices.eye(4)
135
val sparseId = Matrices.speye(4)
136
137
// Sparse matrix (CSC format)
138
val sparse = Matrices.sparse(3, 3,
139
Array(0, 1, 2, 3), // colPtrs
140
Array(0, 1, 2), // rowIndices
141
Array(1.0, 2.0, 3.0) // values
142
)
143
144
// Diagonal matrix
145
val diagonal = Matrices.diag(Vectors.dense(1.0, 2.0, 3.0))
146
```
147
148
### Matrix Operations
149
150
Core operations available on all matrix types.
151
152
```scala { .api }
153
sealed trait Matrix extends Serializable {
154
/**
155
* Number of rows
156
* @return Row count
157
*/
158
def numRows: Int
159
160
/**
161
* Number of columns
162
* @return Column count
163
*/
164
def numCols: Int
165
166
/**
167
* Whether matrix is transposed
168
* @return True if matrix is in transposed form
169
*/
170
def isTransposed: Boolean
171
172
/**
173
* Convert to array in column major order
174
* @return Array containing all matrix values
175
*/
176
def toArray: Array[Double]
177
178
/**
179
* Get element at position (i, j)
180
* @param i Row index (0-based)
181
* @param j Column index (0-based)
182
* @return Value at position (i, j)
183
*/
184
def apply(i: Int, j: Int): Double
185
186
/**
187
* Get deep copy of matrix
188
* @return New matrix instance with copied values
189
*/
190
def copy: Matrix
191
192
/**
193
* Transpose the matrix (returns new instance)
194
* @return Transposed matrix sharing underlying data
195
*/
196
def transpose: Matrix
197
198
/**
199
* Matrix-vector multiplication
200
* @param y Vector to multiply (size must match numCols)
201
* @return Result vector of size numRows
202
*/
203
def multiply(y: Vector): DenseVector
204
205
/**
206
* Matrix-matrix multiplication
207
* @param y Matrix to multiply (numRows must match this.numCols)
208
* @return Result matrix of size (numRows x y.numCols)
209
*/
210
def multiply(y: DenseMatrix): DenseMatrix
211
212
/**
213
* Number of non-zero values
214
* @return Count of non-zero elements
215
*/
216
def numNonzeros: Int
217
218
/**
219
* Number of stored values
220
* @return Count of explicitly stored elements
221
*/
222
def numActives: Int
223
224
/**
225
* Iterator over column vectors
226
* @return Iterator of Vector instances (one per column)
227
*/
228
def colIter: Iterator[Vector]
229
230
/**
231
* Iterator over row vectors
232
* @return Iterator of Vector instances (one per row)
233
*/
234
def rowIter: Iterator[Vector]
235
236
/**
237
* Apply function to all active elements
238
* @param f Function taking (row, col, value) parameters
239
*/
240
def foreachActive(f: (Int, Int, Double) => Unit): Unit
241
242
/**
243
* Map values using function (creates new matrix)
244
* @param f Function to apply to each value
245
* @return New matrix with transformed values
246
*/
247
private[spark] def map(f: Double => Double): Matrix
248
249
/**
250
* Update values in-place using function
251
* @param f Function to apply to each value
252
* @return This matrix with updated values
253
*/
254
private[ml] def update(f: Double => Double): Matrix
255
256
/**
257
* Update element at position (i, j)
258
* @param i Row index
259
* @param j Column index
260
* @param v New value
261
*/
262
private[ml] def update(i: Int, j: Int, v: Double): Unit
263
264
/**
265
* Get index for element (i, j) in backing array
266
* @param i Row index
267
* @param j Column index
268
* @return Array index for element
269
*/
270
private[ml] def index(i: Int, j: Int): Int
271
}
272
```
273
274
### Format Conversion
275
276
Methods for converting between dense and sparse representations and between column-major and row-major layouts.
277
278
```scala { .api }
279
sealed trait Matrix extends Serializable {
280
/**
281
* Convert to sparse column major format
282
* @return Sparse matrix in CSC format
283
*/
284
def toSparseColMajor: SparseMatrix
285
286
/**
287
* Convert to sparse row major format
288
* @return Sparse matrix in CSR format
289
*/
290
def toSparseRowMajor: SparseMatrix
291
292
/**
293
* Convert to a sparse matrix while maintaining the current layout
294
* @return Sparse matrix preserving row/column major order
295
*/
296
def toSparse: SparseMatrix
297
298
/**
299
* Convert to dense column major format
300
* @return Dense matrix in column major order
301
*/
302
def toDenseColMajor: DenseMatrix
303
304
/**
305
* Convert to dense row major format
306
* @return Dense matrix in row major order
307
*/
308
def toDenseRowMajor: DenseMatrix
309
310
/**
311
* Convert to a dense matrix while maintaining the current layout
312
* @return Dense matrix preserving row/column major order
313
*/
314
def toDense: DenseMatrix
315
316
/**
317
* Get matrix in most compact column major format
318
* @return Dense or sparse column-major matrix, whichever uses less storage
319
*/
320
def compressedColMajor: Matrix
321
322
/**
323
* Get matrix in most compact row major format
324
* @return Dense or sparse row-major matrix, whichever uses less storage
325
*/
326
def compressedRowMajor: Matrix
327
328
/**
329
* Get matrix in optimal storage format
330
* @return Matrix in whichever format (dense or sparse) uses the least memory
331
*/
332
def compressed: Matrix
333
}
334
```
335
336
**Usage Examples:**
337
338
```scala
339
import org.apache.spark.ml.linalg._
340
341
val matrix = Matrices.dense(3, 3, Array(1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 3.0))
342
343
// Format conversions
344
val sparse = matrix.toSparse
345
val dense = sparse.toDense
346
val compressed = matrix.compressed // Automatically chooses optimal format
347
348
// Matrix operations
349
val vector = Vectors.dense(1.0, 2.0, 3.0)
350
val result = matrix.multiply(vector)
351
352
val other = Matrices.dense(3, 2, Array(1.0, 2.0, 3.0, 4.0, 5.0, 6.0))
353
val product = matrix.multiply(other.asInstanceOf[DenseMatrix])
354
355
// Iteration
356
matrix.foreachActive { (i, j, v) =>
357
if (v != 0.0) println(s"($i, $j) = $v")
358
}
359
```
360
361
### Dense Matrices
362
363
Dense matrix implementation that stores every element in a single array, in column-major order by default (row-major when `isTransposed` is true).
364
365
```scala { .api }
366
class DenseMatrix(
367
val numRows: Int,
368
val numCols: Int,
369
val values: Array[Double],
370
override val isTransposed: Boolean = false
371
) extends Matrix {
372
/**
373
* Create column-major dense matrix
374
* @param numRows Number of rows
375
* @param numCols Number of columns
376
* @param values Matrix entries in column major order
377
*/
378
def this(numRows: Int, numCols: Int, values: Array[Double])
379
}
380
381
object DenseMatrix {
382
/**
383
* Create zero matrix
384
* @param numRows Number of rows
385
* @param numCols Number of columns
386
* @return Dense matrix filled with zeros
387
*/
388
def zeros(numRows: Int, numCols: Int): DenseMatrix
389
390
/**
391
* Create matrix of ones
392
* @param numRows Number of rows
393
* @param numCols Number of columns
394
* @return Dense matrix filled with ones
395
*/
396
def ones(numRows: Int, numCols: Int): DenseMatrix
397
398
/**
399
* Create identity matrix
400
* @param n Size (n x n)
401
* @return Dense identity matrix
402
*/
403
def eye(n: Int): DenseMatrix
404
405
/**
406
* Create uniform random matrix
407
* @param numRows Number of rows
408
* @param numCols Number of columns
409
* @param rng Random number generator
410
* @return Dense matrix with values in [0, 1)
411
*/
412
def rand(numRows: Int, numCols: Int, rng: java.util.Random): DenseMatrix
413
414
/**
415
* Create Gaussian random matrix
416
* @param numRows Number of rows
417
* @param numCols Number of columns
418
* @param rng Random number generator
419
* @return Dense matrix with N(0,1) values
420
*/
421
def randn(numRows: Int, numCols: Int, rng: java.util.Random): DenseMatrix
422
423
/**
424
* Create diagonal matrix
425
* @param vector Diagonal values
426
* @return Square dense matrix with vector on diagonal
427
*/
428
def diag(vector: Vector): DenseMatrix
429
430
/**
431
* Extract components from dense matrix (for pattern matching)
432
* @param dm Dense matrix instance
433
* @return Some((numRows, numCols, values, isTransposed)) or None
434
*/
435
private[ml] def unapply(dm: DenseMatrix): Option[(Int, Int, Array[Double], Boolean)]
436
437
/**
438
* Create dense matrix from sequence of vectors (as rows)
439
* @param vectors Sequence of vectors to use as matrix rows
440
* @return Dense matrix with vectors as rows
441
*/
442
private[ml] def fromVectors(vectors: Seq[Vector]): DenseMatrix
443
}
444
```
445
446
### Sparse Matrices
447
448
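Sparse matrix implementation using Compressed Sparse Column (CSC) format by default; when `isTransposed` is true, the same arrays are interpreted as Compressed Sparse Row (CSR).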
449
450
```scala { .api }
451
class SparseMatrix(
452
val numRows: Int,
453
val numCols: Int,
454
val colPtrs: Array[Int],
455
val rowIndices: Array[Int],
456
val values: Array[Double],
457
override val isTransposed: Boolean = false
458
) extends Matrix {
459
/**
460
* Create column-major sparse matrix in CSC format
461
* @param numRows Number of rows
462
* @param numCols Number of columns
463
* @param colPtrs Column pointers (length numCols + 1)
464
* @param rowIndices Row indices of non-zero entries
465
* @param values Non-zero matrix entries
466
*/
467
def this(
468
numRows: Int,
469
numCols: Int,
470
colPtrs: Array[Int],
471
rowIndices: Array[Int],
472
values: Array[Double]
473
)
474
}
475
476
object SparseMatrix {
477
/**
478
* Create sparse matrix from coordinate (COO) format
479
* @param numRows Number of rows
480
* @param numCols Number of columns
481
* @param entries Iterable of (row, col, value) tuples
482
* @return Sparse matrix in CSC format
483
*/
484
def fromCOO(
485
numRows: Int,
486
numCols: Int,
487
entries: Iterable[(Int, Int, Double)]
488
): SparseMatrix
489
490
/**
491
* Create sparse identity matrix
492
* @param n Size (n x n)
493
* @return Sparse identity matrix
494
*/
495
def speye(n: Int): SparseMatrix
496
497
/**
498
* Create sparse uniform random matrix
499
* @param numRows Number of rows
500
* @param numCols Number of columns
501
* @param density Fraction of non-zero elements (0.0 to 1.0)
502
* @param rng Random number generator
503
* @return Sparse matrix with uniform random values
504
*/
505
def sprand(numRows: Int, numCols: Int, density: Double, rng: java.util.Random): SparseMatrix
506
507
/**
508
* Create sparse Gaussian random matrix
509
* @param numRows Number of rows
510
* @param numCols Number of columns
511
* @param density Fraction of non-zero elements (0.0 to 1.0)
512
* @param rng Random number generator
513
* @return Sparse matrix with N(0,1) values
514
*/
515
def sprandn(numRows: Int, numCols: Int, density: Double, rng: java.util.Random): SparseMatrix
516
517
/**
518
* Create sparse diagonal matrix
519
* @param vector Diagonal values
520
* @return Square sparse matrix with vector on diagonal
521
*/
522
def spdiag(vector: Vector): SparseMatrix
523
524
/**
525
* Extract components from sparse matrix (for pattern matching)
526
* @param sm Sparse matrix instance
527
* @return Some((numRows, numCols, colPtrs, rowIndices, values, isTransposed)) or None
528
*/
529
private[ml] def unapply(sm: SparseMatrix): Option[(Int, Int, Array[Int], Array[Int], Array[Double], Boolean)]
530
531
/**
532
* Create sparse matrix from sequence of vectors (as rows)
533
* @param vectors Sequence of vectors to use as matrix rows
534
* @return Sparse matrix with vectors as rows in CSR format
535
*/
536
private[ml] def fromVectors(vectors: Seq[Vector]): SparseMatrix
537
}
538
```
539
540
### Matrix Concatenation
541
542
Utility functions for combining matrices.
543
544
```scala { .api }
545
object Matrices {
546
/**
547
* Horizontally concatenate matrices
548
* @param matrices Array of matrices (same number of rows)
549
* @return Single matrix with matrices side-by-side
550
*/
551
def horzcat(matrices: Array[Matrix]): Matrix
552
553
/**
554
* Vertically concatenate matrices
555
* @param matrices Array of matrices (same number of columns)
556
* @return Single matrix with matrices stacked vertically
557
*/
558
def vertcat(matrices: Array[Matrix]): Matrix
559
}
560
```
561
562
**Usage Examples:**
563
564
```scala
565
import org.apache.spark.ml.linalg._
566
567
val m1 = Matrices.dense(2, 2, Array(1.0, 2.0, 3.0, 4.0))
568
val m2 = Matrices.dense(2, 2, Array(5.0, 6.0, 7.0, 8.0))
569
570
// Horizontal concatenation: [m1 m2]
571
val hcat = Matrices.horzcat(Array(m1, m2)) // 2x4 matrix
572
573
// Vertical concatenation: [m1; m2]
574
val vcat = Matrices.vertcat(Array(m1, m2)) // 4x2 matrix
575
```
576
577
## Type Hierarchy
578
579
```
580
Matrix (sealed trait)
581
├── DenseMatrix (class)
582
└── SparseMatrix (class)
583
```
584
585
## Storage Formats
586
587
- **Dense matrices**: Column-major storage (Fortran-style) for BLAS compatibility
588
- **Sparse matrices**: Compressed Sparse Column (CSC) format by default
589
- **Transposed matrices**: Logical transposition without data copying
590
- **Format conversion**: `compressed`, `compressedColMajor`, and `compressedRowMajor` return whichever of the dense or sparse representations uses less storage
591
592
## Performance Notes
593
594
- **Dense matrices**: Optimal for small to medium matrices or when most elements are non-zero
595
- **Sparse matrices**: Optimal for large matrices with many zero elements (typically < 34% density)
596
- **Native acceleration**: Dense operations use optimized BLAS when available
597
- **Memory efficiency**: Use the `compressed` method to automatically choose the most compact format