# Matrix Operations

Comprehensive matrix functionality supporting both dense and sparse matrices, with efficient storage formats and mathematical operations. These operations are essential for the linear algebra computations used in machine learning algorithms.

## Capabilities

### Matrix Creation

Create matrices using factory methods from the `Matrices` object.

```scala { .api }
/**
 * Factory methods for creating Matrix instances
 */
object Matrices {
  /**
   * Creates a dense matrix in column-major order
   *
   * @param numRows number of rows
   * @param numCols number of columns
   * @param values matrix entries laid out column by column
   */
  def dense(numRows: Int, numCols: Int, values: Array[Double]): Matrix

  /**
   * Creates a sparse matrix in Compressed Sparse Column (CSC) format
   *
   * @param numRows number of rows
   * @param numCols number of columns
   * @param colPtrs offset at which each column starts in `rowIndices`/`values`
   *                (length `numCols + 1`)
   * @param rowIndices row index of each stored entry
   * @param values stored entries, column by column
   */
  def sparse(
    numRows: Int,
    numCols: Int,
    colPtrs: Array[Int],
    rowIndices: Array[Int],
    values: Array[Double]
  ): Matrix

  /** Creates a zero matrix */
  def zeros(numRows: Int, numCols: Int): Matrix

  /** Creates a matrix of ones */
  def ones(numRows: Int, numCols: Int): Matrix

  /** Creates a dense identity matrix of size n x n */
  def eye(n: Int): Matrix

  /** Creates a sparse identity matrix of size n x n */
  def speye(n: Int): Matrix

  /** Creates a random dense matrix with uniform distribution */
  def rand(numRows: Int, numCols: Int, rng: java.util.Random): Matrix

  /**
   * Creates a random sparse matrix with uniform distribution
   *
   * @param density desired fraction of stored entries (e.g. 0.3 for 30%)
   */
  def sprand(numRows: Int, numCols: Int, density: Double, rng: java.util.Random): Matrix

  /** Creates a random dense matrix with Gaussian distribution */
  def randn(numRows: Int, numCols: Int, rng: java.util.Random): Matrix

  /**
   * Creates a random sparse matrix with Gaussian distribution
   *
   * @param density desired fraction of stored entries (e.g. 0.3 for 30%)
   */
  def sprandn(numRows: Int, numCols: Int, density: Double, rng: java.util.Random): Matrix

  /** Creates a diagonal matrix from a vector */
  def diag(vector: Vector): Matrix

  /** Horizontally concatenates matrices (side by side; the column counts add up) */
  def horzcat(matrices: Array[Matrix]): Matrix

  /** Vertically concatenates matrices (stacked; the row counts add up) */
  def vertcat(matrices: Array[Matrix]): Matrix
}
```

**Usage Examples:**

```scala
import org.apache.spark.ml.linalg.{Matrices, Vectors}
import java.util.Random

// Dense matrix (2x3, column-major: columns are [1,2], [3,4], [5,6])
val dense = Matrices.dense(2, 3, Array(1.0, 2.0, 3.0, 4.0, 5.0, 6.0))

// Sparse matrix in CSC format
// Encodes [[1, 0, 4], [0, 3, 5], [2, 0, 6]]
val sparse = Matrices.sparse(
  3, 3,
  Array(0, 2, 3, 6),                  // column pointers
  Array(0, 2, 1, 0, 1, 2),            // row indices
  Array(1.0, 2.0, 3.0, 4.0, 5.0, 6.0) // values
)

// Special matrices
val zeros = Matrices.zeros(3, 3)
val ones = Matrices.ones(2, 4)
val identity = Matrices.eye(4)
val sparseIdentity = Matrices.speye(4)

// Random matrices
val rng = new Random(42)
val randomDense = Matrices.rand(3, 3, rng)
val randomSparse = Matrices.sprand(4, 4, 0.3, rng) // 30% density
val gaussianDense = Matrices.randn(2, 3, rng)

// Diagonal matrix
val vector = Vectors.dense(1.0, 2.0, 3.0)
val diagonal = Matrices.diag(vector)

// Matrix concatenation
val m1 = Matrices.dense(2, 2, Array(1.0, 2.0, 3.0, 4.0))
val m2 = Matrices.dense(2, 2, Array(5.0, 6.0, 7.0, 8.0))
val horizontal = Matrices.horzcat(Array(m1, m2)) // 2x4 matrix
val vertical = Matrices.vertcat(Array(m1, m2)) // 4x2 matrix
```

### Matrix Operations

Common operations available on all matrix types.

```scala { .api }
/**
 * Base Matrix trait with common operations
 */
sealed trait Matrix extends Serializable {
  /** Number of rows */
  def numRows: Int

  /** Number of columns */
  def numCols: Int

  /** Flag indicating if matrix is transposed */
  val isTransposed: Boolean

  /** Converts matrix to column-major array */
  def toArray: Array[Double]

  /** Iterator over column vectors */
  def colIter: Iterator[Vector]

  /** Iterator over row vectors */
  def rowIter: Iterator[Vector]

  /**
   * Gets element at position (i, j)
   *
   * @param i row index
   * @param j column index
   */
  def apply(i: Int, j: Int): Double

  /** Creates a deep copy */
  def copy: Matrix

  /** Returns transposed matrix */
  def transpose: Matrix

  /** Matrix-matrix multiplication */
  def multiply(y: DenseMatrix): DenseMatrix

  /** Matrix-vector multiplication */
  def multiply(y: Vector): DenseVector

  /**
   * String representation with size limits
   *
   * @param maxLines maximum number of lines in the output
   * @param maxLineWidth maximum width of each output line
   */
  def toString(maxLines: Int, maxLineWidth: Int): String

  /**
   * Applies function to all active elements
   *
   * @param f callback receiving (row index, column index, value)
   */
  def foreachActive(f: (Int, Int, Double) => Unit): Unit

  /** Number of non-zero elements */
  def numNonzeros: Int

  /** Number of stored elements */
  def numActives: Int

  /** Converts to sparse matrix in column-major order */
  def toSparseColMajor: SparseMatrix

  /** Converts to sparse matrix in row-major order */
  def toSparseRowMajor: SparseMatrix

  /** Converts to sparse matrix maintaining current layout */
  def toSparse: SparseMatrix

  /** Converts to dense matrix maintaining current layout */
  def toDense: DenseMatrix

  /** Converts to dense matrix in row-major order */
  def toDenseRowMajor: DenseMatrix

  /** Converts to dense matrix in column-major order */
  def toDenseColMajor: DenseMatrix

  /** Returns optimal format with column-major preference */
  def compressedColMajor: Matrix

  /** Returns optimal format with row-major preference */
  def compressedRowMajor: Matrix

  /** Returns optimal format (dense or sparse) */
  def compressed: Matrix
}
```

**Usage Examples:**

```scala
import org.apache.spark.ml.linalg.{Matrices, Vectors}

// 2x3 matrix, column-major: rows are [1, 3, 5] and [2, 4, 6]
val matrix = Matrices.dense(2, 3, Array(1.0, 2.0, 3.0, 4.0, 5.0, 6.0))

// Basic properties
println(s"Rows: ${matrix.numRows}, Cols: ${matrix.numCols}") // Rows: 2, Cols: 3
println(s"Element (0,1): ${matrix(0, 1)}") // Element (0,1): 3.0
println(s"Non-zeros: ${matrix.numNonzeros}") // Non-zeros: 6

// Transposition
val transposed = matrix.transpose
println(s"Transposed size: ${transposed.numRows}x${transposed.numCols}") // 3x2

// Matrix-vector multiplication
val vector = Vectors.dense(1.0, 2.0, 3.0)
val result = matrix.multiply(vector) // 2x3 * 3x1 = 2x1, i.e. [22.0, 28.0]

// Conversions
val sparse = matrix.toSparse
val dense = sparse.toDense
val compressed = matrix.compressed

// Iterate over elements
matrix.foreachActive { (i, j, value) =>
  println(s"[$i,$j] = $value")
}

// Iterate over columns
matrix.colIter.zipWithIndex.foreach { case (col, j) =>
  println(s"Column $j: ${col.toArray.mkString("[", ", ", "]")}")
}
```

### Dense Matrices

Dense matrix implementation storing all elements in a single array, in column-major order by default (row-major when `isTransposed` is true).

```scala { .api }
/**
 * Column-major dense matrix
 *
 * @param numRows number of rows
 * @param numCols number of columns
 * @param values matrix values in column-major order (or row-major if transposed)
 * @param isTransposed whether matrix is transposed
 */
class DenseMatrix(
  val numRows: Int,
  val numCols: Int,
  val values: Array[Double],
  override val isTransposed: Boolean
) extends Matrix {
  /** Auxiliary constructor for column-major storage (isTransposed = false) */
  def this(numRows: Int, numCols: Int, values: Array[Double]) =
    this(numRows, numCols, values, false)
}

object DenseMatrix {
  /** Creates zero matrix */
  def zeros(numRows: Int, numCols: Int): DenseMatrix

  /** Creates matrix of ones */
  def ones(numRows: Int, numCols: Int): DenseMatrix

  /** Creates identity matrix */
  def eye(n: Int): DenseMatrix

  /** Creates random uniform matrix */
  def rand(numRows: Int, numCols: Int, rng: java.util.Random): DenseMatrix

  /** Creates random Gaussian matrix */
  def randn(numRows: Int, numCols: Int, rng: java.util.Random): DenseMatrix

  /** Creates diagonal matrix from vector */
  def diag(vector: Vector): DenseMatrix

  /** Pattern matching extractor yielding (numRows, numCols, values, isTransposed) */
  def unapply(dm: DenseMatrix): Option[(Int, Int, Array[Double], Boolean)]
}
```

**Usage Examples:**

```scala
import org.apache.spark.ml.linalg.{DenseMatrix, Vectors}
import java.util.Random

// Create dense matrix
val matrix = new DenseMatrix(2, 3, Array(1.0, 2.0, 3.0, 4.0, 5.0, 6.0))
// With isTransposed = true the values array is interpreted as row-major
val transposed = new DenseMatrix(3, 2, Array(1.0, 2.0, 3.0, 4.0, 5.0, 6.0), true)

// Factory methods
val zeros = DenseMatrix.zeros(3, 3)
val ones = DenseMatrix.ones(2, 4)
val identity = DenseMatrix.eye(4)

val rng = new Random(42)
val random = DenseMatrix.rand(3, 3, rng)
val gaussian = DenseMatrix.randn(2, 3, rng)

val vector = Vectors.dense(1.0, 2.0, 3.0)
val diagonal = DenseMatrix.diag(vector)

// Access underlying data
println(matrix.values.mkString("[", ", ", "]")) // [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]

// Pattern matching (pattern variables named so they do not shadow the vals above)
matrix match {
  case DenseMatrix(rows, cols, _, isTransposed) =>
    println(s"Dense matrix: ${rows}x${cols}, transposed: $isTransposed")
  case _ => println("Not a dense matrix")
}
```

### Sparse Matrices

Sparse matrix implementation using Compressed Sparse Column (CSC) format (Compressed Sparse Row, CSR, when `isTransposed` is true).

```scala { .api }
/**
 * Column-major sparse matrix in Compressed Sparse Column (CSC) format
 *
 * @param numRows number of rows
 * @param numCols number of columns
 * @param colPtrs column pointers array
 * @param rowIndices row indices of non-zero elements
 * @param values non-zero values
 * @param isTransposed whether matrix is transposed (CSR format)
 */
class SparseMatrix(
  val numRows: Int,
  val numCols: Int,
  val colPtrs: Array[Int],
  val rowIndices: Array[Int],
  val values: Array[Double],
  override val isTransposed: Boolean
) extends Matrix {
  /** Auxiliary constructor for CSC format (isTransposed = false) */
  def this(
    numRows: Int,
    numCols: Int,
    colPtrs: Array[Int],
    rowIndices: Array[Int],
    values: Array[Double]
  ) =
    this(numRows, numCols, colPtrs, rowIndices, values, false)
}

object SparseMatrix {
  /**
   * Creates sparse matrix from coordinate (COO) format
   *
   * @param entries (row index, column index, value) triples
   */
  def fromCOO(numRows: Int, numCols: Int, entries: Iterable[(Int, Int, Double)]): SparseMatrix

  /** Creates sparse identity matrix */
  def speye(n: Int): SparseMatrix

  /** Creates random sparse matrix with uniform distribution */
  def sprand(numRows: Int, numCols: Int, density: Double, rng: java.util.Random): SparseMatrix

  /** Creates random sparse matrix with Gaussian distribution */
  def sprandn(numRows: Int, numCols: Int, density: Double, rng: java.util.Random): SparseMatrix

  /** Creates sparse diagonal matrix from vector */
  def spdiag(vector: Vector): SparseMatrix

  /**
   * Pattern matching extractor yielding
   * (numRows, numCols, colPtrs, rowIndices, values, isTransposed)
   */
  def unapply(
    sm: SparseMatrix
  ): Option[(Int, Int, Array[Int], Array[Int], Array[Double], Boolean)]
}
```

**Usage Examples:**

```scala
import org.apache.spark.ml.linalg.{SparseMatrix, Vectors}
import java.util.Random

// Create sparse matrix in CSC format
// Matrix: [[1, 0, 4], [0, 3, 5], [2, 0, 6]]
val sparse = new SparseMatrix(
  3, 3,                               // 3x3 matrix
  Array(0, 2, 3, 6),                  // column pointers
  Array(0, 2, 1, 0, 1, 2),            // row indices
  Array(1.0, 2.0, 3.0, 4.0, 5.0, 6.0) // values
)

// Create from coordinate format: (row, column, value) triples
val entries = Seq((0, 0, 1.0), (1, 1, 2.0), (2, 2, 3.0))
val fromCOO = SparseMatrix.fromCOO(3, 3, entries)

// Factory methods
val identity = SparseMatrix.speye(4)

val rng = new Random(42)
val randomSparse = SparseMatrix.sprand(4, 4, 0.3, rng) // 30% density
val gaussianSparse = SparseMatrix.sprandn(3, 3, 0.5, rng)

val vector = Vectors.sparse(3, Array(0, 2), Array(1.0, 3.0))
val diagonal = SparseMatrix.spdiag(vector)

// Access CSC components
println(s"Column pointers: ${sparse.colPtrs.mkString("[", ", ", "]")}")
println(s"Row indices: ${sparse.rowIndices.mkString("[", ", ", "]")}")
println(s"Values: ${sparse.values.mkString("[", ", ", "]")}")

// Pattern matching (only the fields used below are bound)
sparse match {
  case SparseMatrix(rows, cols, _, _, values, _) =>
    println(s"Sparse matrix: ${rows}x${cols}, nnz: ${values.length}")
  case _ => println("Not a sparse matrix")
}

// Convert to dense for inspection
val dense = sparse.toDense
println(dense.toString)
```

## Types

```scala { .api }
// Core matrix types
sealed trait Matrix extends Serializable

class DenseMatrix(
  val numRows: Int,
  val numCols: Int,
  val values: Array[Double],
  override val isTransposed: Boolean = false
) extends Matrix

class SparseMatrix(
  val numRows: Int,
  val numCols: Int,
  val colPtrs: Array[Int],
  val rowIndices: Array[Int],
  val values: Array[Double],
  override val isTransposed: Boolean = false
) extends Matrix
```