Matrices are essential for linear algebra operations in machine learning. Spark MLlib Local provides both dense and sparse matrix implementations with comprehensive operations and automatic format optimization.
Create dense matrices stored in column-major order.
// Factory methods for local matrices (signature listing; bodies are omitted).
object Matrices {
// Dense matrix from `values` laid out in column-major order.
def dense(numRows: Int, numCols: Int, values: Array[Double]): Matrix
// Special matrices: zeros, ones, identity (`eye`) and sparse identity (`speye`).
def zeros(numRows: Int, numCols: Int): Matrix
def ones(numRows: Int, numCols: Int): Matrix
def eye(n: Int): Matrix
def speye(n: Int): Matrix
// Random matrices. The generator parameter is a java.util.Random, not a scala.util.Random.
// The sp* variants additionally take a `density` (e.g. 0.3 for 30%).
// NOTE(review): the DenseMatrix examples describe rand as values in [0,1) and randn as
// standard normal; presumably the same holds here - confirm.
def rand(numRows: Int, numCols: Int, rng: java.util.Random): Matrix
def sprand(numRows: Int, numCols: Int, density: Double, rng: java.util.Random): Matrix
def randn(numRows: Int, numCols: Int, rng: java.util.Random): Matrix
def sprandn(numRows: Int, numCols: Int, density: Double, rng: java.util.Random): Matrix
// Diagonal matrix built from a vector.
def diag(vector: Vector): Matrix
// Concatenation: horzcat places the matrices side by side, vertcat stacks them.
def horzcat(matrices: Array[Matrix]): Matrix
def vertcat(matrices: Array[Matrix]): Matrix
}
object DenseMatrix {
def zeros(numRows: Int, numCols: Int): DenseMatrix
def ones(numRows: Int, numCols: Int): DenseMatrix
def eye(n: Int): DenseMatrix
def rand(numRows: Int, numCols: Int, rng: java.util.Random): DenseMatrix
def randn(numRows: Int, numCols: Int, rng: java.util.Random): DenseMatrix
def diag(vector: Vector): DenseMatrix
}

Usage examples:
import org.apache.spark.ml.linalg.{Matrices, DenseMatrix, Vectors}
// The random factories take a java.util.Random. scala.util.Random only wraps one and is
// not a subtype, so passing it to rand/randn/sprand/sprandn would not type-check.
import java.util.Random
// Create from values array (column-major order)
val mat1 = Matrices.dense(2, 2, Array(1.0, 2.0, 3.0, 4.0))
// Represents: [[1.0, 3.0],
// [2.0, 4.0]]
// Special matrices
val zeros = DenseMatrix.zeros(3, 3)
val ones = DenseMatrix.ones(2, 3)
val identity = DenseMatrix.eye(3)
// Random matrices (seeded so the example is reproducible)
val rng = new Random(42)
val uniform = DenseMatrix.rand(2, 3, rng) // Values in [0,1)
val gaussian = DenseMatrix.randn(2, 3, rng) // Standard normal
// Diagonal matrix from vector
val diagVec = Vectors.dense(1.0, 2.0, 3.0)
val diagMat = DenseMatrix.diag(diagVec)
// Sparse identity
val sparseIdentity = Matrices.speye(3)
// Random sparse matrices
val sparseUniform = Matrices.sprand(3, 3, 0.3, rng) // 30% density
val sparseGaussian = Matrices.sprandn(3, 3, 0.2, rng) // 20% density

Create sparse matrices in Compressed Sparse Column (CSC) format.
// Sparse-matrix factories on `Matrices` (signature listing; bodies are omitted).
object Matrices {
// Builds a matrix from its Compressed Sparse Column (CSC) components.
def sparse(
numRows: Int,
numCols: Int,
colPtrs: Array[Int], // column pointers
rowIndices: Array[Int], // row indices of the stored entries
values: Array[Double] // stored entry values
): Matrix
// Sparse identity matrix.
def speye(n: Int): Matrix
}
object SparseMatrix {
def fromCOO(numRows: Int, numCols: Int, entries: Iterable[(Int, Int, Double)]): SparseMatrix
def speye(n: Int): SparseMatrix
def sprand(numRows: Int, numCols: Int, density: Double, rng: java.util.Random): SparseMatrix
def sprandn(numRows: Int, numCols: Int, density: Double, rng: java.util.Random): SparseMatrix
def spdiag(vector: Vector): SparseMatrix
}

Usage examples:
import org.apache.spark.ml.linalg.{SparseMatrix, Vectors}
// sprand/sprandn take a java.util.Random. scala.util.Random only wraps one and is not a
// subtype, so it cannot be passed as `rng`.
import java.util.Random
// Create from COO (coordinate) format - easiest for manual creation
// Each entry is (rowIndex, colIndex, value).
val entries = Seq((0, 0, 1.0), (1, 1, 2.0), (0, 2, 3.0))
val sparseMat = SparseMatrix.fromCOO(2, 3, entries)
// Represents: [[1.0, 0.0, 3.0],
// [0.0, 2.0, 0.0]]
// Sparse identity
val sparseId = SparseMatrix.speye(3)
// Random sparse matrices (seeded so the example is reproducible)
val rng = new Random(42)
val sparseRand = SparseMatrix.sprand(4, 4, 0.3, rng) // 30% density
val sparseGauss = SparseMatrix.sprandn(4, 4, 0.2, rng) // 20% density
// Sparse diagonal matrix
val diagVec = Vectors.sparse(3, Array(0, 2), Array(1.0, 3.0))
val sparseDiag = SparseMatrix.spdiag(diagVec)

Access matrix properties and elements.
trait Matrix {
def numRows: Int
def numCols: Int
def apply(i: Int, j: Int): Double
def copy: Matrix
def toArray: Array[Double]
def numNonzeros: Int
def numActives: Int
def foreachActive(f: (Int, Int, Double) => Unit): Unit
def colIter: Iterator[Vector]
def rowIter: Iterator[Vector]
val isTransposed: Boolean
}

Usage examples:
// The values array is column-major, so this represents: [[1.0, 3.0, 5.0],
// [2.0, 4.0, 6.0]]
val mat = Matrices.dense(2, 3, Array(1.0, 2.0, 3.0, 4.0, 5.0, 6.0))
println(mat.numRows) // 2
println(mat.numCols) // 3
println(mat(0, 1)) // 3.0 (row 0, column 1)
println(mat.numNonzeros) // 6
val copied = mat.copy // Deep copy
val array = mat.toArray // Column-major array

Transform matrices with transpose and format conversion.
trait Matrix {
def transpose: Matrix
def toDense: DenseMatrix
def toSparse: SparseMatrix
def toDenseRowMajor: DenseMatrix
def toDenseColMajor: DenseMatrix
def toSparseRowMajor: SparseMatrix
def toSparseColMajor: SparseMatrix
def compressed: Matrix
def compressedRowMajor: Matrix
def compressedColMajor: Matrix
}

Usage examples:
// 2x3 matrix built from a column-major values array
val mat = Matrices.dense(2, 3, Array(1.0, 2.0, 3.0, 4.0, 5.0, 6.0))
// Transpose (shares underlying data)
val transposed = mat.transpose
println(s"Original: ${mat.numRows}x${mat.numCols}") // 2x3
println(s"Transposed: ${transposed.numRows}x${transposed.numCols}") // 3x2
// Format conversion
val sparse = mat.toSparse // Convert to sparse (typed as SparseMatrix)
val dense = sparse.toDense // Convert back to dense (typed as DenseMatrix)
val optimal = sparse.compressed // Choose optimal format

Perform matrix-vector and matrix-matrix multiplication.
trait Matrix {
def multiply(y: Vector): DenseVector
def multiply(y: DenseMatrix): DenseMatrix
}

Usage examples:
// 2x3 matrix (column-major values): [[1.0, 3.0, 5.0],
//                                    [2.0, 4.0, 6.0]]
val mat = Matrices.dense(2, 3, Array(1.0, 2.0, 3.0, 4.0, 5.0, 6.0))
val vec = Vectors.dense(1.0, 2.0, 3.0)
// multiply(y: DenseMatrix) needs the concrete dense type. Constructing a DenseMatrix
// directly avoids an unchecked asInstanceOf downcast from Matrix.
// 3x2 matrix (column-major values): [[1.0, 1.0],
//                                    [0.0, 1.0],
//                                    [0.0, 1.0]]
val mat2 = new DenseMatrix(3, 2, Array(1.0, 0.0, 0.0, 1.0, 1.0, 1.0))
// Matrix-vector multiplication
val result1 = mat.multiply(vec) // Returns DenseVector: [22.0, 28.0]
// Matrix-matrix multiplication
val result2 = mat.multiply(mat2) // Returns DenseMatrix: [[1.0, 9.0], [2.0, 12.0]]

Apply functions to matrix elements.
trait Matrix {
def foreachActive(f: (Int, Int, Double) => Unit): Unit
def colIter: Iterator[Vector]
def rowIter: Iterator[Vector]
}

Usage examples:
// 2x2 matrix (column-major values): [[1.0, 3.0],
// [2.0, 4.0]]
val mat = Matrices.dense(2, 2, Array(1.0, 2.0, 3.0, 4.0))
// Process all active elements
// The callback receives (row index, column index, value).
mat.foreachActive { (i, j, value) =>
println(s"mat($i,$j) = $value")
}
// Iterate over columns
// Prints "Column 0: 1.0,2.0" then "Column 1: 3.0,4.0"
mat.colIter.zipWithIndex.foreach { case (col, j) =>
println(s"Column $j: ${col.toArray.mkString(",")}")
}
// Iterate over rows
mat.rowIter.zipWithIndex.foreach { case (row, i) =>
println(s"Row $i: ${row.toArray.mkString(",")}")
}

Combine matrices horizontally or vertically.
object Matrices {
def horzcat(matrices: Array[Matrix]): Matrix
def vertcat(matrices: Array[Matrix]): Matrix
}

Usage examples:
// mat1 (column-major values) represents [[1.0, 3.0], [2.0, 4.0]]
val mat1 = Matrices.dense(2, 2, Array(1.0, 2.0, 3.0, 4.0))
// mat2 (column-major values) represents [[5.0, 7.0], [6.0, 8.0]]
val mat2 = Matrices.dense(2, 2, Array(5.0, 6.0, 7.0, 8.0))
// Horizontal concatenation (side by side)
// Result: [[1.0, 3.0, 5.0, 7.0],
// [2.0, 4.0, 6.0, 8.0]]
val hcat = Matrices.horzcat(Array(mat1, mat2)) // 2x4 matrix
// Vertical concatenation (stacked)
val vcat = Matrices.vertcat(Array(mat1, mat2)) // 4x2 matrix

Dense matrices store all elements in column-major order.
// Dense matrix: every element is stored, in column-major order.
// (Signature listing: only the constructors are shown here.)
class DenseMatrix(
val numRows: Int,
val numCols: Int,
val values: Array[Double], // all matrix entries
// NOTE(review): presumably signals that `values` is to be read as the transposed
// (row-major) layout when true - confirm against the Spark API docs.
override val isTransposed: Boolean = false
) extends Matrix {
// Alternative constructor without transpose flag
// Delegates to the primary constructor with isTransposed = false.
def this(numRows: Int, numCols: Int, values: Array[Double]) =
this(numRows, numCols, values, false)
}
object DenseMatrix {
def unapply(dm: DenseMatrix): Option[(Int, Int, Array[Double], Boolean)]
}

Sparse matrices use Compressed Sparse Column (CSC) format.
// Sparse matrix held in Compressed Sparse Column (CSC) format.
// (Signature listing: only the constructors are shown here.)
class SparseMatrix(
val numRows: Int,
val numCols: Int,
val colPtrs: Array[Int], // column pointers
val rowIndices: Array[Int], // row indices of the stored entries
val values: Array[Double], // stored entry values
// NOTE(review): presumably signals that the arrays describe the transposed
// (row-oriented) layout when true - confirm against the Spark API docs.
override val isTransposed: Boolean = false
) extends Matrix {
// Alternative constructor without transpose flag
// Delegates to the primary constructor with isTransposed = false.
def this(
numRows: Int,
numCols: Int,
colPtrs: Array[Int],
rowIndices: Array[Int],
values: Array[Double]
) = this(numRows, numCols, colPtrs, rowIndices, values, false)
}
object SparseMatrix {
def unapply(sm: SparseMatrix): Option[(Int, Int, Array[Int], Array[Int], Array[Double], Boolean)]
}

Usage examples:
// Pattern matching on the concrete matrix type
val mat: Matrix = Matrices.dense(2, 2, Array(1.0, 2.0, 3.0, 4.0))
mat match {
  case dm: DenseMatrix =>
    println(s"Dense ${dm.numRows}x${dm.numCols} matrix")
    println(s"Values: ${dm.values.mkString(",")}")
  case sm: SparseMatrix =>
    println(s"Sparse ${sm.numRows}x${sm.numCols} matrix")
    // numNonzeros counts entries that are actually non-zero. values.length is the number
    // of stored (active) entries, which can include explicitly stored zeros.
    println(s"NNZ: ${sm.numNonzeros}")
}
// Direct field access for sparse matrices
// Entries are (row, col, value): 1.0 at (0,0) and 4.0 at (2,2).
val sparse = SparseMatrix.fromCOO(3, 3, Seq((0,0,1.0), (2,2,4.0)))
println(sparse.colPtrs.mkString(",")) // Column pointers: 0,1,1,2
println(sparse.rowIndices.mkString(",")) // Row indices: 0,2
println(sparse.values.mkString(",")) // Non-zero values

Use the `compressed` method to automatically choose the most efficient format.