or run

tessl search
Log in

Version

Files

docs

distributions.md · index.md · matrices.md · vectors.md
tile.json

docs/matrices.md

Matrix Operations

Matrices are essential for linear algebra operations in machine learning. Spark MLlib Local provides both dense and sparse matrix implementations with comprehensive operations and automatic format optimization.

Matrix Creation

Dense Matrices

Create dense matrices stored in column-major order.

// Factory methods for matrices. Every method is declared to return the general Matrix type.
object Matrices {
  // Dense matrix built from a values array given in column-major order.
  def dense(numRows: Int, numCols: Int, values: Array[Double]): Matrix
  // numRows x numCols matrix of zeros.
  def zeros(numRows: Int, numCols: Int): Matrix
  // numRows x numCols matrix of ones.
  def ones(numRows: Int, numCols: Int): Matrix  
  // n x n identity matrix.
  def eye(n: Int): Matrix
  // Sparse n x n identity matrix.
  def speye(n: Int): Matrix
  // Random matrix with values in [0,1), drawn from rng.
  def rand(numRows: Int, numCols: Int, rng: java.util.Random): Matrix
  // Random sparse matrix; density is given as a fraction (e.g. 0.3 = 30% density).
  def sprand(numRows: Int, numCols: Int, density: Double, rng: java.util.Random): Matrix
  // Random matrix with standard-normal values, drawn from rng.
  def randn(numRows: Int, numCols: Int, rng: java.util.Random): Matrix
  // Random sparse matrix with the given density.
  // NOTE(review): presumably standard-normal values, mirroring randn - confirm.
  def sprandn(numRows: Int, numCols: Int, density: Double, rng: java.util.Random): Matrix
  // Diagonal matrix built from a vector.
  def diag(vector: Vector): Matrix
  // Horizontal concatenation (matrices placed side by side).
  def horzcat(matrices: Array[Matrix]): Matrix
  // Vertical concatenation (matrices stacked).
  def vertcat(matrices: Array[Matrix]): Matrix
}

// Factory methods that return the concrete DenseMatrix type rather than the general Matrix.
object DenseMatrix {
  // numRows x numCols dense matrix of zeros.
  def zeros(numRows: Int, numCols: Int): DenseMatrix
  // numRows x numCols dense matrix of ones.
  def ones(numRows: Int, numCols: Int): DenseMatrix
  // n x n dense identity matrix.
  def eye(n: Int): DenseMatrix
  // Dense random matrix with values in [0,1), drawn from rng.
  def rand(numRows: Int, numCols: Int, rng: java.util.Random): DenseMatrix
  // Dense random matrix with standard-normal values, drawn from rng.
  def randn(numRows: Int, numCols: Int, rng: java.util.Random): DenseMatrix
  // Dense diagonal matrix built from a vector.
  def diag(vector: Vector): DenseMatrix
}

Usage examples:

import org.apache.spark.ml.linalg.{Matrices, DenseMatrix, Vectors}
// The random factories are declared with java.util.Random; scala.util.Random
// is a separate wrapper class and is not accepted in its place.
import java.util.Random

// Create from values array (column-major order)
val mat1 = Matrices.dense(2, 2, Array(1.0, 2.0, 3.0, 4.0))
// Represents: [[1.0, 3.0],
//              [2.0, 4.0]]

// Special matrices
val zeros = DenseMatrix.zeros(3, 3)
val ones = DenseMatrix.ones(2, 3)
val identity = DenseMatrix.eye(3)

// Random matrices (seeded so the example is reproducible)
val rng = new Random(42)
val uniform = DenseMatrix.rand(2, 3, rng)      // Values in [0,1)
val gaussian = DenseMatrix.randn(2, 3, rng)    // Standard normal

// Diagonal matrix from vector
val diagVec = Vectors.dense(1.0, 2.0, 3.0)
val diagMat = DenseMatrix.diag(diagVec)

// Sparse identity
val sparseIdentity = Matrices.speye(3)

// Random sparse matrices
val sparseUniform = Matrices.sprand(3, 3, 0.3, rng)  // 30% density
val sparseGaussian = Matrices.sprandn(3, 3, 0.2, rng) // 20% density

Sparse Matrices

Create sparse matrices in Compressed Sparse Column (CSC) format.

// Factory methods that produce sparse matrices.
object Matrices {
  // Sparse matrix assembled directly from its Compressed Sparse Column (CSC) arrays:
  // column pointers, row indices, and the stored values.
  def sparse(
    numRows: Int, 
    numCols: Int,
    colPtrs: Array[Int], 
    rowIndices: Array[Int], 
    values: Array[Double]
  ): Matrix
  // Sparse n x n identity matrix.
  def speye(n: Int): Matrix
}

// Factory methods that return the concrete SparseMatrix type.
object SparseMatrix {
  // Build from COO (coordinate) entries; each entry is a (row, column, value) triple.
  def fromCOO(numRows: Int, numCols: Int, entries: Iterable[(Int, Int, Double)]): SparseMatrix
  // Sparse n x n identity matrix.
  def speye(n: Int): SparseMatrix
  // Random sparse matrix; density is given as a fraction (e.g. 0.3 = 30% density).
  def sprand(numRows: Int, numCols: Int, density: Double, rng: java.util.Random): SparseMatrix
  // Random sparse matrix with the given density.
  // NOTE(review): presumably standard-normal values - confirm.
  def sprandn(numRows: Int, numCols: Int, density: Double, rng: java.util.Random): SparseMatrix
  // Sparse diagonal matrix built from a vector.
  def spdiag(vector: Vector): SparseMatrix
}

Usage examples:

import org.apache.spark.ml.linalg.{SparseMatrix, Vectors}
// sprand/sprandn are declared with java.util.Random; scala.util.Random
// is a separate wrapper class and is not accepted in its place.
import java.util.Random

// Create from COO (coordinate) format - easiest for manual creation.
// Each entry is (row, column, value).
val entries = Seq((0, 0, 1.0), (1, 1, 2.0), (0, 2, 3.0))
val sparseMat = SparseMatrix.fromCOO(2, 3, entries)
// Represents: [[1.0, 0.0, 3.0],
//              [0.0, 2.0, 0.0]]

// Sparse identity
val sparseId = SparseMatrix.speye(3)

// Random sparse matrices (seeded so the example is reproducible)
val rng = new Random(42)
val sparseRand = SparseMatrix.sprand(4, 4, 0.3, rng)  // 30% density
val sparseGauss = SparseMatrix.sprandn(4, 4, 0.2, rng) // 20% density

// Sparse diagonal matrix
val diagVec = Vectors.sparse(3, Array(0, 2), Array(1.0, 3.0))
val sparseDiag = SparseMatrix.spdiag(diagVec)

Matrix Operations

Basic Operations

Access matrix properties and elements.

// Basic properties and element access shared by all matrix types.
trait Matrix {
  // Number of rows.
  def numRows: Int
  // Number of columns.
  def numCols: Int
  // Element at row i, column j.
  def apply(i: Int, j: Int): Double
  // Deep copy of this matrix.
  def copy: Matrix
  // All values as a flat array in column-major order.
  def toArray: Array[Double]
  // Number of non-zero values.
  def numNonzeros: Int
  // NOTE(review): presumably the number of explicitly stored ("active") entries - confirm.
  def numActives: Int
  // Invoke f(row, column, value) for every active element.
  def foreachActive(f: (Int, Int, Double) => Unit): Unit
  // Iterator over the columns, each as a Vector.
  def colIter: Iterator[Vector]
  // Iterator over the rows, each as a Vector.
  def rowIter: Iterator[Vector]
  // Transpose flag of this matrix.
  val isTransposed: Boolean
}

Usage examples:

// 2x3 matrix; the values array is read column by column
val mat = Matrices.dense(2, 3, Array(1.0, 2.0, 3.0, 4.0, 5.0, 6.0))

// Shape
val rowCount = mat.numRows
val colCount = mat.numCols
println(rowCount)          // 2
println(colCount)          // 3

// Single element and non-zero count
val firstRowSecondCol = mat.apply(0, 1)
println(firstRowSecondCol) // 3.0 (row 0, column 1)
println(mat.numNonzeros)   // 6

// Flattening and copying
val array = mat.toArray    // Column-major array
val copied = mat.copy      // Deep copy

Matrix Transformations

Transform matrices with transpose and format conversion.

// Transpose and storage-format conversions available on every matrix.
trait Matrix {
  // Transposed view; shares the underlying data with the original.
  def transpose: Matrix
  // Convert to a dense matrix.
  def toDense: DenseMatrix
  // Convert to a sparse matrix.
  def toSparse: SparseMatrix
  // Layout-specific conversions.
  // NOTE(review): presumably these force row-major / column-major storage of the result - confirm.
  def toDenseRowMajor: DenseMatrix
  def toDenseColMajor: DenseMatrix
  def toSparseRowMajor: SparseMatrix
  def toSparseColMajor: SparseMatrix
  // Automatically choose the most efficient format (dense or sparse).
  def compressed: Matrix
  // NOTE(review): presumably like compressed, but restricted to one storage layout - confirm.
  def compressedRowMajor: Matrix
  def compressedColMajor: Matrix
}

Usage examples:

val mat = Matrices.dense(2, 3, Array(1.0, 2.0, 3.0, 4.0, 5.0, 6.0))
println(s"Original: ${mat.numRows}x${mat.numCols}")      // 2x3

// Transposing swaps the dimensions and shares the underlying data
val transposed = mat.transpose
println(s"Transposed: ${transposed.numRows}x${transposed.numCols}") // 3x2

// Switching storage formats
val sparse = mat.toSparse       // dense -> sparse
val dense = sparse.toDense      // sparse -> dense again
val optimal = sparse.compressed // let the library choose the optimal format

Matrix Multiplication

Perform matrix-vector and matrix-matrix multiplication.

// Multiplication operations available on every matrix.
trait Matrix {
  // Matrix-vector multiplication; the result is always a DenseVector.
  def multiply(y: Vector): DenseVector
  // Matrix-matrix multiplication with a dense right-hand side; the result is a DenseMatrix.
  def multiply(y: DenseMatrix): DenseMatrix
}

Usage examples:

val mat = Matrices.dense(2, 3, Array(1.0, 2.0, 3.0, 4.0, 5.0, 6.0))
// mat  = [[1.0, 3.0, 5.0],
//         [2.0, 4.0, 6.0]]
val vec = Vectors.dense(1.0, 2.0, 3.0)
// multiply(y: DenseMatrix) needs a DenseMatrix, so construct one directly
// instead of downcasting a general Matrix with asInstanceOf.
val mat2 = new DenseMatrix(3, 2, Array(1.0, 0.0, 0.0, 1.0, 1.0, 1.0))
// mat2 = [[1.0, 1.0],
//         [0.0, 1.0],
//         [0.0, 1.0]]

// Matrix-vector multiplication: (2x3) * (3) -> DenseVector of length 2
val result1 = mat.multiply(vec) // [22.0, 28.0]

// Matrix-matrix multiplication: (2x3) * (3x2) -> 2x2 DenseMatrix
val result2 = mat.multiply(mat2) // [[1.0, 9.0], [2.0, 12.0]]

Iteration and Processing

Apply functions to matrix elements.

// Ways to traverse the contents of a matrix.
trait Matrix {
  // Invoke f(row, column, value) for every active element.
  def foreachActive(f: (Int, Int, Double) => Unit): Unit
  // Iterator over the columns, each as a Vector.
  def colIter: Iterator[Vector]
  // Iterator over the rows, each as a Vector.
  def rowIter: Iterator[Vector]
}

Usage examples:

val mat = Matrices.dense(2, 2, Array(1.0, 2.0, 3.0, 4.0))

// Visit every active element with its row and column index
mat.foreachActive((i, j, value) => println(s"mat($i,$j) = $value"))

// Walk the columns, pairing each one with its index
for ((col, j) <- mat.colIter.zipWithIndex) {
  println(s"Column $j: ${col.toArray.mkString(",")}")
}

// Walk the rows, pairing each one with its index
for ((row, i) <- mat.rowIter.zipWithIndex) {
  println(s"Row $i: ${row.toArray.mkString(",")}")
}

Matrix Concatenation

Combine matrices horizontally or vertically.

// Factory methods for combining several matrices into one.
object Matrices {
  // Horizontal concatenation: matrices are placed side by side.
  def horzcat(matrices: Array[Matrix]): Matrix
  // Vertical concatenation: matrices are stacked on top of each other.
  def vertcat(matrices: Array[Matrix]): Matrix
}

Usage examples:

// Two 2x2 building blocks
val mat1 = Matrices.dense(2, 2, Array(1.0, 2.0, 3.0, 4.0))
val mat2 = Matrices.dense(2, 2, Array(5.0, 6.0, 7.0, 8.0))
val blocks = Array(mat1, mat2)

// Side by side: columns of mat2 follow the columns of mat1
val hcat = Matrices.horzcat(blocks) // 2x4 matrix

// Stacked: rows of mat2 follow the rows of mat1
val vcat = Matrices.vertcat(blocks) // 4x2 matrix

Matrix Types

DenseMatrix

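Dense matrices store all elements in a single values array, in column-major order by default; when isTransposed is true the array is interpreted in row-major order instead.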

/**
 * Dense matrix backed by a single `values` array.
 *
 * @param numRows      number of rows
 * @param numCols      number of columns
 * @param values       the matrix entries
 * @param isTransposed transpose flag; use the three-argument constructor to get `false`
 */
class DenseMatrix(
  val numRows: Int,
  val numCols: Int,
  val values: Array[Double],
  // No default value here: the three-argument constructor below already covers
  // the "not transposed" case, and mixing a default with an overload invites
  // overload-resolution surprises.
  override val isTransposed: Boolean
) extends Matrix {
  /** Alternative constructor without transpose flag; equivalent to `isTransposed = false`. */
  def this(numRows: Int, numCols: Int, values: Array[Double]) =
    this(numRows, numCols, values, false)
}

// Companion object exposing an extractor for DenseMatrix.
object DenseMatrix {
  // Extractor returning a 4-tuple whose element types match the constructor parameters.
  // NOTE(review): presumably (numRows, numCols, values, isTransposed) in that order - confirm.
  def unapply(dm: DenseMatrix): Option[(Int, Int, Array[Double], Boolean)]
}

SparseMatrix

Sparse matrices use Compressed Sparse Column (CSC) format.

/**
 * Sparse matrix described by column pointers, row indices and stored values.
 *
 * @param numRows      number of rows
 * @param numCols      number of columns
 * @param colPtrs      column pointers
 * @param rowIndices   row indices of the stored values
 * @param values       the stored (non-zero) values
 * @param isTransposed transpose flag; use the five-argument constructor to get `false`
 */
class SparseMatrix(
  val numRows: Int,
  val numCols: Int,
  val colPtrs: Array[Int],
  val rowIndices: Array[Int],
  val values: Array[Double],
  // No default value here: the five-argument constructor below already covers
  // the "not transposed" case, and mixing a default with an overload invites
  // overload-resolution surprises.
  override val isTransposed: Boolean
) extends Matrix {
  /** Alternative constructor without transpose flag; equivalent to `isTransposed = false`. */
  def this(
    numRows: Int,
    numCols: Int,
    colPtrs: Array[Int],
    rowIndices: Array[Int],
    values: Array[Double]
  ) = this(numRows, numCols, colPtrs, rowIndices, values, false)
}

// Companion object exposing an extractor for SparseMatrix.
object SparseMatrix {
  // Extractor returning a 6-tuple whose element types match the constructor parameters.
  // NOTE(review): presumably (numRows, numCols, colPtrs, rowIndices, values, isTransposed) - confirm.
  def unapply(sm: SparseMatrix): Option[(Int, Int, Array[Int], Array[Int], Array[Double], Boolean)]
}

Usage examples:

// Branch on the concrete matrix type with a type pattern
def describe(m: Matrix): Unit = m match {
  case sm: SparseMatrix =>
    println(s"Sparse ${sm.numRows}x${sm.numCols} matrix")
    println(s"NNZ: ${sm.values.length}")
  case dm: DenseMatrix =>
    println(s"Dense ${dm.numRows}x${dm.numCols} matrix")
    println(s"Values: ${dm.values.mkString(",")}")
}

val mat: Matrix = Matrices.dense(2, 2, Array(1.0, 2.0, 3.0, 4.0))
describe(mat)

// A SparseMatrix exposes its backing arrays as fields
val sparse = SparseMatrix.fromCOO(3, 3, Seq((0,0,1.0), (2,2,4.0)))
val colPtrsText = sparse.colPtrs.mkString(",")       // Column pointers
val rowIndicesText = sparse.rowIndices.mkString(",") // Row indices
val valuesText = sparse.values.mkString(",")         // Non-zero values
Seq(colPtrsText, rowIndicesText, valuesText).foreach(println)

Performance Considerations

  • Dense matrices are optimal when most elements are non-zero
  • Sparse matrices are optimal for matrices with many zeros (typically >66% zeros)
  • Use the `compressed` method to automatically choose the most efficient format
  • Column-major storage is optimized for column-wise operations
  • Matrix multiplication uses native BLAS when available for better performance
  • Transposition is a lightweight operation that shares underlying data