Spark ML Local Library providing linear algebra and statistical utilities for local machine learning operations without requiring a distributed Spark cluster
—
Comprehensive matrix functionality supporting both dense and sparse matrices, with efficient storage formats and the mathematical operations essential for linear algebra computations in machine learning algorithms.
Create matrices using factory methods from the Matrices object.
/**
 * Factory methods for creating Matrix instances.
 *
 * Dense factories take their values in column-major order; sparse factories
 * use the Compressed Sparse Column (CSC) format.
 */
object Matrices {
  /**
   * Creates a dense matrix from values given in column-major order.
   * For example, `dense(2, 3, Array(1.0, 2.0, 3.0, 4.0, 5.0, 6.0))` has the
   * columns [1,2], [3,4], [5,6].
   */
  def dense(numRows: Int, numCols: Int, values: Array[Double]): Matrix

  /**
   * Creates a sparse matrix in Compressed Sparse Column (CSC) format.
   *
   * @param colPtrs    column pointers array
   * @param rowIndices row indices of the non-zero elements
   * @param values     non-zero values
   */
  def sparse(numRows: Int, numCols: Int, colPtrs: Array[Int], rowIndices: Array[Int], values: Array[Double]): Matrix

  /** Creates a `numRows` x `numCols` zero matrix. */
  def zeros(numRows: Int, numCols: Int): Matrix

  /** Creates a `numRows` x `numCols` matrix of ones. */
  def ones(numRows: Int, numCols: Int): Matrix

  /** Creates a dense `n` x `n` identity matrix. */
  def eye(n: Int): Matrix

  /** Creates a sparse `n` x `n` identity matrix. */
  def speye(n: Int): Matrix

  /** Creates a random dense matrix with uniform distribution, drawing from `rng`. */
  def rand(numRows: Int, numCols: Int, rng: java.util.Random): Matrix

  /**
   * Creates a random sparse matrix with uniform distribution.
   *
   * @param density fraction of populated entries (e.g. 0.3 for 30% density)
   */
  def sprand(numRows: Int, numCols: Int, density: Double, rng: java.util.Random): Matrix

  /** Creates a random dense matrix with Gaussian distribution, drawing from `rng`. */
  def randn(numRows: Int, numCols: Int, rng: java.util.Random): Matrix

  /**
   * Creates a random sparse matrix with Gaussian distribution.
   *
   * @param density fraction of populated entries (e.g. 0.5 for 50% density)
   */
  def sprandn(numRows: Int, numCols: Int, density: Double, rng: java.util.Random): Matrix

  /** Creates a diagonal matrix from a vector. */
  def diag(vector: Vector): Matrix

  /** Horizontally concatenates matrices (e.g. two 2x2 matrices give a 2x4 matrix). */
  def horzcat(matrices: Array[Matrix]): Matrix

  /** Vertically concatenates matrices (e.g. two 2x2 matrices give a 4x2 matrix). */
  def vertcat(matrices: Array[Matrix]): Matrix
}

Usage Examples:
import org.apache.spark.ml.linalg.{Matrices, Vectors}
import java.util.Random
// Dense matrix (2x3, column-major: [1,2], [3,4], [5,6])
val dense = Matrices.dense(2, 3, Array(1.0, 2.0, 3.0, 4.0, 5.0, 6.0))

// Sparse matrix in CSC format: [[1, 0, 4], [0, 3, 5], [2, 0, 6]]
val sparse = Matrices.sparse(3, 3, Array(0, 2, 3, 6), Array(0, 2, 1, 0, 1, 2), Array(1.0, 2.0, 3.0, 4.0, 5.0, 6.0))

// Special matrices
val zeros = Matrices.zeros(3, 3)
val ones = Matrices.ones(2, 4)
val identity = Matrices.eye(4)
val sparseIdentity = Matrices.speye(4)

// Random matrices (seeded so the example is repeatable)
val rng = new Random(42)
val randomDense = Matrices.rand(3, 3, rng)
val randomSparse = Matrices.sprand(4, 4, 0.3, rng) // 30% density
val gaussianDense = Matrices.randn(2, 3, rng)

// Diagonal matrix
val vector = Vectors.dense(1.0, 2.0, 3.0)
val diagonal = Matrices.diag(vector)

// Matrix concatenation
val m1 = Matrices.dense(2, 2, Array(1.0, 2.0, 3.0, 4.0))
val m2 = Matrices.dense(2, 2, Array(5.0, 6.0, 7.0, 8.0))
val horizontal = Matrices.horzcat(Array(m1, m2)) // 2x4 matrix
val vertical = Matrices.vertcat(Array(m1, m2)) // 4x2 matrix

Common operations available on all matrix types.
/**
 * Base Matrix trait with the operations common to dense and sparse matrices.
 *
 * Declared `sealed`, matching the core type summary at the end of this document.
 */
sealed trait Matrix extends Serializable {
  /** Number of rows */
  def numRows: Int

  /** Number of columns */
  def numCols: Int

  /** Flag indicating if matrix is transposed */
  val isTransposed: Boolean

  /** Converts matrix to column-major array */
  def toArray: Array[Double]

  /** Iterator over column vectors */
  def colIter: Iterator[Vector]

  /** Iterator over row vectors */
  def rowIter: Iterator[Vector]

  /** Gets element at position (i, j), i.e. row `i` and column `j` */
  def apply(i: Int, j: Int): Double

  /** Creates a deep copy */
  def copy: Matrix

  /** Returns transposed matrix */
  def transpose: Matrix

  /** Matrix-matrix multiplication */
  def multiply(y: DenseMatrix): DenseMatrix

  /** Matrix-vector multiplication */
  def multiply(y: Vector): DenseVector

  /** String representation with size limits */
  def toString(maxLines: Int, maxLineWidth: Int): String

  /** Applies function `f(i, j, value)` to all active elements */
  def foreachActive(f: (Int, Int, Double) => Unit): Unit

  /** Number of non-zero elements */
  def numNonzeros: Int

  /** Number of stored elements */
  def numActives: Int

  /** Converts to sparse matrix in column-major order */
  def toSparseColMajor: SparseMatrix

  /** Converts to sparse matrix in row-major order */
  def toSparseRowMajor: SparseMatrix

  /** Converts to sparse matrix maintaining current layout */
  def toSparse: SparseMatrix

  /** Converts to dense matrix maintaining current layout */
  def toDense: DenseMatrix

  /** Converts to dense matrix in row-major order */
  def toDenseRowMajor: DenseMatrix

  /** Converts to dense matrix in column-major order */
  def toDenseColMajor: DenseMatrix

  /** Returns optimal format with column-major preference */
  def compressedColMajor: Matrix

  /** Returns optimal format with row-major preference */
  def compressedRowMajor: Matrix

  /** Returns optimal format (dense or sparse) */
  def compressed: Matrix
}

Usage Examples:
import org.apache.spark.ml.linalg.{Matrices, Vectors}
// 2x3 matrix, column-major: [[1, 3, 5], [2, 4, 6]]
val matrix = Matrices.dense(2, 3, Array(1.0, 2.0, 3.0, 4.0, 5.0, 6.0))

// Basic properties
println(s"Rows: ${matrix.numRows}, Cols: ${matrix.numCols}") // Rows: 2, Cols: 3
println(s"Element (0,1): ${matrix(0, 1)}") // Element (0,1): 3.0
println(s"Non-zeros: ${matrix.numNonzeros}") // Non-zeros: 6

// Transposition
val transposed = matrix.transpose
println(s"Transposed size: ${transposed.numRows}x${transposed.numCols}") // 3x2

// Matrix-vector multiplication
val vector = Vectors.dense(1.0, 2.0, 3.0)
val result = matrix.multiply(vector) // 2x3 * 3x1 = 2x1, i.e. [22.0, 28.0]

// Conversions
val sparse = matrix.toSparse
val dense = sparse.toDense
val compressed = matrix.compressed

// Iterate over elements
matrix.foreachActive { (i, j, value) =>
  println(s"[$i,$j] = $value")
}

// Iterate over columns
matrix.colIter.zipWithIndex.foreach { case (col, j) =>
  println(s"Column $j: ${col.toArray.mkString("[", ", ", "]")}")
}

Dense matrix implementation storing all elements in column-major order.
/**
 * Column-major dense matrix.
 *
 * The four constructor parameters are themselves the public members of the
 * class, so they are documented here rather than re-declared in the body.
 *
 * @param numRows      number of rows
 * @param numCols      number of columns
 * @param values       matrix values in column-major (or row-major if transposed)
 * @param isTransposed whether matrix is transposed
 */
class DenseMatrix(
    val numRows: Int,
    val numCols: Int,
    val values: Array[Double],
    override val isTransposed: Boolean
) extends Matrix {
  /** Auxiliary constructor for column-major storage; passes `isTransposed = false`. */
  def this(numRows: Int, numCols: Int, values: Array[Double]) =
    this(numRows, numCols, values, false)
}
/** Factory methods and extractor for DenseMatrix. */
object DenseMatrix {
  /** Creates zero matrix */
  def zeros(numRows: Int, numCols: Int): DenseMatrix

  /** Creates matrix of ones */
  def ones(numRows: Int, numCols: Int): DenseMatrix

  /** Creates `n` x `n` identity matrix */
  def eye(n: Int): DenseMatrix

  /** Creates random uniform matrix, drawing from `rng` */
  def rand(numRows: Int, numCols: Int, rng: java.util.Random): DenseMatrix

  /** Creates random Gaussian matrix, drawing from `rng` */
  def randn(numRows: Int, numCols: Int, rng: java.util.Random): DenseMatrix

  /** Creates diagonal matrix from vector */
  def diag(vector: Vector): DenseMatrix

  /**
   * Pattern matching extractor yielding (numRows, numCols, values, isTransposed).
   *
   * NOTE(review): in the Spark sources this extractor appears to be
   * package-private (`private[ml]`) — confirm it is accessible before relying
   * on it from user code.
   */
  def unapply(dm: DenseMatrix): Option[(Int, Int, Array[Double], Boolean)]
}

Usage Examples:
import org.apache.spark.ml.linalg.{DenseMatrix, Vectors}
import java.util.Random
// Create dense matrix (column-major by default)
val matrix = new DenseMatrix(2, 3, Array(1.0, 2.0, 3.0, 4.0, 5.0, 6.0))
// With isTransposed = true the values are stored in row-major order
val transposed = new DenseMatrix(3, 2, Array(1.0, 2.0, 3.0, 4.0, 5.0, 6.0), true)

// Factory methods
val zeros = DenseMatrix.zeros(3, 3)
val ones = DenseMatrix.ones(2, 4)
val identity = DenseMatrix.eye(4)
val rng = new Random(42)
val random = DenseMatrix.rand(3, 3, rng)
val gaussian = DenseMatrix.randn(2, 3, rng)
val vector = Vectors.dense(1.0, 2.0, 3.0)
val diagonal = DenseMatrix.diag(vector)

// Access underlying data
println(matrix.values.mkString("[", ", ", "]")) // [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]

// Pattern matching
matrix match {
  case DenseMatrix(rows, cols, values, transposed) =>
    println(s"Dense matrix: ${rows}x${cols}, transposed: $transposed")
  case _ => println("Not a dense matrix")
}

Sparse matrix implementation using Compressed Sparse Column (CSC) format.
/**
 * Column-major sparse matrix in Compressed Sparse Column (CSC) format.
 *
 * The six constructor parameters are themselves the public members of the
 * class, so they are documented here rather than re-declared in the body.
 *
 * @param numRows      number of rows
 * @param numCols      number of columns
 * @param colPtrs      column pointers array
 * @param rowIndices   row indices of non-zero elements
 * @param values       non-zero values
 * @param isTransposed whether matrix is transposed (CSR format)
 */
class SparseMatrix(
    val numRows: Int,
    val numCols: Int,
    val colPtrs: Array[Int],
    val rowIndices: Array[Int],
    val values: Array[Double],
    override val isTransposed: Boolean
) extends Matrix {
  /** Auxiliary constructor for CSC format; passes `isTransposed = false`. */
  def this(numRows: Int, numCols: Int, colPtrs: Array[Int], rowIndices: Array[Int], values: Array[Double]) =
    this(numRows, numCols, colPtrs, rowIndices, values, false)
}
/** Factory methods and extractor for SparseMatrix. */
object SparseMatrix {
  /**
   * Creates sparse matrix from coordinate (COO) format.
   *
   * @param entries (row, column, value) triples, e.g. `Seq((0, 0, 1.0), (1, 1, 2.0))`
   */
  def fromCOO(numRows: Int, numCols: Int, entries: Iterable[(Int, Int, Double)]): SparseMatrix

  /** Creates sparse `n` x `n` identity matrix */
  def speye(n: Int): SparseMatrix

  /**
   * Creates random sparse matrix with uniform distribution.
   *
   * @param density fraction of populated entries (e.g. 0.3 for 30% density)
   */
  def sprand(numRows: Int, numCols: Int, density: Double, rng: java.util.Random): SparseMatrix

  /**
   * Creates random sparse matrix with Gaussian distribution.
   *
   * @param density fraction of populated entries (e.g. 0.5 for 50% density)
   */
  def sprandn(numRows: Int, numCols: Int, density: Double, rng: java.util.Random): SparseMatrix

  /** Creates sparse diagonal matrix from vector */
  def spdiag(vector: Vector): SparseMatrix

  /**
   * Pattern matching extractor yielding
   * (numRows, numCols, colPtrs, rowIndices, values, isTransposed).
   *
   * NOTE(review): in the Spark sources this extractor appears to be
   * package-private (`private[ml]`) — confirm it is accessible before relying
   * on it from user code.
   */
  def unapply(sm: SparseMatrix): Option[(Int, Int, Array[Int], Array[Int], Array[Double], Boolean)]
}

Usage Examples:
import org.apache.spark.ml.linalg.{SparseMatrix, Vectors}
import java.util.Random
// Create sparse matrix in CSC format
// Matrix: [[1, 0, 4], [0, 3, 5], [2, 0, 6]]
val sparse = new SparseMatrix(
  3, 3, // 3x3 matrix
  Array(0, 2, 3, 6), // column pointers
  Array(0, 2, 1, 0, 1, 2), // row indices
  Array(1.0, 2.0, 3.0, 4.0, 5.0, 6.0) // values
)

// Create from coordinate format: (row, column, value) triples
val entries = Seq((0, 0, 1.0), (1, 1, 2.0), (2, 2, 3.0))
val fromCOO = SparseMatrix.fromCOO(3, 3, entries)

// Factory methods
val identity = SparseMatrix.speye(4)
val rng = new Random(42)
val randomSparse = SparseMatrix.sprand(4, 4, 0.3, rng) // 30% density
val gaussianSparse = SparseMatrix.sprandn(3, 3, 0.5, rng)
val vector = Vectors.sparse(3, Array(0, 2), Array(1.0, 3.0))
val diagonal = SparseMatrix.spdiag(vector)

// Access CSC components
println(s"Column pointers: ${sparse.colPtrs.mkString("[", ", ", "]")}")
println(s"Row indices: ${sparse.rowIndices.mkString("[", ", ", "]")}")
println(s"Values: ${sparse.values.mkString("[", ", ", "]")}")

// Pattern matching
sparse match {
  case SparseMatrix(rows, cols, colPtrs, rowIndices, values, transposed) =>
    println(s"Sparse matrix: ${rows}x${cols}, nnz: ${values.length}")
  case _ => println("Not a sparse matrix")
}

// Convert to dense for inspection
val dense = sparse.toDense
println(dense.toString)

// Core matrix types
/** Sealed, serializable base type extended by the DenseMatrix and SparseMatrix classes that follow. */
sealed trait Matrix extends Serializable
/**
 * Dense matrix summary: dimensions plus a flat `values` array.
 * `isTransposed` defaults to false.
 */
class DenseMatrix(
val numRows: Int,
val numCols: Int,
val values: Array[Double],
override val isTransposed: Boolean = false
) extends Matrix
/**
 * Sparse matrix summary: dimensions plus the column-pointer, row-index and
 * value arrays. `isTransposed` defaults to false.
 */
class SparseMatrix(
    val numRows: Int,
    val numCols: Int,
    val colPtrs: Array[Int],
    val rowIndices: Array[Int],
    val values: Array[Double],
    override val isTransposed: Boolean = false
) extends Matrix

Install with Tessl CLI
npx tessl i tessl/maven-org-apache-spark--spark-mllib-local-2-11