Spark MLlib Local provides linear algebra abstractions and operations for local machine learning computations without requiring a distributed Spark cluster.
tessl install tessl/maven-org-apache-spark--spark-mllib-local_2-12@3.0.0
Spark MLlib Local is a standalone linear algebra library extracted from Apache Spark that provides essential machine learning primitives without requiring a distributed Spark cluster. The library offers comprehensive vector and matrix abstractions with both dense and sparse implementations, high-performance BLAS operations, and statistical distribution utilities.
<dependency>
  <groupId>org.apache.spark</groupId>
  <artifactId>spark-mllib-local_2.12</artifactId>
  <version>3.0.1</version>
</dependency>
libraryDependencies += "org.apache.spark" %% "spark-mllib-local" % "3.0.1"
import org.apache.spark.ml.linalg.{Vector, Vectors, Matrix, Matrices}
import org.apache.spark.ml.linalg.{DenseVector, SparseVector, DenseMatrix, SparseMatrix}
import org.apache.spark.ml.stat.distribution.MultivariateGaussian
import org.apache.spark.ml.linalg.{Vectors, Matrices}
// Create vectors: a dense 4-element vector and a sparse 4-element vector
// with non-zero entries at indices 0 and 2.
val denseVec = Vectors.dense(1.0, 2.0, 3.0, 4.0)
val sparseVec = Vectors.sparse(4, Array(0, 2), Array(1.0, 3.0))
// Create matrices: both are 2x2. Dense values are given in column-major order;
// the sparse matrix is given in CSC form (column pointers, row indices, values).
val denseMat = Matrices.dense(2, 2, Array(1.0, 2.0, 3.0, 4.0))
val sparseMat = Matrices.sparse(2, 2, Array(0, 1, 2), Array(0, 1), Array(1.0, 4.0))
// Perform operations
// Both vectors have size 4, so the dot product is defined: 1*1 + 3*3 = 10.0
val dotProduct = denseVec.dot(sparseVec)
// The vector's size must equal the matrix's column count (2 here), so the
// 4-element denseVec cannot be used; multiply by a 2-element vector instead.
val matVecProduct = denseMat.multiply(Vectors.dense(1.0, 2.0))

The library is organized around several key components:
Create, manipulate, and perform operations on vectors with automatic format optimization.
trait Vector {
def size: Int
def apply(i: Int): Double
def dot(v: Vector): Double
def copy: Vector
def toArray: Array[Double]
def compressed: Vector
def argmax: Int
def foreachActive(f: (Int, Double) => Unit): Unit
def numActives: Int
def numNonzeros: Int
def toSparse: SparseVector
def toDense: DenseVector
}
object Vectors {
def dense(firstValue: Double, otherValues: Double*): Vector
def dense(values: Array[Double]): Vector
def sparse(size: Int, indices: Array[Int], values: Array[Double]): Vector
def sparse(size: Int, elements: Seq[(Int, Double)]): Vector
def sparse(size: Int, elements: java.lang.Iterable[(java.lang.Integer, java.lang.Double)]): Vector
def zeros(size: Int): Vector
def norm(vector: Vector, p: Double): Double
def sqdist(v1: Vector, v2: Vector): Double
}
Create and manipulate matrices with support for both dense and sparse formats.
trait Matrix {
def numRows: Int
def numCols: Int
def apply(i: Int, j: Int): Double
def multiply(y: Vector): DenseVector
def multiply(y: DenseMatrix): DenseMatrix
def transpose: Matrix
def copy: Matrix
def toArray: Array[Double]
def foreachActive(f: (Int, Int, Double) => Unit): Unit
def colIter: Iterator[Vector]
def rowIter: Iterator[Vector]
def numNonzeros: Int
def numActives: Int
def toSparse: SparseMatrix
def toDense: DenseMatrix
def compressed: Matrix
val isTransposed: Boolean
}
object Matrices {
def dense(numRows: Int, numCols: Int, values: Array[Double]): Matrix
def sparse(numRows: Int, numCols: Int, colPtrs: Array[Int], rowIndices: Array[Int], values: Array[Double]): Matrix
def zeros(numRows: Int, numCols: Int): Matrix
def ones(numRows: Int, numCols: Int): Matrix
def eye(n: Int): Matrix
def speye(n: Int): Matrix
def rand(numRows: Int, numCols: Int, rng: java.util.Random): Matrix
def sprand(numRows: Int, numCols: Int, density: Double, rng: java.util.Random): Matrix
def randn(numRows: Int, numCols: Int, rng: java.util.Random): Matrix
def sprandn(numRows: Int, numCols: Int, density: Double, rng: java.util.Random): Matrix
def diag(vector: Vector): Matrix
def horzcat(matrices: Array[Matrix]): Matrix
def vertcat(matrices: Array[Matrix]): Matrix
}
All linear algebra operations are automatically optimized using native BLAS implementations when available. High-performance operations are accessed through the Vector and Matrix APIs:
Vector operations: dot(), argmax(), etc.
Matrix operations: multiply(), transpose(), etc.
Multivariate Gaussian distribution with support for singular covariance matrices.
class MultivariateGaussian(mean: Vector, cov: Matrix) {
def pdf(x: Vector): Double
def logpdf(x: Vector): Double
}
// Base vector trait - users should not implement directly
// `sealed` restricts direct subtypes to the defining source file, so this trait
// cannot be extended from user code.
sealed trait Vector extends Serializable
// Dense vector implementation.
// `values` is a public field holding the vector's backing array.
class DenseVector(val values: Array[Double]) extends Vector
// Companion extractor: allows pattern matching of the form `case DenseVector(values) => ...`.
object DenseVector {
def unapply(dv: DenseVector): Option[Array[Double]]
}
// Sparse vector implementation.
// `size` overrides the member declared on Vector; `indices` and `values` are public fields.
// NOTE(review): presumably indices(k) is the position of values(k), with the two
// arrays of equal length - confirm against the Spark API docs.
class SparseVector(
override val size: Int,
val indices: Array[Int],
val values: Array[Double]
) extends Vector
// Companion extractor: allows pattern matching on a three-component tuple,
// presumably (size, indices, values) in constructor order.
object SparseVector {
def unapply(sv: SparseVector): Option[(Int, Array[Int], Array[Double])]
}
// Base matrix trait.
// `sealed` restricts direct subtypes to the defining source file.
sealed trait Matrix extends Serializable
// Dense matrix in column-major order.
//
// The primary constructor takes all four fields explicitly. The default value
// for `isTransposed` has been dropped: the three-argument auxiliary constructor
// below already covers the non-transposed case, and combining a default
// argument with an overload of the same arity is redundant and invites
// overload-resolution surprises. Every existing call shape (three or four
// arguments, positional or named) still resolves.
class DenseMatrix(
    val numRows: Int,
    val numCols: Int,
    val values: Array[Double],
    override val isTransposed: Boolean
) extends Matrix {
  // Convenience constructor for the common non-transposed case.
  def this(numRows: Int, numCols: Int, values: Array[Double]) =
    this(numRows, numCols, values, false)
}
// Companion object for DenseMatrix: factory methods that return DenseMatrix
// rather than the general Matrix type, plus an extractor.
object DenseMatrix {
def zeros(numRows: Int, numCols: Int): DenseMatrix
def ones(numRows: Int, numCols: Int): DenseMatrix
// Square n x n result - presumably the identity matrix; confirm against the API docs.
def eye(n: Int): DenseMatrix
// The random variants take the caller's java.util.Random as their source of randomness.
// NOTE(review): presumably `rand` is uniform and `randn` is standard normal - confirm.
def rand(numRows: Int, numCols: Int, rng: java.util.Random): DenseMatrix
def randn(numRows: Int, numCols: Int, rng: java.util.Random): DenseMatrix
def diag(vector: Vector): DenseMatrix
// Extractor yielding a four-component tuple, presumably
// (numRows, numCols, values, isTransposed) in constructor order.
def unapply(dm: DenseMatrix): Option[(Int, Int, Array[Double], Boolean)]
}
// Sparse matrix in CSC format.
//
// The primary constructor takes all six fields explicitly. The default value
// for `isTransposed` has been dropped: the five-argument auxiliary constructor
// below already covers the non-transposed case, and combining a default
// argument with an overload of the same arity is redundant and invites
// overload-resolution surprises. Every existing call shape (five or six
// arguments, positional or named) still resolves.
class SparseMatrix(
    val numRows: Int,
    val numCols: Int,
    val colPtrs: Array[Int],
    val rowIndices: Array[Int],
    val values: Array[Double],
    override val isTransposed: Boolean
) extends Matrix {
  // Convenience constructor for the common non-transposed case.
  def this(
      numRows: Int,
      numCols: Int,
      colPtrs: Array[Int],
      rowIndices: Array[Int],
      values: Array[Double]
  ) = this(numRows, numCols, colPtrs, rowIndices, values, false)
}
// Companion object for SparseMatrix: factory methods that return SparseMatrix
// rather than the general Matrix type, plus an extractor.
object SparseMatrix {
// Builds a matrix from (Int, Int, Double) triples.
// NOTE(review): presumably each triple is (row, column, value) - confirm against the API docs.
def fromCOO(numRows: Int, numCols: Int, entries: Iterable[(Int, Int, Double)]): SparseMatrix
def speye(n: Int): SparseMatrix
// The random variants take a `density` and the caller's java.util.Random.
// NOTE(review): presumably `density` is the expected fraction of non-zero entries - confirm.
def sprand(numRows: Int, numCols: Int, density: Double, rng: java.util.Random): SparseMatrix
def sprandn(numRows: Int, numCols: Int, density: Double, rng: java.util.Random): SparseMatrix
def spdiag(vector: Vector): SparseMatrix
// Extractor yielding a six-component tuple, presumably
// (numRows, numCols, colPtrs, rowIndices, values, isTransposed) in constructor order.
def unapply(sm: SparseMatrix): Option[(Int, Int, Array[Int], Array[Int], Array[Double], Boolean)]
}