Spark Project ML Local Library provides local linear algebra operations for machine learning without requiring a distributed Spark context
npx @tessl/cli install tessl/maven-org-apache-spark--spark-mllib-local-2-13@3.5.0
Spark MLlib Local provides local linear algebra operations for machine learning without requiring a distributed Spark context. It includes vector and matrix data structures (dense and sparse), statistical distributions, and utility functions that can operate independently of a distributed Spark cluster.
pom.xml: <groupId>org.apache.spark</groupId><artifactId>spark-mllib-local_2.13</artifactId><version>3.5.6</version>
Gradle: implementation 'org.apache.spark:spark-mllib-local_2.13:3.5.6'
import org.apache.spark.ml.linalg.{Vector, DenseVector, SparseVector, Vectors}
import org.apache.spark.ml.linalg.{Matrix, DenseMatrix, SparseMatrix, Matrices}
import org.apache.spark.ml.stat.distribution.MultivariateGaussian
import org.apache.spark.ml.linalg.{Vectors, Matrices}
// Create vectors
val denseVec = Vectors.dense(1.0, 2.0, 3.0)
// Declared with size 3 so it matches denseVec: dot requires both operands to
// have the same size, and a size-5 sparse vector would make the call below fail.
val sparseVec = Vectors.sparse(3, Array(0, 2), Array(1.0, 3.0))
// Create matrices
val denseMatrix = Matrices.dense(2, 3, Array(1.0, 2.0, 3.0, 4.0, 5.0, 6.0))
val sparseMatrix = Matrices.sparse(3, 3, Array(0, 1, 2, 3), Array(0, 1, 2), Array(1.0, 2.0, 3.0))
// Basic operations
// 1.0 * 1.0 + 2.0 * 0.0 + 3.0 * 3.0 = 10.0
val dotProduct = denseVec.dot(sparseVec)
// 2x3 matrix times a size-3 vector
val matVecProduct = denseMatrix.multiply(denseVec)
val norm = Vectors.norm(denseVec, 2.0)
Spark MLlib Local is built around several key components:
Core vector functionality supporting both dense and sparse representations with automatic optimization and conversion capabilities.
// Vector trait and factory methods
/**
 * Abridged view of the local vector interface; a longer member list for the
 * same trait appears further down in this document.
 */
trait Vector extends Serializable {
// Size of the vector.
def size: Int
// Element access: v(i) takes an Int index and yields a Double.
def apply(i: Int): Double
// The vector's values as an Array[Double].
def toArray: Array[Double]
// Dot product with another vector (the usage example stores the result as dotProduct).
def dot(v: Vector): Double
}
/** Factory methods for building vectors, plus a norm helper. Signatures only. */
object Vectors {
// Dense vector from individual values; this is the form the usage example
// calls as Vectors.dense(1.0, 2.0, 3.0).
def dense(firstValue: Double, otherValues: Double*): Vector
// Dense vector from an array of values.
def dense(values: Array[Double]): Vector
// Sparse vector of the given size built from index and value arrays,
// e.g. Vectors.sparse(5, Array(0, 2, 4), Array(1.0, 3.0, 5.0)).
def sparse(size: Int, indices: Array[Int], values: Array[Double]): Vector
// All-zero vector of the given size.
def zeros(size: Int): Vector
// Norm of `vector` for the given p; the usage example passes p = 2.0.
def norm(vector: Vector, p: Double): Double
}
Matrix operations supporting dense and sparse formats, with efficient multiplication, transposition, and format conversion.
// Matrix trait and factory methods
/**
 * Abridged view of the local matrix interface; a longer member list for the
 * same trait appears further down in this document.
 */
trait Matrix extends Serializable {
// Number of rows.
def numRows: Int
// Number of columns.
def numCols: Int
// Element access: m(i, j) yields a Double (presumably i = row, j = column; confirm).
def apply(i: Int, j: Int): Double
// Matrix-vector product; the declared result type is always DenseVector.
def multiply(y: Vector): DenseVector
// Transposed matrix.
def transpose: Matrix
}
/** Factory methods for building matrices. Signatures only. */
object Matrices {
// Dense matrix with the given dimensions and a flat array of values,
// e.g. Matrices.dense(2, 3, Array(1.0, 2.0, 3.0, 4.0, 5.0, 6.0)).
def dense(numRows: Int, numCols: Int, values: Array[Double]): Matrix
// Sparse matrix from column pointers, row indices and values,
// e.g. Matrices.sparse(3, 3, Array(0, 1, 2, 3), Array(0, 1, 2), Array(1.0, 2.0, 3.0)).
def sparse(numRows: Int, numCols: Int, colPtrs: Array[Int], rowIndices: Array[Int], values: Array[Double]): Matrix
// All-zero matrix with the given dimensions.
def zeros(numRows: Int, numCols: Int): Matrix
}
Multivariate statistical distributions for machine learning applications with support for probability density functions.
// Multivariate Gaussian distribution
/**
 * Multivariate Gaussian distribution built from a `mean` vector and a `cov`
 * matrix (presumably the covariance matrix; confirm against the API docs).
 */
class MultivariateGaussian(mean: Vector, cov: Matrix) extends Serializable {
// Probability density evaluated at x.
def pdf(x: Vector): Double
// Presumably the logarithm of the density at x; confirm against the API docs.
def logpdf(x: Vector): Double
}
// Core vector types
/**
 * Local vector interface implemented by DenseVector and SparseVector.
 */
trait Vector extends Serializable {
// Size of the vector.
def size: Int
// The vector's values as an Array[Double].
def toArray: Array[Double]
// Element access: v(i) takes an Int index and yields a Double.
def apply(i: Int): Double
// Returns a Vector copy (presumably a deep copy; confirm).
def copy: Vector
// Calls f with (Int, Double) pairs, presumably (index, value) of each active entry.
def foreachActive(f: (Int, Double) => Unit): Unit
// Count of active entries (presumably the explicitly stored ones; confirm).
def numActives: Int
// Count of nonzero values.
def numNonzeros: Int
// Conversion to a SparseVector.
def toSparse: SparseVector
// Conversion to a DenseVector.
def toDense: DenseVector
// Returns a Vector; presumably whichever of the dense/sparse forms is more compact (confirm).
def compressed: Vector
// Index of the maximal element (behaviour on ties or empty vectors is not shown here).
def argmax: Int
// Dot product with another vector.
def dot(v: Vector): Double
}
/** Dense Vector implementation exposing its `values` array as a public val. */
class DenseVector(val values: Array[Double]) extends Vector
/**
 * Sparse Vector implementation: an explicit `size` plus `indices` and `values`
 * arrays (the two arrays have the same length in the usage example).
 */
class SparseVector(override val size: Int, val indices: Array[Int], val values: Array[Double]) extends Vector
// Core matrix types
/**
 * Local matrix interface implemented by DenseMatrix and SparseMatrix.
 */
trait Matrix extends Serializable {
// Number of rows.
def numRows: Int
// Number of columns.
def numCols: Int
// Transposition flag; DenseMatrix and SparseMatrix override it through their
// constructors, which is why it must be declared here. False unless overridden.
val isTransposed: Boolean = false
// Element access: m(i, j) yields a Double (presumably i = row, j = column; confirm).
def apply(i: Int, j: Int): Double
// Returns a Matrix copy (presumably a deep copy; confirm).
def copy: Matrix
// Transposed matrix.
def transpose: Matrix
// Matrix-matrix product with a dense right-hand side; the result is a DenseMatrix.
def multiply(y: DenseMatrix): DenseMatrix
// Matrix-vector product; the result is a DenseVector.
def multiply(y: Vector): DenseVector
// Calls f with (Int, Int, Double) triples, presumably (row, column, value) of each active entry.
def foreachActive(f: (Int, Int, Double) => Unit): Unit
// Count of nonzero values.
def numNonzeros: Int
// Count of active entries (presumably the explicitly stored ones; confirm).
def numActives: Int
// Conversion to a SparseMatrix.
def toSparse: SparseMatrix
// Conversion to a DenseMatrix.
def toDense: DenseMatrix
}
/**
 * Dense Matrix implementation: dimensions, a flat `values` array and an
 * `isTransposed` flag (presumably selecting how `values` is laid out; confirm).
 */
class DenseMatrix(val numRows: Int, val numCols: Int, val values: Array[Double], override val isTransposed: Boolean) extends Matrix
/**
 * Sparse Matrix implementation: dimensions, `colPtrs` / `rowIndices` / `values`
 * arrays and an `isTransposed` flag. NOTE(review): the array names look like
 * compressed sparse column storage; confirm against the API docs.
 */
class SparseMatrix(val numRows: Int, val numCols: Int, val colPtrs: Array[Int], val rowIndices: Array[Int], val values: Array[Double], override val isTransposed: Boolean) extends Matrix