or run

tessl search
Log in

Version

Files

docs

distributions.md  index.md  matrices.md  vectors.md
tile.json

index.mddocs/

Spark MLlib Local

Spark MLlib Local is a standalone linear algebra library extracted from Apache Spark that provides essential machine learning primitives without requiring a distributed Spark cluster. The library offers comprehensive vector and matrix abstractions with both dense and sparse implementations, high-performance BLAS operations, and statistical distribution utilities.

Package Information

  • Package Name: spark-mllib-local_2.12
  • Package Type: maven
  • Language: Scala
  • Installation:
    • Maven: <dependency><groupId>org.apache.spark</groupId><artifactId>spark-mllib-local_2.12</artifactId><version>3.0.1</version></dependency>
    • SBT: libraryDependencies += "org.apache.spark" %% "spark-mllib-local" % "3.0.1"

Core Imports

import org.apache.spark.ml.linalg.{Vector, Vectors, Matrix, Matrices}
import org.apache.spark.ml.linalg.{DenseVector, SparseVector, DenseMatrix, SparseMatrix}
import org.apache.spark.ml.stat.distribution.MultivariateGaussian

Basic Usage

import org.apache.spark.ml.linalg.{Vectors, Matrices}

// Create vectors
val denseVec = Vectors.dense(1.0, 2.0, 3.0, 4.0)
// Sparse vector of size 4 with non-zero entries at indices 0 and 2
val sparseVec = Vectors.sparse(4, Array(0, 2), Array(1.0, 3.0))

// Create matrices
// Dense values are given in column-major order, so this is [[1.0, 3.0], [2.0, 4.0]]
val denseMat = Matrices.dense(2, 2, Array(1.0, 2.0, 3.0, 4.0))
// Sparse matrix in CSC form (column pointers, row indices, values): diag(1.0, 4.0)
val sparseMat = Matrices.sparse(2, 2, Array(0, 1, 2), Array(0, 1), Array(1.0, 4.0))

// Perform operations
// Both vectors have size 4, so the dot product is defined: 1.0*1.0 + 3.0*3.0 = 10.0
val dotProduct = denseVec.dot(sparseVec)
// Matrix-vector multiplication requires the vector size to equal numCols (2 here),
// so multiply by a 2-element vector rather than the 4-element denseVec.
val colVec = Vectors.dense(1.0, 2.0)
val matVecProduct = denseMat.multiply(colVec) // DenseVector(7.0, 10.0)

Architecture

The library is organized around several key components:

  • Linear Algebra Core: Vector and Matrix traits with dense/sparse implementations
  • Factory Objects: Vectors, Matrices with creation utilities
  • High-Performance Operations: Optimized vector and matrix operations using native BLAS when available
  • Statistical Distributions: MultivariateGaussian for probability computations
  • Format Conversion: Automatic optimization between dense and sparse representations

Capabilities

Vector Operations

Create, manipulate, and perform operations on vectors with automatic format optimization.

// Common interface for vectors of Double values; implemented by the dense and sparse vector classes.
trait Vector {
  // Number of elements in the vector.
  def size: Int
  // Value at index i.
  def apply(i: Int): Double
  // Dot product of this vector with v.
  def dot(v: Vector): Double
  // Returns a copy of this vector.
  def copy: Vector
  // Values of the vector as an Array[Double].
  def toArray: Array[Double]
  // Vector in dense or sparse form; presumably whichever representation needs less storage
  // (confirm against the Spark Scaladoc).
  def compressed: Vector
  // Index of the maximal element; tie-breaking and empty-vector behavior are not shown here.
  def argmax: Int
  // Applies f to each active entry as an (index, value) pair.
  def foreachActive(f: (Int, Double) => Unit): Unit
  // Number of active entries (presumably the explicitly stored values; confirm).
  def numActives: Int
  // Number of non-zero entries.
  def numNonzeros: Int
  // Converts this vector to a SparseVector.
  def toSparse: SparseVector
  // Converts this vector to a DenseVector.
  def toDense: DenseVector
}
// Factory and utility methods for creating and measuring Vector instances.
object Vectors {
  // Dense vector from a first value followed by any number of further values.
  def dense(firstValue: Double, otherValues: Double*): Vector
  // Dense vector from an array of values.
  def dense(values: Array[Double]): Vector
  // Sparse vector of the given size from parallel arrays of indices and values.
  def sparse(size: Int, indices: Array[Int], values: Array[Double]): Vector
  // Sparse vector of the given size from (index, value) pairs.
  def sparse(size: Int, elements: Seq[(Int, Double)]): Vector
  // Java-friendly overload taking boxed (index, value) pairs.
  def sparse(size: Int, elements: java.lang.Iterable[(java.lang.Integer, java.lang.Double)]): Vector
  // Vector of the given size filled with zeros.
  def zeros(size: Int): Vector
  // Norm of the vector, parameterized by p (presumably the p-norm; valid range of p is not shown here).
  def norm(vector: Vector, p: Double): Double
  // Squared distance between v1 and v2 (presumably Euclidean; confirm).
  def sqdist(v1: Vector, v2: Vector): Double
}

Vector Operations

Matrix Operations

Create and manipulate matrices with support for both dense and sparse formats.

// Common interface for matrices of Double values; implemented by the dense and sparse matrix classes.
trait Matrix {
  // Number of rows.
  def numRows: Int
  // Number of columns.
  def numCols: Int
  // Value at row i, column j.
  def apply(i: Int, j: Int): Double
  // Matrix-vector product; the result is always a DenseVector.
  def multiply(y: Vector): DenseVector
  // Matrix-matrix product with a dense right-hand side; the result is a DenseMatrix.
  def multiply(y: DenseMatrix): DenseMatrix
  // Transpose of this matrix.
  def transpose: Matrix
  // Returns a copy of this matrix.
  def copy: Matrix
  // Values of the matrix flattened into an Array[Double] (element order is not shown here).
  def toArray: Array[Double]
  // Applies f to each active entry as a (row, column, value) triple.
  def foreachActive(f: (Int, Int, Double) => Unit): Unit
  // Iterator over the columns of the matrix, each as a Vector.
  def colIter: Iterator[Vector]
  // Iterator over the rows of the matrix, each as a Vector.
  def rowIter: Iterator[Vector]
  // Number of non-zero entries.
  def numNonzeros: Int
  // Number of active entries (presumably the explicitly stored values; confirm).
  def numActives: Int
  // Converts this matrix to a SparseMatrix.
  def toSparse: SparseMatrix
  // Converts this matrix to a DenseMatrix.
  def toDense: DenseMatrix
  // Matrix in dense or sparse form; presumably whichever representation needs less storage
  // (confirm against the Spark Scaladoc).
  def compressed: Matrix
  // Flag indicating whether the matrix is stored transposed.
  val isTransposed: Boolean
}
// Factory methods for creating Matrix instances.
object Matrices {
  // Dense matrix with the given dimensions from a flat array of values.
  def dense(numRows: Int, numCols: Int, values: Array[Double]): Matrix
  // Sparse matrix from column pointers, row indices and the corresponding values.
  def sparse(numRows: Int, numCols: Int, colPtrs: Array[Int], rowIndices: Array[Int], values: Array[Double]): Matrix
  // Matrix of the given dimensions filled with zeros.
  def zeros(numRows: Int, numCols: Int): Matrix
  // Matrix of the given dimensions filled with ones.
  def ones(numRows: Int, numCols: Int): Matrix
  // n x n identity matrix.
  def eye(n: Int): Matrix
  // n x n identity matrix (presumably in sparse form, by analogy with SparseMatrix.speye; confirm).
  def speye(n: Int): Matrix
  // Matrix of random values drawn from rng (distribution not shown here; presumably uniform).
  def rand(numRows: Int, numCols: Int, rng: java.util.Random): Matrix
  // Random matrix parameterized by a density (presumably the sparse counterpart of rand; confirm).
  def sprand(numRows: Int, numCols: Int, density: Double, rng: java.util.Random): Matrix
  // Matrix of random values drawn from rng (distribution not shown here; presumably standard normal).
  def randn(numRows: Int, numCols: Int, rng: java.util.Random): Matrix
  // Random matrix parameterized by a density (presumably the sparse counterpart of randn; confirm).
  def sprandn(numRows: Int, numCols: Int, density: Double, rng: java.util.Random): Matrix
  // Matrix built from a vector (presumably square, with the vector on its diagonal; confirm).
  def diag(vector: Vector): Matrix
  // Combines the given matrices into one (presumably side by side, left to right; confirm).
  def horzcat(matrices: Array[Matrix]): Matrix
  // Combines the given matrices into one (presumably stacked top to bottom; confirm).
  def vertcat(matrices: Array[Matrix]): Matrix
}

Matrix Operations

High-Performance Linear Algebra

All linear algebra operations are automatically optimized using native BLAS implementations when available. High-performance operations are accessed through the Vector and Matrix APIs:

  • Vector operations: dot, argmax, etc.
  • Matrix operations: multiply, transpose, etc.
  • Automatic optimization: The library automatically selects between pure Java and native BLAS based on data size and availability

Statistical Distributions

Multivariate Gaussian distribution with support for singular covariance matrices.

// Multivariate Gaussian distribution defined by a mean vector and a covariance matrix.
class MultivariateGaussian(mean: Vector, cov: Matrix) {
  // Probability density of the distribution at point x.
  def pdf(x: Vector): Double
  // Logarithm of the probability density at point x.
  def logpdf(x: Vector): Double
}

Statistical Distributions

Core Types

// Base vector trait. It is sealed, so it can only be extended within its defining
// source file; users should not implement it directly.
sealed trait Vector extends Serializable

// Dense vector implementation: holds its elements in the `values` array.
class DenseVector(val values: Array[Double]) extends Vector

// Companion object for DenseVector.
object DenseVector {
  // Extractor for pattern matching on a DenseVector; yields an Array[Double]
  // (presumably the vector's `values`; confirm).
  def unapply(dv: DenseVector): Option[Array[Double]]
}

// Sparse vector implementation: a vector of length `size` described by parallel
// `indices` and `values` arrays (presumably values(k) is the entry at position
// indices(k); confirm).
class SparseVector(
  override val size: Int,
  val indices: Array[Int], 
  val values: Array[Double]
) extends Vector

// Companion object for SparseVector.
object SparseVector {
  // Extractor for pattern matching on a SparseVector; yields an (Int, Array[Int], Array[Double])
  // triple (presumably size, indices and values, matching the constructor order; confirm).
  def unapply(sv: SparseVector): Option[(Int, Array[Int], Array[Double])]
}

// Base matrix trait. It is sealed, so it can only be extended within its defining
// source file.
sealed trait Matrix extends Serializable

// Dense matrix. `values` holds the entries in column-major order, or in row-major
// order when `isTransposed` is true.
class DenseMatrix(
  val numRows: Int,
  val numCols: Int,
  val values: Array[Double],
  // No default here: the three-argument constructor below covers the
  // non-transposed case, so a default value would only duplicate it.
  override val isTransposed: Boolean
) extends Matrix {
  // Convenience constructor for a non-transposed (column-major) matrix
  def this(numRows: Int, numCols: Int, values: Array[Double]) =
    this(numRows, numCols, values, false)
}

// Factory methods that return DenseMatrix instances.
object DenseMatrix {
  // Dense matrix of the given dimensions filled with zeros.
  def zeros(numRows: Int, numCols: Int): DenseMatrix
  // Dense matrix of the given dimensions filled with ones.
  def ones(numRows: Int, numCols: Int): DenseMatrix
  // n x n dense identity matrix.
  def eye(n: Int): DenseMatrix
  // Dense matrix of random values drawn from rng (distribution not shown here; presumably uniform).
  def rand(numRows: Int, numCols: Int, rng: java.util.Random): DenseMatrix
  // Dense matrix of random values drawn from rng (distribution not shown here; presumably standard normal).
  def randn(numRows: Int, numCols: Int, rng: java.util.Random): DenseMatrix
  // Dense matrix built from a vector (presumably square, with the vector on its diagonal; confirm).
  def diag(vector: Vector): DenseMatrix
  // Extractor for pattern matching on a DenseMatrix; yields an (Int, Int, Array[Double], Boolean)
  // tuple (presumably numRows, numCols, values and isTransposed, matching the constructor order; confirm).
  def unapply(dm: DenseMatrix): Option[(Int, Int, Array[Double], Boolean)]
}

// Sparse matrix in Compressed Sparse Column (CSC) format. When `isTransposed` is
// true the same arrays describe the matrix row-wise instead (`colPtrs` then act as
// row pointers and `rowIndices` as column indices).
class SparseMatrix(
  val numRows: Int,
  val numCols: Int,
  val colPtrs: Array[Int],
  val rowIndices: Array[Int],
  val values: Array[Double],
  // No default here: the five-argument constructor below covers the
  // non-transposed case, so a default value would only duplicate it.
  override val isTransposed: Boolean
) extends Matrix {
  // Convenience constructor for a non-transposed (CSC) matrix
  def this(
    numRows: Int,
    numCols: Int,
    colPtrs: Array[Int],
    rowIndices: Array[Int],
    values: Array[Double]
  ) = this(numRows, numCols, colPtrs, rowIndices, values, false)
}

// Factory methods that return SparseMatrix instances.
object SparseMatrix {
  // Sparse matrix built from coordinate-list entries (presumably (row, column, value) triples; confirm).
  def fromCOO(numRows: Int, numCols: Int, entries: Iterable[(Int, Int, Double)]): SparseMatrix
  // n x n sparse identity matrix.
  def speye(n: Int): SparseMatrix
  // Sparse random matrix parameterized by a density (distribution not shown here; presumably uniform).
  def sprand(numRows: Int, numCols: Int, density: Double, rng: java.util.Random): SparseMatrix
  // Sparse random matrix parameterized by a density (distribution not shown here; presumably standard normal).
  def sprandn(numRows: Int, numCols: Int, density: Double, rng: java.util.Random): SparseMatrix
  // Sparse matrix built from a vector (presumably square, with the vector on its diagonal; confirm).
  def spdiag(vector: Vector): SparseMatrix
  // Extractor for pattern matching on a SparseMatrix; yields an
  // (Int, Int, Array[Int], Array[Int], Array[Double], Boolean) tuple (presumably numRows, numCols,
  // colPtrs, rowIndices, values and isTransposed, matching the constructor order; confirm).
  def unapply(sm: SparseMatrix): Option[(Int, Int, Array[Int], Array[Int], Array[Double], Boolean)]
}