Vectors are the fundamental building blocks for machine learning operations. Spark MLlib Local provides both dense and sparse vector implementations with automatic format optimization and comprehensive operations.
Create dense vectors from arrays or individual values.
object Vectors {
def dense(firstValue: Double, otherValues: Double*): Vector
def dense(values: Array[Double]): Vector
def zeros(size: Int): Vector
}

Usage examples:
import org.apache.spark.ml.linalg.Vectors
// From individual values
val vec1 = Vectors.dense(1.0, 2.0, 3.0, 4.0)
// From array
val values = Array(1.0, 2.0, 3.0, 4.0)
val vec2 = Vectors.dense(values)
// Zero vector
val zeroVec = Vectors.zeros(4)

Create sparse vectors efficiently when most elements are zero.
object Vectors {
def sparse(size: Int, indices: Array[Int], values: Array[Double]): Vector
def sparse(size: Int, elements: Seq[(Int, Double)]): Vector
def sparse(size: Int, elements: java.lang.Iterable[(java.lang.Integer, java.lang.Double)]): Vector
}

Usage examples:
import org.apache.spark.ml.linalg.Vectors
// From indices and values arrays
val sparseVec1 = Vectors.sparse(4, Array(0, 2), Array(1.0, 3.0))
// From sequence of (index, value) pairs
val sparseVec2 = Vectors.sparse(4, Seq((0, 1.0), (2, 3.0)))
// Represents vector [1.0, 0.0, 3.0, 0.0]

Access vector properties and elements.
trait Vector {
def size: Int
def apply(i: Int): Double
def toArray: Array[Double]
def copy: Vector
def numActives: Int
def numNonzeros: Int
}

Usage examples:
val vec = Vectors.dense(1.0, 0.0, 3.0, 4.0)
println(vec.size) // 4
println(vec(2)) // 3.0
println(vec.numNonzeros) // 3
val arr = vec.toArray // Array(1.0, 0.0, 3.0, 4.0)
val copied = vec.copy // Deep copy

Convert between dense and sparse formats with automatic optimization.
trait Vector {
def toDense: DenseVector
def toSparse: SparseVector
def compressed: Vector
}

Usage examples:
val dense = Vectors.dense(1.0, 0.0, 0.0, 4.0)
val sparse = dense.toSparse // Convert to sparse format
val optimized = dense.compressed // Choose optimal format automatically
val backToDense = sparse.toDense // Convert back to dense

Perform common mathematical operations on vectors.
// Mathematical operations available on every Vector.
trait Vector {
// Dot product of this vector with `v`, e.g. [1, 2, 3] . [4, 5, 6] = 32.0
def dot(v: Vector): Double
// Index of the maximum element, e.g. 2 for [1.0, 2.0, 3.0]
def argmax: Int
}
object Vectors {
def norm(vector: Vector, p: Double): Double
def sqdist(v1: Vector, v2: Vector): Double
}

Usage examples:
val vec1 = Vectors.dense(1.0, 2.0, 3.0)
val vec2 = Vectors.dense(4.0, 5.0, 6.0)
// Dot product
val dotResult = vec1.dot(vec2) // 32.0
// Maximum element index
val maxIdx = vec1.argmax // 2 (index of element 3.0)
// Vector norms
val l2Norm = Vectors.norm(vec1, 2.0) // L2 norm
val l1Norm = Vectors.norm(vec1, 1.0) // L1 norm
val infNorm = Vectors.norm(vec1, Double.PositiveInfinity) // Max norm
// Squared distance
val dist = Vectors.sqdist(vec1, vec2) // 27.0

Apply functions to vector elements.
trait Vector {
def foreachActive(f: (Int, Double) => Unit): Unit
}

Usage examples:
val vec = Vectors.sparse(4, Array(0, 2), Array(1.0, 3.0))
// Process only active (explicitly stored) entries — efficient for sparse vectors
vec.foreachActive { (index, value) =>
println(s"vec($index) = $value")
}
// Output:
// vec(0) = 1.0
// vec(2) = 3.0

Dense vectors store all elements in a contiguous array.
// Dense vector: every element is held in the `values` array,
// so values.length is the number of elements in the vector.
class DenseVector(val values: Array[Double]) extends Vector {
// Inherits all Vector methods
// Direct access to underlying array via 'values' field
}
object DenseVector {
def unapply(dv: DenseVector): Option[Array[Double]]
}

Sparse vectors store only non-zero elements using index and value arrays.
// Sparse vector: only the listed entries are stored, as parallel index/value arrays.
// Example: size = 4, indices = [0, 2], values = [1.0, 3.0] represents [1.0, 0.0, 3.0, 0.0].
class SparseVector(
// Logical length of the vector, counting the entries that are not stored
override val size: Int,
// Positions of the stored entries; indices(k) is the position of values(k)
val indices: Array[Int],
// Values of the stored entries, parallel to `indices`
val values: Array[Double]
) extends Vector {
// Inherits all Vector methods
// Direct access to indices and values arrays
}
object SparseVector {
def unapply(sv: SparseVector): Option[(Int, Array[Int], Array[Double])]
}

Usage examples:
// Pattern matching on vector types: the extractors expose the underlying
// storage (the values array for dense; size, indices and values for sparse).
val vec: Vector = Vectors.dense(1.0, 2.0, 3.0)
vec match {
case DenseVector(values) =>
println(s"Dense vector with ${values.length} elements")
case SparseVector(size, indices, values) =>
println(s"Sparse vector of size $size with ${values.length} non-zeros")
}
// Direct field access.
// Construct the SparseVector directly so its concrete type is known statically;
// this avoids an unchecked asInstanceOf downcast from the Vector returned by Vectors.sparse.
val sparseVec = new SparseVector(5, Array(1, 3), Array(2.0, 4.0))
println(sparseVec.size) // 5
println(sparseVec.indices.mkString(",")) // "1,3"
println(sparseVec.values.mkString(",")) // "2.0,4.0"

Use the `compressed` method to automatically choose the most efficient format.