CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/maven-org-apache-spark--spark-mllib-local-2-11

Spark ML Local Library providing linear algebra and statistical utilities for local machine learning operations without requiring a distributed Spark cluster

Pending
Overview
Eval results
Files

docs/vectors.md

Vector Operations

Core vector functionality providing dense and sparse representations with unified operations. Essential for feature representations and mathematical computations in machine learning applications.

Capabilities

Vector Creation

Create vectors using factory methods from the Vectors object.

/**
 * Factory methods for creating Vector instances.
 *
 * Prefer these factories over direct constructor calls: they are declared to
 * return the general Vector type, so call sites stay independent of the
 * concrete (dense/sparse) representation.
 */
object Vectors {
  /** Creates a dense vector from individual values (at least one value is required). */
  def dense(firstValue: Double, otherValues: Double*): Vector
  
  /** Creates a dense vector from an array */
  def dense(values: Array[Double]): Vector
  
  /** Creates a sparse vector from indices and values arrays; indices are expected to be strictly increasing (see SparseVector). */
  def sparse(size: Int, indices: Array[Int], values: Array[Double]): Vector
  
  /** Creates a sparse vector from sequence of (index, value) pairs */
  def sparse(size: Int, elements: Seq[(Int, Double)]): Vector
  
  /** Creates a sparse vector from Java collections (Java interop overload). */
  def sparse(size: Int, elements: java.lang.Iterable[(java.lang.Integer, java.lang.Double)]): Vector
  
  /** Creates a zero vector of specified size */
  def zeros(size: Int): Vector
}

Usage Examples:

import org.apache.spark.ml.linalg.Vectors

// Dense vectors: every element is stored explicitly
val dense1 = Vectors.dense(1.0, 2.0, 3.0)
val dense2 = Vectors.dense(Array(1.0, 2.0, 3.0, 4.0))
val zeros = Vectors.zeros(5)

// Sparse vectors: only the listed (index, value) entries are non-zero
val sparse1 = Vectors.sparse(5, Array(0, 2, 4), Array(1.0, 3.0, 5.0))  // [1.0, 0.0, 3.0, 0.0, 5.0]
val sparse2 = Vectors.sparse(4, Seq((0, 1.0), (3, 4.0)))               // [1.0, 0.0, 0.0, 4.0]

Vector Operations

Common operations available on all vector types.

/**
 * Base Vector trait with common operations.
 *
 * Declared `sealed` to match the published API and the Types summary in this
 * document: only DenseVector and SparseVector implement it, so pattern
 * matches over Vector are exhaustive.
 */
sealed trait Vector extends Serializable {
  /** Size (number of elements) of the vector */
  def size: Int
  
  /** Converts the vector to a double array (zeros are materialized for sparse vectors) */
  def toArray: Array[Double]
  
  /** Gets the value at index i */
  def apply(i: Int): Double
  
  /** Creates a deep copy of the vector */
  def copy: Vector
  
  /** Applies function to all active (explicitly stored) elements as (index, value) pairs */
  def foreachActive(f: (Int, Double) => Unit): Unit
  
  /** Number of active (explicitly stored) entries — may include stored zeros */
  def numActives: Int
  
  /** Number of non-zero elements (at most numActives) */
  def numNonzeros: Int
  
  /** Converts to sparse representation */
  def toSparse: SparseVector
  
  /** Converts to dense representation */
  def toDense: DenseVector
  
  /** Returns optimal representation (dense or sparse) */
  def compressed: Vector
  
  /** Returns index of maximum element, -1 if empty */
  def argmax: Int
}

Usage Examples:

import org.apache.spark.ml.linalg.Vectors

// vector = [1.0, 0.0, 3.0, 0.0, 5.0]
val vector = Vectors.dense(1.0, 0.0, 3.0, 0.0, 5.0)

// Basic operations
println(s"Size: ${vector.size}")           // Size: 5  
println(s"Element at index 2: ${vector(2)}")  // Element at index 2: 3.0
println(s"Non-zeros: ${vector.numNonzeros}")   // Non-zeros: 3
println(s"Max index: ${vector.argmax}")        // Max index: 4 (value 5.0)

// Conversions
val sparse = vector.toSparse
val dense = sparse.toDense
val compressed = vector.compressed

// Iterate over active (explicitly stored) elements; a dense vector stores
// every slot, so the != 0.0 guard skips the stored zeros
vector.foreachActive { (index, value) =>
  if (value != 0.0) println(s"[$index] = $value")
}

Dense Vectors

Dense vector implementation storing all elements in a contiguous array.

/**
 * Dense vector represented by a value array.
 *
 * The constructor parameter is declared as a `val`, so `values` is already a
 * public member of the class — redeclaring it in the class body would be a
 * duplicate definition and does not compile.
 *
 * @param values the underlying array holding every element of the vector
 */
class DenseVector(val values: Array[Double]) extends Vector

object DenseVector {
  /** Extracts the values array from a dense vector for pattern matching */
  def unapply(dv: DenseVector): Option[Array[Double]]
}

Usage Examples:

import org.apache.spark.ml.linalg.{DenseVector, Vectors}

// Create dense vector
val dense = new DenseVector(Array(1.0, 2.0, 3.0))
// NOTE(review): Vectors.dense is declared to return Vector; the cast relies on
// the factory actually producing a DenseVector at runtime — pattern matching
// (below) is the safer way to recover the concrete type
val dense2 = Vectors.dense(1.0, 2.0, 3.0).asInstanceOf[DenseVector]

// Access underlying array
val array = dense.values
println(array.mkString("[", ", ", "]"))  // [1.0, 2.0, 3.0]

// Pattern matching via DenseVector.unapply
dense match {
  case DenseVector(values) => println(s"Dense with ${values.length} elements")
  case _ => println("Not a dense vector")
}

Sparse Vectors

Sparse vector implementation storing only non-zero elements with separate index and value arrays.

/**
 * Sparse vector represented by parallel index and value arrays.
 *
 * The constructor parameters are declared as `val`s, so `size`, `indices`,
 * and `values` are already public members — redeclaring them in the class
 * body would be duplicate definitions and does not compile.
 *
 * @param size    size of the vector
 * @param indices indices of non-zero elements (strictly increasing)
 * @param values  values corresponding to the entries in `indices`
 */
class SparseVector(
  override val size: Int,
  val indices: Array[Int],
  val values: Array[Double]
) extends Vector

object SparseVector {
  /** Extracts size, indices, and values for pattern matching */
  def unapply(sv: SparseVector): Option[(Int, Array[Int], Array[Double])]
}

Usage Examples:

import org.apache.spark.ml.linalg.{SparseVector, Vectors}

// Create sparse vector: [1.0, 0.0, 3.0, 0.0, 5.0]
val sparse = new SparseVector(5, Array(0, 2, 4), Array(1.0, 3.0, 5.0))
val sparse2 = Vectors.sparse(5, Array(0, 2, 4), Array(1.0, 3.0, 5.0)).asInstanceOf[SparseVector]

// Access the stored components (only non-zero entries are kept)
println(s"Size: ${sparse.size}")                    // Size: 5
println(s"Indices: ${sparse.indices.mkString(", ")}")   // Indices: 0, 2, 4
println(s"Values: ${sparse.values.mkString(", ")}")     // Values: 1.0, 3.0, 5.0

// Pattern matching via SparseVector.unapply
sparse match {
  case SparseVector(size, indices, values) => 
    println(s"Sparse vector of size $size with ${indices.length} non-zero elements")
  case _ => println("Not a sparse vector")
}

// Convert to dense array — zeros are materialized
val array = sparse.toArray
println(array.mkString("[", ", ", "]"))  // [1.0, 0.0, 3.0, 0.0, 5.0]

Vector Utility Functions

Utility functions for vector computations.

object Vectors {
  /** Computes the p-norm of a vector; the examples below use p = 1, p = 2, and Double.PositiveInfinity (max norm) */
  def norm(vector: Vector, p: Double): Double
  
  /** Computes squared Euclidean distance between two vectors (presumably both of the same size — verify against implementation) */
  def sqdist(v1: Vector, v2: Vector): Double
}

Usage Examples:

import org.apache.spark.ml.linalg.Vectors

val v1 = Vectors.dense(1.0, 2.0, 3.0)
val v2 = Vectors.dense(4.0, 5.0, 6.0)

// Compute norms of v1 = [1, 2, 3]
val l1Norm = Vectors.norm(v1, 1.0)        // L1 norm: 1 + 2 + 3 = 6.0
val l2Norm = Vectors.norm(v1, 2.0)        // L2 norm: √(1 + 4 + 9) = √14 ≈ 3.74
val infNorm = Vectors.norm(v1, Double.PositiveInfinity)  // L∞ (max) norm: 3.0

// Compute squared distance
val sqDist = Vectors.sqdist(v1, v2)       // (4-1)² + (5-2)² + (6-3)² = 27.0

Types

// Core vector types — the trait is sealed, so these two classes are its only implementations
sealed trait Vector extends Serializable
class DenseVector(val values: Array[Double]) extends Vector
class SparseVector(override val size: Int, val indices: Array[Int], val values: Array[Double]) extends Vector

Install with Tessl CLI

npx tessl i tessl/maven-org-apache-spark--spark-mllib-local-2-11

docs

distributions.md

index.md

matrices.md

testing.md

vectors.md

tile.json