or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

docs

classification.md · clustering.md · evaluation-tuning.md · feature-engineering.md · index.md · linear-algebra.md · pipeline-components.md · recommendation.md · regression.md
tile.json

docs/linear-algebra.md

Linear Algebra

Local vector and matrix data structures, BLAS/LAPACK-style numerical routines, and distributed matrix types for large-scale numerical computations across cluster nodes.

Capabilities

Vector Operations

Core vector data structures and operations for representing feature vectors and model parameters.

/**
 * Abstract base class for vectors.
 *
 * Declares the access, conversion and comparison operations shared by the
 * `DenseVector` and `SparseVector` classes documented in this file. Only
 * signatures appear here, so descriptions that no implementation in this file
 * backs up are marked as assumptions.
 */
abstract class Vector extends Serializable {
  /** Number of entries in the vector (`DenseVector` defines it as `values.length`). */
  def size: Int
  /** Value at index `i`. */
  def apply(i: Int): Double
  /** New vector with the same contents; both subclasses in this file clone their backing arrays. */
  def copy: Vector
  /** Calls `f(index, value)`. NOTE(review): presumably once per explicitly stored entry - confirm. */
  def foreachActive(f: (Int, Double) => Unit): Unit
  /** NOTE(review): presumably the number of explicitly stored ("active") entries - confirm. */
  def numActives: Int
  /** NOTE(review): presumably the number of entries whose value is not zero - confirm. */
  def numNonzeros: Int
  /** Contents of the vector as an `Array[Double]`. */
  def toArray: Array[Double]
  /** This vector as a `SparseVector`. */
  def toSparse: SparseVector
  /** This vector as a `DenseVector`. */
  def toDense: DenseVector
  /** NOTE(review): presumably returns whichever of the dense/sparse forms is smaller - confirm. */
  def compressed: Vector
  /** NOTE(review): presumably the index of the largest entry; empty-vector behaviour not shown - confirm. */
  def argmax: Int
  /** Dot product of this vector with `v`. */
  def dot(v: Vector): Double
  // Standard object-contract members, redeclared so that subclasses must supply them.
  def equals(other: Any): Boolean
  def hashCode(): Int
  def toString: String
}

/**
 * Dense vector backed directly by `values`: every entry is stored, and the
 * array length is the vector length.
 */
class DenseVector(val values: Array[Double]) extends Vector {
  /** Vector length, i.e. the length of the backing array. */
  def size: Int = {
    values.length
  }

  /** Entry at position `i`, read straight from the backing array. */
  def apply(i: Int): Double = {
    values.apply(i)
  }

  /** Independent copy: the backing array is cloned, not shared. */
  def copy: DenseVector = {
    val duplicated = values.clone()
    new DenseVector(duplicated)
  }

  /** Overwrites the entry at position `i` in place. */
  def update(i: Int, value: Double): Unit = {
    values.update(i, value)
  }

  // Signature-only members: this listing does not show their bodies.
  def dot(other: Vector): Double
  def norm(p: Double): Double
}

/**
 * Sparse vector implementation storing only non-zero values.
 *
 * @param size    logical length of the vector
 * @param indices positions of the stored entries; `apply` binary-searches this
 *                array, so it must be sorted in ascending order
 * @param values  stored values, `values(k)` being the entry at position `indices(k)`
 */
class SparseVector(
  override val size: Int,
  val indices: Array[Int],
  val values: Array[Double]
) extends Vector {
  /**
   * Entry at position `i`: the stored value when `i` appears in `indices`,
   * otherwise 0.0.
   *
   * @throws IndexOutOfBoundsException if `i` is outside `[0, size)`
   */
  def apply(i: Int): Double = {
    // Without this guard an out-of-range index would be indistinguishable from
    // an implicit zero, whereas DenseVector.apply fails on the same input.
    if (i < 0 || i >= size) {
      throw new IndexOutOfBoundsException(s"Index $i out of bounds [0, $size)")
    }
    val idx = java.util.Arrays.binarySearch(indices, i)
    // binarySearch returns a negative insertion point when `i` is not stored.
    if (idx >= 0) values(idx) else 0.0
  }

  /** Independent copy: both backing arrays are cloned, not shared. */
  def copy: SparseVector = new SparseVector(size, indices.clone(), values.clone())

  // Signature-only members: this listing does not show their bodies.
  def dot(other: Vector): Double
  def norm(p: Double): Double
}

/**
 * Vector factory methods and utilities.
 *
 * Signatures only; no bodies are shown in this file, so behavioural details
 * below are marked as assumptions where the signature alone does not fix them.
 */
object Vectors {
  /** Builds a dense vector from one or more values passed as varargs. */
  def dense(firstValue: Double, otherValues: Double*): DenseVector
  /** Builds a dense vector from an array of values. */
  def dense(values: Array[Double]): DenseVector
  /** Builds a sparse vector of length `size` from (index, value) pairs. */
  def sparse(size: Int, elements: Seq[(Int, Double)]): SparseVector
  /** Builds a sparse vector of length `size` from parallel `indices` / `values` arrays. */
  def sparse(size: Int, indices: Array[Int], values: Array[Double]): SparseVector
  /** Dense vector of length `size`. NOTE(review): presumably all zeros, per the name - confirm. */
  def zeros(size: Int): DenseVector
  /** p-norm of `vector`. NOTE(review): the accepted range of `p` is not shown here - confirm. */
  def norm(vector: Vector, p: Double): Double
  /** Squared distance between `v1` and `v2`. NOTE(review): presumably requires equal sizes - confirm. */
  def sqdist(v1: Vector, v2: Vector): Double
  /** Converts an `org.apache.spark.mllib.linalg.Vector` into this package's `Vector`. */
  def fromML(v: org.apache.spark.mllib.linalg.Vector): Vector
  /** Converts a Breeze vector into a `Vector`. NOTE(review): confirm this is part of the public API. */
  def fromBreeze(bv: breeze.linalg.Vector[Double]): Vector
}

Usage Example:

import org.apache.spark.ml.linalg.{Vector, Vectors}

// Create dense vector
val denseVec = Vectors.dense(1.0, 2.0, 3.0, 4.0)
println(s"Dense vector: $denseVec")

// Create sparse vector: length 4, with entries at positions 0, 2 and 3.
// It has the same length as denseVec so the two can be compared below.
val sparseVec = Vectors.sparse(4, Array(0, 2, 3), Array(1.0, 3.0, 5.0))
println(s"Sparse vector: $sparseVec")

// Vector operations
val norm = Vectors.norm(denseVec, 2.0)
println(s"L2 norm: $norm")

// sqdist requires both vectors to have the same size
val distance = Vectors.sqdist(denseVec, sparseVec.toDense)
println(s"Squared distance: $distance")

Matrix Operations

Matrix data structures and operations for representing datasets and model parameters.

/**
 * Abstract base class for matrices.
 *
 * Declares the access, conversion and multiplication operations shared by the
 * `DenseMatrix` and `SparseMatrix` classes documented in this file. Only
 * signatures appear here, so descriptions that the signature alone does not
 * fix are marked as assumptions.
 */
abstract class Matrix extends Serializable {
  /** Number of rows. */
  def numRows: Int
  /** Number of columns. */
  def numCols: Int
  /** Entry at row `i`, column `j`. */
  def apply(i: Int, j: Int): Double
  /** New matrix with the same contents; both subclasses in this file clone their backing arrays. */
  def copy: Matrix
  /** Calls `f(row, col, value)`. NOTE(review): presumably once per explicitly stored entry - confirm. */
  def foreachActive(f: (Int, Int, Double) => Unit): Unit
  /** NOTE(review): presumably the number of explicitly stored ("active") entries - confirm. */
  def numActives: Int
  /** NOTE(review): presumably the number of entries whose value is not zero - confirm. */
  def numNonzeros: Int
  /** Contents of the matrix as a flat `Array[Double]`. NOTE(review): element order not shown - confirm. */
  def toArray: Array[Double]
  /** Flag carried by both subclasses' constructors; selects the storage orientation. */
  def isTransposed: Boolean
  /** This matrix as an `org.apache.spark.mllib.linalg.Matrix`. */
  def asML: org.apache.spark.mllib.linalg.Matrix
  /** This matrix as a `SparseMatrix`. */
  def toSparse: SparseMatrix
  /** This matrix as a `DenseMatrix`. */
  def toDense: DenseMatrix
  /** Transpose of this matrix. NOTE(review): whether data is copied or shared is not shown - confirm. */
  def transpose: Matrix
  /** Matrix-vector product of this matrix with `y`. */
  def multiply(y: DenseVector): DenseVector
  /** Matrix-matrix product of this matrix with `y`. */
  def multiply(y: DenseMatrix): DenseMatrix
  // Standard object-contract members, redeclared so that subclasses must supply them.
  def equals(other: Any): Boolean
  def hashCode(): Int
  def toString: String
}

/**
 * Dense matrix implementation storing all values in one flat array.
 *
 * @param numRows      number of rows
 * @param numCols      number of columns
 * @param values       matrix entries; column-major when `isTransposed` is false,
 *                     row-major when it is true
 * @param isTransposed whether `values` holds the entries in row-major order
 */
class DenseMatrix(
  val numRows: Int,
  val numCols: Int,
  val values: Array[Double],
  val isTransposed: Boolean = false
) extends Matrix {
  /**
   * Offset of entry (`i`, `j`) inside `values`.
   *
   * Column-major storage puts column `j` at `j * numRows`; transposed
   * (row-major) storage puts row `i` at `i * numCols`. Using `numRows` as the
   * stride in both cases would make the `isTransposed` flag a no-op.
   */
  private def index(i: Int, j: Int): Int =
    if (isTransposed) j + numCols * i else i + numRows * j

  /** Entry at row `i`, column `j`. */
  def apply(i: Int, j: Int): Double = values(index(i, j))

  /** Independent copy: `values` is cloned, the orientation flag is kept. */
  def copy: DenseMatrix = new DenseMatrix(numRows, numCols, values.clone(), isTransposed)

  /** Overwrites the entry at row `i`, column `j` in place. */
  def update(i: Int, j: Int, value: Double): Unit = {
    values(index(i, j)) = value
  }
}

/**
 * Sparse matrix implementation storing only non-zero values in compressed
 * sparse column (CSC) format, or compressed sparse row (CSR) format when
 * `isTransposed` is true.
 *
 * @param numRows      number of rows
 * @param numCols      number of columns
 * @param colPtrs      start offsets into `rowIndices` / `values` for each column
 *                     (for each row when `isTransposed` is true)
 * @param rowIndices   row index of each stored value (column index when
 *                     `isTransposed` is true); `apply` binary-searches each
 *                     slice, so every slice must be sorted in ascending order
 * @param values       stored values, parallel to `rowIndices`
 * @param isTransposed whether the arrays describe rows (CSR) rather than columns (CSC)
 */
class SparseMatrix(
  val numRows: Int,
  val numCols: Int,
  val colPtrs: Array[Int],
  val rowIndices: Array[Int],
  val values: Array[Double],
  val isTransposed: Boolean = false
) extends Matrix {
  /**
   * Entry at row `i`, column `j`: the stored value if present, otherwise 0.0.
   */
  def apply(i: Int, j: Int): Double = {
    // In transposed storage `colPtrs` is indexed by row and `rowIndices` holds
    // column indices, so the roles of i and j swap for the lookup.
    val (major, minor) = if (isTransposed) (i, j) else (j, i)
    val startIdx = colPtrs(major)
    val endIdx = colPtrs(major + 1)
    val idx = java.util.Arrays.binarySearch(rowIndices, startIdx, endIdx, minor)
    // binarySearch returns a negative insertion point when the entry is not stored.
    if (idx >= 0) values(idx) else 0.0
  }

  /** Independent copy: all three backing arrays are cloned, the orientation flag is kept. */
  def copy: SparseMatrix = new SparseMatrix(
    numRows, numCols, colPtrs.clone(), rowIndices.clone(), values.clone(), isTransposed
  )
}

/**
 * Matrix factory methods and utilities.
 *
 * Signatures only; no bodies are shown in this file, so behavioural details
 * below are marked as assumptions where the signature alone does not fix them.
 */
object Matrices {
  /** Builds a `numRows` x `numCols` dense matrix from a flat `values` array. */
  def dense(numRows: Int, numCols: Int, values: Array[Double]): DenseMatrix
  /**
   * Builds a sparse matrix from (row, col, value) triplets.
   * NOTE(review): confirm this overload exists in the targeted Spark version;
   * the compressed-column overload below is the safer reference.
   */
  def sparse(numRows: Int, numCols: Int, entries: Seq[(Int, Int, Double)]): SparseMatrix
  /** Builds a sparse matrix from compressed-column arrays (`colPtrs`, `rowIndices`, `values`). */
  def sparse(
    numRows: Int,
    numCols: Int, 
    colPtrs: Array[Int],
    rowIndices: Array[Int],
    values: Array[Double]
  ): SparseMatrix
  /** `n` x `n` dense matrix. NOTE(review): presumably the identity matrix, per the name - confirm. */
  def eye(n: Int): DenseMatrix
  /** Dense matrix of the given shape. NOTE(review): presumably filled with zeros - confirm. */
  def zeros(numRows: Int, numCols: Int): DenseMatrix
  /** Dense matrix of the given shape. NOTE(review): presumably filled with ones - confirm. */
  def ones(numRows: Int, numCols: Int): DenseMatrix
  /** Dense matrix built from `vector`. NOTE(review): presumably with `vector` on the diagonal - confirm. */
  def diag(vector: Vector): DenseMatrix
  /** Random dense matrix drawn from `rng`. NOTE(review): presumably uniform entries - confirm. */
  def rand(numRows: Int, numCols: Int, rng: Random): DenseMatrix
  /** Random dense matrix drawn from `rng`. NOTE(review): presumably standard-normal entries - confirm. */
  def randn(numRows: Int, numCols: Int, rng: Random): DenseMatrix
  /** Concatenates `matrices` horizontally. NOTE(review): presumably requires equal row counts - confirm. */
  def horzcat(matrices: Array[Matrix]): Matrix
  /** Concatenates `matrices` vertically. NOTE(review): presumably requires equal column counts - confirm. */
  def vertcat(matrices: Array[Matrix]): Matrix
  /** Converts an `org.apache.spark.mllib.linalg.Matrix` into this package's `Matrix`. */
  def fromML(m: org.apache.spark.mllib.linalg.Matrix): Matrix
  /** Converts a Breeze matrix into a `Matrix`. NOTE(review): confirm this is part of the public API. */
  def fromBreeze(bm: breeze.linalg.Matrix[Double]): Matrix
}

Usage Example:

import org.apache.spark.ml.linalg.{Matrix, Matrices, Vectors}

// Create dense matrix (values are listed column by column)
val denseMatrix = Matrices.dense(3, 2, Array(1.0, 3.0, 5.0, 2.0, 4.0, 6.0))
println(s"Dense matrix:\n$denseMatrix")

// Create sparse matrix in compressed sparse column (CSC) form.
// Stored entries: (row 0, col 0) = 9.0 and (row 2, col 1) = 6.0
//   colPtrs    = [0, 1, 2]  column 0 owns values[0, 1), column 1 owns values[1, 2)
//   rowIndices = [0, 2]     row index of each stored value
val sparseMatrix = Matrices.sparse(3, 2, Array(0, 1, 2), Array(0, 2), Array(9.0, 6.0))
println(s"Sparse matrix:\n$sparseMatrix")

// Matrix operations
val identity = Matrices.eye(3)
val vector = Vectors.dense(1.0, 2.0, 3.0)
val result = identity.multiply(vector.toDense)
println(s"Matrix-vector multiplication: $result")

Advanced Linear Algebra Operations

Extended operations for complex numerical computations and transformations.

/**
 * BLAS (Basic Linear Algebra Subprograms) operations.
 *
 * Signatures only. Every routine that returns `Unit` can only report its
 * result by writing into one of its arguments; the formulas below name the
 * updated argument on the left of `:=`.
 * NOTE(review): confirm which of these are publicly accessible in the targeted Spark version.
 */
object BLAS {
  /**
   * Vector dot product: x^T * y
   *
   * @return the scalar product of `x` and `y`
   */
  def dot(x: Vector, y: Vector): Double
  
  /**
   * Vector L2 norm: ||x||_2
   *
   * @return the Euclidean norm of `x`
   */
  def nrm2(x: Vector): Double
  
  /**
   * Scalar-vector multiplication: a * x
   *
   * Returns `Unit`, so the scaled result is presumably written back into `x`
   * (x := a * x) - TODO confirm.
   */
  def scal(a: Double, x: Vector): Unit
  
  /**
   * Vector addition: y := a * x + y
   *
   * `y` is the argument that receives the result.
   */
  def axpy(a: Double, x: Vector, y: Vector): Unit
  
  /**
   * Matrix-vector multiplication: y := alpha * A * x + beta * y
   *
   * `y` is the argument that receives the result.
   */
  def gemv(
    alpha: Double, A: Matrix, x: Vector, beta: Double, y: Vector
  ): Unit
  
  /**
   * Matrix-matrix multiplication: C := alpha * A * B + beta * C
   *
   * `C` is the argument that receives the result.
   */
  def gemm(
    alpha: Double, A: Matrix, B: Matrix, beta: Double, C: Matrix
  ): Unit
  
  /**
   * Symmetric matrix-vector multiplication
   *
   * Takes the same parameters as `gemv`.
   * NOTE(review): presumably y := alpha * A * x + beta * y with `A` symmetric - confirm.
   */
  def symv(
    alpha: Double, A: Matrix, x: Vector, beta: Double, y: Vector
  ): Unit
  
  /**
   * Rank-1 update: A := alpha * x * y^T + A
   *
   * `A` is the argument that receives the result.
   */
  def ger(alpha: Double, x: Vector, y: Vector, A: Matrix): Unit
  
  /**
   * Symmetric rank-1 update: A := alpha * x * x^T + A
   *
   * `A` is the argument that receives the result.
   */
  def syr(alpha: Double, x: Vector, A: Matrix): Unit
}

/**
 * LAPACK (Linear Algebra Package) operations.
 *
 * Signatures only. Each routine returns an `Int`.
 * NOTE(review): presumably the LAPACK `info` status code (0 on success), with
 * results written into the array / matrix arguments - confirm.
 * NOTE(review): confirm this object exists under this name in the targeted Spark version.
 */
object LAPACK {
  /**
   * Cholesky decomposition
   *
   * NOTE(review): presumably factorises `A` in place - confirm.
   */
  def potrf(A: DenseMatrix): Int
  
  /**
   * Solve linear system using Cholesky decomposition
   *
   * NOTE(review): presumably `A` holds a factor produced by `potrf` and `B`
   * holds the right-hand sides on entry and the solution on exit - confirm.
   */
  def potrs(A: DenseMatrix, B: DenseMatrix): Int
  
  /**
   * QR decomposition
   *
   * NOTE(review): presumably `tau` receives the scalar factors of the
   * elementary reflectors - confirm.
   */
  def geqrf(A: DenseMatrix, tau: Array[Double]): Int
  
  /**
   * Singular Value Decomposition
   *
   * NOTE(review): presumably `U`, `s` and `Vt` are output arguments receiving
   * the left singular vectors, singular values and transposed right singular
   * vectors of `A` - confirm.
   */
  def gesvd(
    A: DenseMatrix,
    U: DenseMatrix,
    s: Array[Double],
    Vt: DenseMatrix
  ): Int
  
  /**
   * Eigenvalue decomposition
   *
   * NOTE(review): presumably for symmetric `A`, with `w` receiving the
   * eigenvalues - confirm.
   */
  def syev(
    A: DenseMatrix,
    w: Array[Double]
  ): Int
}

Vector and Matrix Conversions

Utilities for converting between different vector and matrix representations.

/**
 * Conversion utilities between MLlib and ML linear algebra types.
 *
 * Groups three families of overloads, distinguished by parameter type:
 * ML <-> MLlib vectors and matrices, and ML <-> Breeze vectors and matrices.
 * Unqualified `Vector` / `Matrix` below are the ML types used throughout this file.
 * NOTE(review): confirm this object exists under this name in the targeted Spark version.
 */
object LinearAlgebraUtils {
  /**
   * Convert ML vector to MLlib vector
   *
   * @param v vector from `org.apache.spark.ml.linalg`
   * @return the corresponding `org.apache.spark.mllib.linalg.Vector`
   */
  def toMLlib(v: org.apache.spark.ml.linalg.Vector): org.apache.spark.mllib.linalg.Vector
  
  /**
   * Convert MLlib vector to ML vector
   *
   * @param v vector from `org.apache.spark.mllib.linalg`
   * @return the corresponding `org.apache.spark.ml.linalg.Vector`
   */
  def fromMLlib(v: org.apache.spark.mllib.linalg.Vector): org.apache.spark.ml.linalg.Vector
  
  /**
   * Convert ML matrix to MLlib matrix
   *
   * @param m matrix from `org.apache.spark.ml.linalg`
   * @return the corresponding `org.apache.spark.mllib.linalg.Matrix`
   */
  def toMLlib(m: org.apache.spark.ml.linalg.Matrix): org.apache.spark.mllib.linalg.Matrix
  
  /**
   * Convert MLlib matrix to ML matrix
   *
   * @param m matrix from `org.apache.spark.mllib.linalg`
   * @return the corresponding `org.apache.spark.ml.linalg.Matrix`
   */
  def fromMLlib(m: org.apache.spark.mllib.linalg.Matrix): org.apache.spark.ml.linalg.Matrix
  
  /**
   * Convert Breeze vector to ML vector
   *
   * @param bv Breeze vector of doubles
   */
  def fromBreeze(bv: breeze.linalg.Vector[Double]): Vector
  
  /**
   * Convert ML vector to Breeze vector
   *
   * @param v ML vector
   */
  def toBreeze(v: Vector): breeze.linalg.Vector[Double]
  
  /**
   * Convert Breeze matrix to ML matrix
   *
   * @param bm Breeze matrix of doubles
   */
  def fromBreeze(bm: breeze.linalg.Matrix[Double]): Matrix
  
  /**
   * Convert ML matrix to Breeze matrix
   *
   * @param m ML matrix
   */
  def toBreeze(m: Matrix): breeze.linalg.Matrix[Double]
}

Distributed Linear Algebra (Legacy MLlib)

Large-scale distributed matrix operations from the legacy RDD-based API.

/**
 * Base class for distributed matrices.
 *
 * The name is written fully qualified to show the package it lives in
 * (`org.apache.spark.mllib.linalg.distributed`). Dimensions are `Long`,
 * unlike the `Int` dimensions of the local `Matrix` type in this file.
 */
abstract class org.apache.spark.mllib.linalg.distributed.DistributedMatrix {
  /** Number of rows. */
  def numRows(): Long
  /** Number of columns. */
  def numCols(): Long
}

/**
 * Row-oriented distributed matrix.
 *
 * @param rows the matrix rows, one MLlib vector per RDD element
 */
class org.apache.spark.mllib.linalg.distributed.RowMatrix(
  val rows: RDD[org.apache.spark.mllib.linalg.Vector]
) extends DistributedMatrix {
  /** Column-wise statistics, returned as a `MultivariateStatisticalSummary`. */
  def computeColumnSummaryStatistics(): MultivariateStatisticalSummary
  /** Covariance matrix, returned as a local MLlib matrix. */
  def computeCovariance(): org.apache.spark.mllib.linalg.Matrix
  /** Gramian matrix, returned as a local MLlib matrix. NOTE(review): presumably A^T * A - confirm. */
  def computeGramianMatrix(): org.apache.spark.mllib.linalg.Matrix
  /** Principal components as a local MLlib matrix. NOTE(review): presumably the top `k`, one per column - confirm. */
  def computePrincipalComponents(k: Int): org.apache.spark.mllib.linalg.Matrix
  /**
   * Singular value decomposition.
   *
   * @param k        NOTE(review): presumably the number of leading singular values to keep - confirm
   * @param computeU defaults to false. NOTE(review): presumably controls whether `U` is computed - confirm
   * @param rCond    defaults to 1e-9. NOTE(review): presumably a reciprocal condition number threshold - confirm
   * @return a `SingularValueDecomposition` whose `U` is a `RowMatrix` and whose `V` is a local MLlib matrix
   */
  def computeSVD(
    k: Int,
    computeU: Boolean = false,
    rCond: Double = 1e-9
  ): SingularValueDecomposition[RowMatrix, org.apache.spark.mllib.linalg.Matrix]
  /** Product of this matrix with the local matrix `B`, returned as a new `RowMatrix`. */
  def multiply(B: org.apache.spark.mllib.linalg.Matrix): RowMatrix
  /** Similarities between columns, returned as a `CoordinateMatrix`. NOTE(review): similarity measure not shown - confirm. */
  def columnSimilarities(): CoordinateMatrix
}

/**
 * Indexed row matrix for matrices with meaningful row indices.
 *
 * @param rows the matrix rows as `IndexedRow` elements (a `Long` index plus an MLlib vector)
 */
class org.apache.spark.mllib.linalg.distributed.IndexedRowMatrix(
  val rows: RDD[IndexedRow]
) extends DistributedMatrix {
  /** This matrix as a `RowMatrix`. NOTE(review): presumably drops the row indices - confirm. */
  def toRowMatrix(): RowMatrix
  /** This matrix as a `CoordinateMatrix`. */
  def toCoordinateMatrix(): CoordinateMatrix
  /** This matrix as a `BlockMatrix`. NOTE(review): block size used is not shown here - confirm. */
  def toBlockMatrix(): BlockMatrix
  /** Product of this matrix with the local matrix `B`, returned as a new `IndexedRowMatrix`. */
  def multiply(B: org.apache.spark.mllib.linalg.Matrix): IndexedRowMatrix
  /** Gramian matrix, returned as a local MLlib matrix. NOTE(review): presumably A^T * A - confirm. */
  def computeGramianMatrix(): org.apache.spark.mllib.linalg.Matrix
}

/**
 * Coordinate matrix for matrices stored as (row, col, value) triplets.
 *
 * @param entries the stored entries as `MatrixEntry` elements (`i`, `j`, `value`)
 */
class org.apache.spark.mllib.linalg.distributed.CoordinateMatrix(
  val entries: RDD[MatrixEntry]
) extends DistributedMatrix {
  /** This matrix as a `RowMatrix`. */
  def toRowMatrix(): RowMatrix
  /** This matrix as an `IndexedRowMatrix`. */
  def toIndexedRowMatrix(): IndexedRowMatrix
  /** This matrix as a `BlockMatrix`. NOTE(review): block size used is not shown here - confirm. */
  def toBlockMatrix(): BlockMatrix
  /** Transpose of this matrix, returned as a new `CoordinateMatrix`. */
  def transpose(): CoordinateMatrix
}

/**
 * Block matrix for matrices partitioned into blocks.
 *
 * @param blocks       the sub-matrices, each keyed by an `(Int, Int)` pair.
 *                     NOTE(review): presumably (block row index, block column index) - confirm
 * @param rowsPerBlock number of rows per block
 * @param colsPerBlock number of columns per block
 */
class org.apache.spark.mllib.linalg.distributed.BlockMatrix(
  val blocks: RDD[((Int, Int), org.apache.spark.mllib.linalg.Matrix)],
  val rowsPerBlock: Int,
  val colsPerBlock: Int
) extends DistributedMatrix {
  /** Sum of this matrix and `other`. NOTE(review): presumably requires matching dimensions and block sizes - confirm. */
  def add(other: BlockMatrix): BlockMatrix
  /** Difference of this matrix and `other`. NOTE(review): presumably requires matching dimensions and block sizes - confirm. */
  def subtract(other: BlockMatrix): BlockMatrix
  /** Product of this matrix and `other`. NOTE(review): presumably requires compatible block sizes - confirm. */
  def multiply(other: BlockMatrix): BlockMatrix
  /** Transpose of this matrix, returned as a new `BlockMatrix`. */
  def transpose: BlockMatrix
  /** This matrix as a local MLlib matrix. NOTE(review): presumably collects all blocks to the driver - confirm. */
  def toLocalMatrix(): org.apache.spark.mllib.linalg.Matrix
  /** This matrix as an `IndexedRowMatrix`. */
  def toIndexedRowMatrix(): IndexedRowMatrix
  /** This matrix as a `CoordinateMatrix`. */
  def toCoordinateMatrix(): CoordinateMatrix
}

Statistical Summary

Statistical operations on vectors and matrices for data analysis.

/**
 * Multivariate statistical summary.
 *
 * Every statistic except `count` is returned as an MLlib vector.
 * NOTE(review): presumably one entry per column of the summarised data - confirm.
 */
trait MultivariateStatisticalSummary {
  /** Mean. */
  def mean: org.apache.spark.mllib.linalg.Vector
  /** Variance. NOTE(review): sample vs. population variance is not shown here - confirm. */
  def variance: org.apache.spark.mllib.linalg.Vector
  /** NOTE(review): presumably the number of samples summarised - confirm. */
  def count: Long
  /** NOTE(review): presumably the number of non-zero values - confirm. */
  def numNonzeros: org.apache.spark.mllib.linalg.Vector
  /** Maximum. */
  def max: org.apache.spark.mllib.linalg.Vector
  /** Minimum. */
  def min: org.apache.spark.mllib.linalg.Vector
  /** L1 norm. */
  def normL1: org.apache.spark.mllib.linalg.Vector
  /** L2 norm. */
  def normL2: org.apache.spark.mllib.linalg.Vector
}

/**
 * Online multivariate summarizer for streaming statistics.
 *
 * All three methods return `this.type`, so calls can be chained on the same
 * summarizer instance.
 */
class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary {
  /** Adds one sample to the summary. */
  def add(sample: org.apache.spark.mllib.linalg.Vector): this.type
  /** Adds one sample with an explicit `weight`. */
  def add(sample: org.apache.spark.mllib.linalg.Vector, weight: Double): this.type
  /** Merges `other` into this summarizer. NOTE(review): whether `other` is modified is not shown - confirm. */
  def merge(other: MultivariateOnlineSummarizer): this.type
}

Types

// Core linear algebra imports
import org.apache.spark.ml.linalg._

// Vector types
import org.apache.spark.ml.linalg.{Vector, DenseVector, SparseVector, Vectors}

// Matrix types
import org.apache.spark.ml.linalg.{Matrix, DenseMatrix, SparseMatrix, Matrices}

// BLAS and LAPACK operations
import org.apache.spark.ml.linalg.{BLAS, LAPACK}

// Legacy distributed linear algebra (from mllib)
import org.apache.spark.mllib.linalg.distributed._
import org.apache.spark.mllib.stat.{MultivariateStatisticalSummary, MultivariateOnlineSummarizer}

// Conversion utilities
import org.apache.spark.mllib.linalg.{Vector => OldVector, Matrix => OldMatrix}
import breeze.linalg.{Vector => BreezeVector, Matrix => BreezeMatrix}

// Supporting types
// One row of an IndexedRowMatrix: a Long row index paired with the row's MLlib vector.
case class IndexedRow(index: Long, vector: org.apache.spark.mllib.linalg.Vector)
// One entry of a CoordinateMatrix: row index i, column index j and the value stored there.
case class MatrixEntry(i: Long, j: Long, value: Double)
// Result of RowMatrix.computeSVD: factors U and V with the vector s between them.
// RowType / MatrixType let U and V use different matrix representations
// (computeSVD in this file returns U as a RowMatrix and V as a local MLlib matrix).
// NOTE(review): s presumably holds the singular values - confirm.
case class SingularValueDecomposition[RowType, MatrixType](
  U: RowType,
  s: org.apache.spark.mllib.linalg.Vector,
  V: MatrixType
)