Spark ML Local Library providing linear algebra and statistical utilities for local machine learning operations without requiring a distributed Spark cluster
—
Numerical testing utilities with tolerance-based comparisons for vectors, matrices, and doubles. Essential for testing numerical algorithms where exact equality is impractical due to floating-point precision.
Enhanced comparison operators for double values with absolute and relative tolerance support.
object TestingUtils {
/** Helper case class for double comparisons */
case class CompareDoubleRightSide(
fun: (Double, Double, Double) => Boolean,
y: Double,
eps: Double,
method: String
)
/**
* Implicit class adding tolerance-based comparison operators to Double values
*/
implicit class DoubleWithAlmostEquals(val x: Double) {
/** Returns true if values are within tolerance */
def ~=(r: CompareDoubleRightSide): Boolean
/** Returns true if values are NOT within tolerance */
def !~=(r: CompareDoubleRightSide): Boolean
/** Throws exception if values are NOT within tolerance, otherwise returns true */
def ~==(r: CompareDoubleRightSide): Boolean
/** Throws exception if values ARE within tolerance, otherwise returns true */
def !~==(r: CompareDoubleRightSide): Boolean
/** Creates absolute tolerance comparison */
def absTol(eps: Double): CompareDoubleRightSide
/** Creates relative tolerance comparison */
def relTol(eps: Double): CompareDoubleRightSide
}
/** Message constants */
val ABS_TOL_MSG: String = " using absolute tolerance"
val REL_TOL_MSG: String = " using relative tolerance"
}

Usage Examples:
import org.apache.spark.ml.util.TestingUtils._
val x = 1.0000001
val y = 1.0000002
// Absolute tolerance comparison
val closeEnough = x ~= y absTol 1e-6
println(s"Close enough with absolute tolerance: $closeEnough") // true
val tooFar = x ~= y absTol 1e-8
println(s"Close enough with stricter tolerance: $tooFar") // false
// Relative tolerance comparison
val relativelyClose = x ~= y relTol 1e-6
println(s"Relatively close: $relativelyClose") // true
// Assertion-style comparisons (throw exceptions on failure)
try {
x ~== y absTol 1e-6 // Passes
println("Absolute tolerance assertion passed")
} catch {
case e: org.scalatest.exceptions.TestFailedException =>
println(s"Assertion failed: ${e.getMessage}")
}
try {
x ~== y absTol 1e-8 // Fails and throws exception
} catch {
case e: org.scalatest.exceptions.TestFailedException =>
println(s"Strict assertion failed: ${e.getMessage}")
}
// Negation comparisons
val notClose = x !~= y absTol 1e-8
println(s"Not close with strict tolerance: $notClose") // true

Tolerance-based comparisons for Vector objects, comparing element-wise with specified tolerance.
object TestingUtils {
/** Helper case class for vector comparisons */
case class CompareVectorRightSide(
fun: (Vector, Vector, Double) => Boolean,
y: Vector,
eps: Double,
method: String
)
/**
* Implicit class adding tolerance-based comparison operators to Vector values
*/
implicit class VectorWithAlmostEquals(val x: Vector) {
/** Returns true if all vector elements are within tolerance */
def ~=(r: CompareVectorRightSide): Boolean
/** Returns true if any vector elements are NOT within tolerance */
def !~=(r: CompareVectorRightSide): Boolean
/** Throws exception if any elements are NOT within tolerance, otherwise returns true */
def ~==(r: CompareVectorRightSide): Boolean
/** Throws exception if all elements ARE within tolerance, otherwise returns true */
def !~==(r: CompareVectorRightSide): Boolean
/** Creates absolute tolerance comparison for vectors */
def absTol(eps: Double): CompareVectorRightSide
/** Creates relative tolerance comparison for vectors */
def relTol(eps: Double): CompareVectorRightSide
}
}

Usage Examples:
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.ml.util.TestingUtils._
val v1 = Vectors.dense(1.0, 2.0, 3.0)
val v2 = Vectors.dense(1.000001, 1.999999, 3.000001)
val v3 = Vectors.dense(1.1, 2.1, 3.1)
// Absolute tolerance comparison
val closeVectors = v1 ~= v2 absTol 1e-5
println(s"Vectors close with absolute tolerance: $closeVectors") // true
val farVectors = v1 ~= v3 absTol 1e-5
println(s"Vectors close with strict tolerance: $farVectors") // false
// Relative tolerance comparison
val relativelyCloseVectors = v1 ~= v2 relTol 1e-5
println(s"Vectors relatively close: $relativelyCloseVectors") // true
// Assertion-style comparisons
try {
v1 ~== v2 absTol 1e-5 // Passes
println("Vector absolute tolerance assertion passed")
} catch {
case e: org.scalatest.exceptions.TestFailedException =>
println(s"Vector assertion failed: ${e.getMessage}")
}
// Working with sparse vectors
val sparse1 = Vectors.sparse(5, Array(0, 2, 4), Array(1.0, 3.0, 5.0))
val sparse2 = Vectors.sparse(5, Array(0, 2, 4), Array(1.000001, 2.999999, 5.000001))
val sparsesClose = sparse1 ~= sparse2 absTol 1e-5
println(s"Sparse vectors close: $sparsesClose") // true
// Mixed dense/sparse comparison
val dense = Vectors.dense(1.0, 0.0, 3.0, 0.0, 5.0)
val mixedClose = dense ~= sparse1 absTol 1e-10
println(s"Dense and sparse vectors close: $mixedClose") // true

Tolerance-based comparisons for Matrix objects, comparing element-wise with specified tolerance.
object TestingUtils {
/** Helper case class for matrix comparisons */
case class CompareMatrixRightSide(
fun: (Matrix, Matrix, Double) => Boolean,
y: Matrix,
eps: Double,
method: String
)
/**
* Implicit class adding tolerance-based comparison operators to Matrix values
*/
implicit class MatrixWithAlmostEquals(val x: Matrix) {
/** Returns true if all matrix elements are within tolerance */
def ~=(r: CompareMatrixRightSide): Boolean
/** Returns true if any matrix elements are NOT within tolerance */
def !~=(r: CompareMatrixRightSide): Boolean
/** Throws exception if any elements are NOT within tolerance, otherwise returns true */
def ~==(r: CompareMatrixRightSide): Boolean
/** Throws exception if all elements ARE within tolerance, otherwise returns true */
def !~==(r: CompareMatrixRightSide): Boolean
/** Creates absolute tolerance comparison for matrices */
def absTol(eps: Double): CompareMatrixRightSide
/** Creates relative tolerance comparison for matrices */
def relTol(eps: Double): CompareMatrixRightSide
}
}

Usage Examples:
import org.apache.spark.ml.linalg.Matrices
import org.apache.spark.ml.util.TestingUtils._
val m1 = Matrices.dense(2, 2, Array(1.0, 2.0, 3.0, 4.0))
val m2 = Matrices.dense(2, 2, Array(1.000001, 1.999999, 3.000001, 3.999999))
val m3 = Matrices.dense(2, 2, Array(1.1, 2.1, 3.1, 4.1))
// Absolute tolerance comparison
val closeMatrices = m1 ~= m2 absTol 1e-5
println(s"Matrices close with absolute tolerance: $closeMatrices") // true
val farMatrices = m1 ~= m3 absTol 1e-5
println(s"Matrices close with strict tolerance: $farMatrices") // false
// Relative tolerance comparison
val relativelyCloseMatrices = m1 ~= m2 relTol 1e-5
println(s"Matrices relatively close: $relativelyCloseMatrices") // true
// Assertion-style comparisons
try {
m1 ~== m2 absTol 1e-5 // Passes
println("Matrix absolute tolerance assertion passed")
} catch {
case e: org.scalatest.exceptions.TestFailedException =>
println(s"Matrix assertion failed: ${e.getMessage}")
}
// Working with sparse matrices
val sparse1 = Matrices.sparse(3, 3, Array(0, 1, 2, 3), Array(0, 1, 2), Array(1.0, 2.0, 3.0))
val sparse2 = Matrices.sparse(3, 3, Array(0, 1, 2, 3), Array(0, 1, 2), Array(1.000001, 1.999999, 3.000001))
val sparsesClose = sparse1 ~= sparse2 absTol 1e-5
println(s"Sparse matrices close: $sparsesClose") // true
// Mixed dense/sparse comparison
val denseEquiv = Matrices.dense(3, 3, Array(1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 3.0))
val mixedClose = denseEquiv ~= sparse1 absTol 1e-10
println(s"Dense and sparse matrices close: $mixedClose") // true

import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.ml.util.TestingUtils._
def testVectorOperations(): Unit = {
val v1 = Vectors.dense(1.0, 2.0, 3.0)
val v2 = Vectors.dense(4.0, 5.0, 6.0)
val expected = 32.0 // 1*4 + 2*5 + 3*6
// Calculate dot product using vector operations
val result = v1.toArray.zip(v2.toArray).map{ case (a, b) => a * b }.sum
// Test with tolerance
assert(result ~== expected absTol 1e-10)
println("Vector dot product test passed")
}
def testVectorNorms(): Unit = {
val vector = Vectors.dense(3.0, 4.0)
val expectedL2Norm = 5.0 // sqrt(3^2 + 4^2)
val l2Norm = Vectors.norm(vector, 2.0)
// Test with tolerance
assert(l2Norm ~== expectedL2Norm absTol 1e-10)
println("Vector norm test passed")
}
testVectorOperations()
testVectorNorms()

import org.apache.spark.ml.linalg.Matrices
import org.apache.spark.ml.util.TestingUtils._
def testMatrixMultiplication(): Unit = {
val A = Matrices.dense(2, 2, Array(1.0, 2.0, 3.0, 4.0)) // [[1,3],[2,4]]
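// multiply() takes a DenseMatrix argument, so B is built as a DenseMatrix rather than a Matrix
val B = new org.apache.spark.ml.linalg.DenseMatrix(2, 2, Array(5.0, 6.0, 7.0, 8.0)) // [[5,7],[6,8]]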
// Values are stored column-major: A * B = [[23,31],[34,46]]
val expected = Matrices.dense(2, 2, Array(23.0, 34.0, 31.0, 46.0))
val result = A.multiply(B)
// Test with tolerance
assert(result ~== expected absTol 1e-10)
println("Matrix multiplication test passed")
}
testMatrixMultiplication()

import org.apache.spark.ml.util.TestingUtils._
// For values near zero, use absolute tolerance
val nearZero1 = 1e-10
val nearZero2 = 2e-10
assert(nearZero1 ~== nearZero2 absTol 1e-9) // Good
// nearZero1 ~== nearZero2 relTol 1e-9 // Would throw exception - the values differ by ~100% in relative terms
// For larger values, relative tolerance is often better
val large1 = 1e6
val large2 = 1.000001e6
assert(large1 ~== large2 relTol 1e-5) // Good
// large1 ~== large2 absTol 1e-5 // Would fail - absolute difference is 1.0

import org.apache.spark.ml.util.TestingUtils._
val nan1 = Double.NaN
val nan2 = Double.NaN
// NaN values are considered equal in tolerance comparisons
assert(nan1 ~== nan2 absTol 1e-10) // Passes
assert(nan1 ~== nan2 relTol 1e-10) // Passes
println("NaN comparison tests passed")

// Helper case classes for comparison operations
case class CompareDoubleRightSide(
fun: (Double, Double, Double) => Boolean,
y: Double,
eps: Double,
method: String
)
case class CompareVectorRightSide(
fun: (Vector, Vector, Double) => Boolean,
y: Vector,
eps: Double,
method: String
)
case class CompareMatrixRightSide(
fun: (Matrix, Matrix, Double) => Boolean,
y: Matrix,
eps: Double,
method: String
)
// Implicit classes that add comparison operators
implicit class DoubleWithAlmostEquals(val x: Double)
implicit class VectorWithAlmostEquals(val x: Vector)
implicit class MatrixWithAlmostEquals(val x: Matrix)

Install with Tessl CLI
npx tessl i tessl/maven-org-apache-spark--spark-mllib-local-2-11