Evaluation and Tuning

Model evaluation metrics and automated hyperparameter tuning for assessing model performance and optimizing ML pipelines.

Capabilities

Model Evaluators

Statistical metrics for assessing model performance across different machine learning tasks.

/**
 * Base evaluator abstraction
 */
abstract class Evaluator extends Params {
  def evaluate(dataset: Dataset[_]): Double
  def isLargerBetter: Boolean
}

/**
 * Binary classification evaluation metrics
 */
class BinaryClassificationEvaluator extends Evaluator {
  def setRawPredictionCol(value: String): this.type
  def setLabelCol(value: String): this.type
  def setMetricName(value: String): this.type
  def setWeightCol(value: String): this.type
  def setNumBins(value: Int): this.type
}

/**
 * Multiclass classification evaluation metrics
 */
class MulticlassClassificationEvaluator extends Evaluator {
  def setPredictionCol(value: String): this.type
  def setLabelCol(value: String): this.type
  def setMetricName(value: String): this.type
  def setWeightCol(value: String): this.type
  def setMetricLabel(value: Double): this.type
  def setProbabilityCol(value: String): this.type
  def setBeta(value: Double): this.type
  def setEps(value: Double): this.type
}

/**
 * Regression evaluation metrics
 */
class RegressionEvaluator extends Evaluator {
  def setPredictionCol(value: String): this.type
  def setLabelCol(value: String): this.type
  def setMetricName(value: String): this.type
  def setWeightCol(value: String): this.type
  def setThroughOrigin(value: Boolean): this.type
}

/**
 * Clustering evaluation metrics
 */
class ClusteringEvaluator extends Evaluator {
  def setPredictionCol(value: String): this.type
  def setFeaturesCol(value: String): this.type
  def setMetricName(value: String): this.type
  def setDistanceMeasure(value: String): this.type
  def setWeightCol(value: String): this.type
}

/**
 * Ranking evaluation metrics
 */
class RankingEvaluator extends Evaluator {
  def setPredictionCol(value: String): this.type
  def setLabelCol(value: String): this.type
  def setMetricName(value: String): this.type
  def setK(value: Int): this.type
}

Usage Example:

import org.apache.spark.ml.evaluation.{BinaryClassificationEvaluator, MulticlassClassificationEvaluator}

// Binary classification evaluation
val binaryEvaluator = new BinaryClassificationEvaluator()
  .setLabelCol("label")
  .setRawPredictionCol("rawPrediction")
  .setMetricName("areaUnderROC")

val auc = binaryEvaluator.evaluate(predictions)
println(s"Area under ROC = $auc")

// Multiclass classification evaluation
val multiclassEvaluator = new MulticlassClassificationEvaluator()
  .setLabelCol("label")
  .setPredictionCol("prediction")
  .setMetricName("accuracy")

val accuracy = multiclassEvaluator.evaluate(predictions)
println(s"Test set accuracy = $accuracy")

Hyperparameter Tuning

Automated model selection and hyperparameter optimization using grid search with cross-validation or a train-validation split.

/**
 * Parameter grid builder for hyperparameter tuning
 */
class ParamGridBuilder {
  def addGrid(param: Param[_], values: Array[_]): this.type
  def baseOn(paramMap: ParamMap): this.type
  def baseOn(paramMaps: ParamMap*): this.type
  def build(): Array[ParamMap]
}

/**
 * K-fold cross-validation for model selection
 */
class CrossValidator extends Estimator[CrossValidatorModel] {
  def setEstimator(value: Estimator[_]): this.type
  def setEstimatorParamMaps(value: Array[ParamMap]): this.type
  def setEvaluator(value: Evaluator): this.type
  def setNumFolds(value: Int): this.type
  def setParallelism(value: Int): this.type
  def setCollectSubModels(value: Boolean): this.type
  def setSeed(value: Long): this.type
  def setFoldCol(value: String): this.type
}

class CrossValidatorModel extends Model[CrossValidatorModel] with CrossValidatorParams {
  def bestModel: Model[_]
  def avgMetrics: Array[Double]
  def stdMetrics: Array[Double]
  def subModels: Array[Array[Model[_]]]
  def hasSubModels: Boolean
}

/**
 * Train-validation split for model selection
 */
class TrainValidationSplit extends Estimator[TrainValidationSplitModel] {
  def setEstimator(value: Estimator[_]): this.type
  def setEstimatorParamMaps(value: Array[ParamMap]): this.type
  def setEvaluator(value: Evaluator): this.type
  def setTrainRatio(value: Double): this.type
  def setParallelism(value: Int): this.type
  def setCollectSubModels(value: Boolean): this.type
  def setSeed(value: Long): this.type
}

class TrainValidationSplitModel extends Model[TrainValidationSplitModel] with TrainValidationSplitParams {
  def bestModel: Model[_]
  def validationMetrics: Array[Double]
  def subModels: Array[Model[_]]
  def hasSubModels: Boolean
}

Usage Example:

import org.apache.spark.ml.tuning.{ParamGridBuilder, CrossValidator}
import org.apache.spark.ml.classification.LogisticRegression
import org.apache.spark.ml.evaluation.BinaryClassificationEvaluator

// Create the model
val lr = new LogisticRegression()

// Create parameter grid
val paramGrid = new ParamGridBuilder()
  .addGrid(lr.regParam, Array(0.1, 0.01))
  .addGrid(lr.fitIntercept) // BooleanParam overload: expands to both true and false
  .addGrid(lr.elasticNetParam, Array(0.0, 0.5, 1.0))
  .build()

// Create cross validator
val cv = new CrossValidator()
  .setEstimator(lr)
  .setEvaluator(new BinaryClassificationEvaluator)
  .setEstimatorParamMaps(paramGrid)
  .setNumFolds(3)
  .setParallelism(2)

// Run cross-validation and choose the best set of parameters
val cvModel = cv.fit(training)

// Make predictions on test data
val predictions = cvModel.transform(test)
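
After fitting, CrossValidatorModel exposes the averaged metrics and the winning model. TrainValidationSplit follows the same workflow but scores each parameter combination against a single random split, which is cheaper on large datasets. A sketch reusing lr and paramGrid from above:

// Inspect cross-validation results
println(s"Average metric per grid point: ${cvModel.avgMetrics.mkString(", ")}")
val best = cvModel.bestModel

import org.apache.spark.ml.tuning.TrainValidationSplit

// Train-validation split: 80% of the data for training, 20% for validation
val tvs = new TrainValidationSplit()
  .setEstimator(lr)
  .setEvaluator(new BinaryClassificationEvaluator)
  .setEstimatorParamMaps(paramGrid)
  .setTrainRatio(0.8)

val tvsModel = tvs.fit(training)
println(s"Validation metrics: ${tvsModel.validationMetrics.mkString(", ")}")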

Advanced Evaluation

Specialized evaluation methods for complex model assessment scenarios.

/**
 * Multilabel classification evaluation
 */
class MultilabelClassificationEvaluator extends Evaluator {
  def setPredictionCol(value: String): this.type
  def setLabelCol(value: String): this.type
  def setMetricName(value: String): this.type
}

/**
 * Recommendation system evaluation
 */
class RecommendationEvaluator extends Evaluator {
  def setPredictionCol(value: String): this.type
  def setLabelCol(value: String): this.type
  def setMetricName(value: String): this.type
  def setK(value: Int): this.type
  def setColdStartStrategy(value: String): this.type
}
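
A usage sketch: the multilabel evaluator expects array-of-double label and prediction columns, and multilabelPredictions is a hypothetical DataFrame in that shape; the RecommendationEvaluator call simply exercises the interface declared above:

import org.apache.spark.ml.evaluation.MultilabelClassificationEvaluator

// Multilabel evaluation: both columns hold arrays of doubles
val multilabelEvaluator = new MultilabelClassificationEvaluator()
  .setLabelCol("labels")
  .setPredictionCol("predictions")
  .setMetricName("f1Measure")

val f1 = multilabelEvaluator.evaluate(multilabelPredictions)

// Recommendation evaluation per the interface above; "drop" mirrors
// ALS cold-start handling (illustrative value)
val recEvaluator = new RecommendationEvaluator()
  .setMetricName("precisionAtK")
  .setK(10)
  .setColdStartStrategy("drop")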

Model Comparison and Statistical Tests

Tools for comparing model performance and conducting statistical significance tests.

/**
 * Statistical utilities for model comparison
 */
object ModelComparison {
  def compareModels(
    model1Metrics: Array[Double],
    model2Metrics: Array[Double]
  ): StatisticalTestResult
  
  def pairedTTest(
    differences: Array[Double],
    confidenceLevel: Double = 0.95
  ): TTestResult
}

case class StatisticalTestResult(
  pValue: Double,
  statistic: Double,
  confidenceInterval: (Double, Double),
  isSignificant: Boolean
)

case class TTestResult(
  pValue: Double,
  tStatistic: Double,
  degreesOfFreedom: Int,
  confidenceInterval: (Double, Double)
)
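
A sketch of comparing per-fold metrics from two cross-validated models with the interface above; the metric values are purely illustrative:

// Per-fold AUC scores from two models (illustrative values)
val model1Metrics = Array(0.81, 0.84, 0.79, 0.83, 0.82)
val model2Metrics = Array(0.85, 0.86, 0.84, 0.88, 0.85)

val result = ModelComparison.compareModels(model1Metrics, model2Metrics)
if (result.isSignificant)
  println(s"Difference is significant (p = ${result.pValue})")

// Paired t-test on per-fold differences at the default 95% confidence level
val diffs = model1Metrics.zip(model2Metrics).map { case (a, b) => b - a }
val tTest = ModelComparison.pairedTTest(diffs)
println(s"t = ${tTest.tStatistic}, df = ${tTest.degreesOfFreedom}")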

Pipeline Validation

Tools for validating entire ML pipelines and ensuring data consistency.

/**
 * Pipeline validation utilities
 */
object PipelineValidator {
  def validatePipeline(
    pipeline: Pipeline,
    dataset: Dataset[_]
  ): ValidationReport
  
  def checkDataLeakage(
    transformers: Array[Transformer],
    dataset: Dataset[_]
  ): LeakageReport
}

case class ValidationReport(
  isValid: Boolean,
  errors: Array[ValidationError],
  warnings: Array[ValidationWarning]
)

case class ValidationError(
  stage: String,
  message: String,
  severity: String
)

case class ValidationWarning(
  stage: String,
  message: String,
  recommendation: String
)

case class LeakageReport(
  hasLeakage: Boolean,
  suspiciousTransformers: Array[String],
  details: Map[String, String]
)
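
A sketch against the interface above, assuming pipeline is an assembled Pipeline, training is the dataset to validate against, and transformers is an Array[Transformer] of fitted stages to audit:

val report = PipelineValidator.validatePipeline(pipeline, training)
if (!report.isValid)
  report.errors.foreach(e => println(s"[${e.severity}] ${e.stage}: ${e.message}"))
report.warnings.foreach(w =>
  println(s"${w.stage}: ${w.message} (suggested: ${w.recommendation})"))

// Audit fitted transformers for signs of data leakage
val leakage = PipelineValidator.checkDataLeakage(transformers, training)
if (leakage.hasLeakage)
  println(s"Suspicious stages: ${leakage.suspiciousTransformers.mkString(", ")}")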

Custom Evaluation Metrics

Framework for creating custom evaluation metrics for specialized use cases.

/**
 * Base trait for custom evaluation metrics
 */
trait CustomEvaluator extends Evaluator {
  def computeMetric(predictions: DataFrame): Double
  def getMetricName: String
}

/**
 * Example custom evaluator implementation
 */
class CustomRegressionEvaluator extends CustomEvaluator {
  def setPredictionCol(value: String): this.type
  def setLabelCol(value: String): this.type
  def setCustomParams(params: Map[String, Any]): this.type
  
  def computeMetric(predictions: DataFrame): Double = {
    // Custom metric computation logic
    0.0
  }
  
  def getMetricName: String = "customMetric"
}
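
For comparison, here is a minimal concrete sketch that plugs directly into Spark's Evaluator contract, computing mean absolute percentage error; the MapeEvaluator name and fixed column names are illustrative choices:

import org.apache.spark.ml.evaluation.Evaluator
import org.apache.spark.ml.param.ParamMap
import org.apache.spark.ml.util.Identifiable
import org.apache.spark.sql.Dataset
import org.apache.spark.sql.functions.{abs, avg, col}

class MapeEvaluator(override val uid: String) extends Evaluator {
  def this() = this(Identifiable.randomUID("mapeEval"))

  // Mean absolute percentage error over the "label" and "prediction"
  // columns; assumes labels are non-zero
  override def evaluate(dataset: Dataset[_]): Double =
    dataset
      .select(avg(abs((col("prediction") - col("label")) / col("label"))))
      .head()
      .getDouble(0)

  // Lower MAPE is better
  override def isLargerBetter: Boolean = false

  override def copy(extra: ParamMap): MapeEvaluator = defaultCopy(extra)
}

Because isLargerBetter is false, CrossValidator and TrainValidationSplit will minimize this metric when selecting the best parameter map.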

Evaluation Metrics Reference

Binary Classification Metrics

  • areaUnderROC: Area under the Receiver Operating Characteristic curve
  • areaUnderPR: Area under the Precision-Recall curve

Multiclass Classification Metrics

  • accuracy: Overall accuracy (correct predictions / total predictions)
  • weightedPrecision: Weighted precision across all classes
  • weightedRecall: Weighted recall across all classes
  • weightedFMeasure: Weighted F1-score across all classes
  • hammingLoss: Hamming loss (fraction of incorrectly predicted labels; equals 1 - accuracy for single-label multiclass data)

Regression Metrics

  • rmse: Root Mean Squared Error
  • mse: Mean Squared Error
  • r2: R-squared (coefficient of determination)
  • mae: Mean Absolute Error
  • var: Explained variance

Clustering Metrics

  • silhouette: Silhouette coefficient (the only supported metric name)

Note that squaredEuclidean is a distance measure rather than a metric name; it is the default for setDistanceMeasure, with cosine as the alternative.

Ranking Metrics

  • meanAveragePrecision: Mean Average Precision
  • meanAveragePrecisionAtK: MAP at K
  • precisionAtK: Precision at K
  • recallAtK: Recall at K
  • ndcgAtK: Normalized Discounted Cumulative Gain at K
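
RankingEvaluator consumes array-typed columns. A sketch, assuming rankings is a hypothetical DataFrame whose "prediction" column holds recommended item ids and whose "label" column holds the relevant ones, both as arrays of doubles:

import org.apache.spark.ml.evaluation.RankingEvaluator

val rankingEvaluator = new RankingEvaluator()
  .setPredictionCol("prediction")
  .setLabelCol("label")
  .setMetricName("ndcgAtK")
  .setK(10)

val ndcg = rankingEvaluator.evaluate(rankings)
println(s"NDCG@10 = $ndcg")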

Types

// Evaluation and tuning imports
import org.apache.spark.ml.evaluation._
import org.apache.spark.ml.tuning._
import org.apache.spark.ml.param.ParamMap
import org.apache.spark.sql.{DataFrame, Dataset}

// Model selection types
import org.apache.spark.ml.tuning.{
  ParamGridBuilder,
  CrossValidator,
  CrossValidatorModel,
  TrainValidationSplit,
  TrainValidationSplitModel
}

// Evaluator types
import org.apache.spark.ml.evaluation.{
  BinaryClassificationEvaluator,
  MulticlassClassificationEvaluator,
  RegressionEvaluator,
  ClusteringEvaluator,
  RankingEvaluator
}