or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

docs

classification.md clustering.md evaluation-tuning.md feature-engineering.md index.md linear-algebra.md pipeline-components.md recommendation.md regression.md
tile.json

docs/regression.md

Regression

Supervised learning algorithms for predicting continuous numerical values, including linear models, tree-based methods, and survival analysis with comprehensive residual analysis.

Capabilities

Linear Regression

Linear regression algorithm with L1/L2 regularization and comprehensive statistical summaries.

/**
 * Linear regression with regularization support.
 *
 * Estimator whose fitted model type is [[LinearRegressionModel]] (the third type
 * argument of `Regressor`). Every setter returns `this.type`, so configuration
 * calls can be chained. Only signatures are listed here; parameter defaults and
 * valid ranges are not part of this listing.
 */
class LinearRegression extends Regressor[Vector, LinearRegression, LinearRegressionModel] {
  def setMaxIter(value: Int): this.type
  def setRegParam(value: Double): this.type
  def setElasticNetParam(value: Double): this.type
  def setTol(value: Double): this.type
  def setFitIntercept(value: Boolean): this.type
  def setStandardization(value: Boolean): this.type
  // Takes a column name, not column data.
  def setWeightCol(value: String): this.type
  // Solver and loss are selected by name; the accepted names are not listed here.
  def setSolver(value: String): this.type
  def setAggregationDepth(value: Int): this.type
  def setLoss(value: String): this.type
  // NOTE(review): presumably only relevant for a specific loss setting — confirm.
  def setEpsilon(value: Double): this.type
}

/**
 * Fitted model produced by `LinearRegression`.
 *
 * Exposes the learned parameters, a training summary, and on-demand evaluation
 * against another dataset.
 */
class LinearRegressionModel extends RegressionModel[Vector, LinearRegressionModel] with LinearRegressionParams {
  def coefficients: Vector
  def intercept: Double
  def scale: Double
  // NOTE(review): presumably only valid when `hasSummary` is true — confirm.
  def summary: LinearRegressionTrainingSummary
  def hasSummary: Boolean
  // Returns the base summary type, not the training summary.
  def evaluate(dataset: Dataset[_]): LinearRegressionSummary
}

/**
 * Evaluation results of a linear regression model on a dataset.
 *
 * This is the type returned by `LinearRegressionModel.evaluate`. The residual and
 * coefficient statistics (`devianceResiduals`, `coefficientStandardErrors`,
 * `tValues`, `pValues`) are declared here rather than on the training summary, so
 * they are available for any evaluated dataset. Per the Spark API documentation,
 * the coefficient statistics are only available when the model was trained with
 * the "normal" solver.
 */
class LinearRegressionSummary {
  // Scored dataset and the column names used to produce it.
  def predictions: DataFrame
  def predictionCol: String
  def labelCol: String
  def featuresCol: String

  // Goodness-of-fit metrics.
  def explainedVariance: Double
  def meanAbsoluteError: Double
  def meanSquaredError: Double
  def rootMeanSquaredError: Double
  def r2: Double
  def r2adj: Double

  // Residual analysis.
  def residuals: DataFrame
  def numInstances: Long
  def degreesOfFreedom: Long
  def devianceResiduals: Array[Double]

  // Coefficient statistics ("normal" solver only).
  def coefficientStandardErrors: Array[Double]
  def tValues: Array[Double]
  def pValues: Array[Double]
}

/**
 * Summary of the training run itself.
 *
 * Inherits every metric of [[LinearRegressionSummary]] and adds the optimizer's
 * iteration count and objective history.
 */
class LinearRegressionTrainingSummary extends LinearRegressionSummary {
  def totalIterations: Int
  def objectiveHistory: Array[Double]
}

Usage Example:

import org.apache.spark.ml.regression.LinearRegression

// Configure the estimator by chaining setters; each setter returns the estimator.
val lr = new LinearRegression()
  .setMaxIter(20)
  .setRegParam(0.3)
  .setElasticNetParam(0.8)

// `trainingData` and `testData` are not defined in this snippet; the surrounding
// application must provide them.
val lrModel = lr.fit(trainingData)
// `predictions` is computed here but not used further in this snippet.
val predictions = lrModel.transform(testData)

// Print coefficients and intercept
println(s"Coefficients: ${lrModel.coefficients}")
println(s"Intercept: ${lrModel.intercept}")

// Summarize the model over the training set
val trainingSummary = lrModel.summary
println(s"RMSE: ${trainingSummary.rootMeanSquaredError}")
println(s"R2: ${trainingSummary.r2}")

Generalized Linear Regression

Generalized linear models supporting various exponential family distributions and link functions.

/**
 * Generalized Linear Regression with multiple family distributions.
 *
 * Estimator whose fitted model type is [[GeneralizedLinearRegressionModel]].
 * Every setter returns `this.type`, so calls can be chained. Family, link and
 * solver are selected by name; the accepted names are not listed here.
 */
class GeneralizedLinearRegression extends Regressor[Vector, GeneralizedLinearRegression, GeneralizedLinearRegressionModel] {
  def setFamily(value: String): this.type
  def setLink(value: String): this.type
  def setFitIntercept(value: Boolean): this.type
  def setMaxIter(value: Int): this.type
  def setTol(value: Double): this.type
  def setRegParam(value: Double): this.type
  // The *Col setters take column names.
  def setWeightCol(value: String): this.type
  def setSolver(value: String): this.type
  def setLinkPredictionCol(value: String): this.type
  // NOTE(review): presumably these two only apply to particular families — confirm.
  def setVariancePower(value: Double): this.type
  def setLinkPower(value: Double): this.type
  def setOffsetCol(value: String): this.type
}

/**
 * Fitted model produced by `GeneralizedLinearRegression`.
 */
class GeneralizedLinearRegressionModel extends RegressionModel[Vector, GeneralizedLinearRegressionModel] with GeneralizedLinearRegressionParams {
  def coefficients: Vector
  def intercept: Double
  // The members of GeneralizedLinearRegressionTrainingSummary are not listed in
  // this section; the type is only named here and in the import list.
  def summary: GeneralizedLinearRegressionTrainingSummary
  def hasSummary: Boolean
  // Returns the base summary type, not the training summary.
  def evaluate(dataset: Dataset[_]): GeneralizedLinearRegressionSummary
}

/**
 * Summary type returned by `GeneralizedLinearRegressionModel.evaluate`.
 *
 * Only signatures are listed; how each statistic is computed is not shown here.
 */
class GeneralizedLinearRegressionSummary {
  // Scored dataset and the column names associated with it.
  def predictions: DataFrame
  def predictionCol: String
  def labelCol: String
  def featuresCol: String
  // Rank and degrees-of-freedom figures.
  def rank: Long
  def degreesOfFreedom: Long
  def residualDegreeOfFreedom: Long
  def residualDegreeOfFreedomNull: Long
  // Fit statistics.
  def aic: Double
  def deviance: Double
  def nullDeviance: Double
  def dispersion: Double
}

Decision Tree Regressor

Tree-based regression algorithm using recursive binary splits for continuous target variables.

/**
 * Decision tree regressor with configurable tree parameters.
 *
 * Estimator whose fitted model type is [[DecisionTreeRegressionModel]]. Every
 * setter returns `this.type`, so calls can be chained.
 */
class DecisionTreeRegressor extends Regressor[Vector, DecisionTreeRegressor, DecisionTreeRegressionModel] {
  def setMaxDepth(value: Int): this.type
  def setMaxBins(value: Int): this.type
  def setMinInstancesPerNode(value: Int): this.type
  def setMinInfoGain(value: Double): this.type
  def setMaxMemoryInMB(value: Int): this.type
  def setCacheNodeIds(value: Boolean): this.type
  def setCheckpointInterval(value: Int): this.type
  // Impurity is selected by name; accepted names are not listed here.
  def setImpurity(value: String): this.type
  def setSeed(value: Long): this.type
  // Takes a column name.
  def setVarianceCol(value: String): this.type
}

/**
 * Fitted model produced by `DecisionTreeRegressor`.
 */
class DecisionTreeRegressionModel extends RegressionModel[Vector, DecisionTreeRegressionModel] with DecisionTreeRegressorParams {
  // `Node` comes from org.apache.spark.ml.tree (see the Types section).
  def rootNode: Node
  def depth: Int
  def numNodes: Int
  def toDebugString: String
  def featureImportances: Vector
}

Random Forest Regressor

Ensemble regression method combining multiple decision trees with bootstrap aggregating.

/**
 * Random Forest regressor using ensemble of decision trees.
 *
 * Estimator whose fitted model type is [[RandomForestRegressionModel]]. Every
 * setter returns `this.type`, so calls can be chained.
 */
class RandomForestRegressor extends Regressor[Vector, RandomForestRegressor, RandomForestRegressionModel] {
  def setNumTrees(value: Int): this.type
  def setMaxDepth(value: Int): this.type
  def setMaxBins(value: Int): this.type
  def setMinInstancesPerNode(value: Int): this.type
  def setMinInfoGain(value: Double): this.type
  def setMaxMemoryInMB(value: Int): this.type
  def setCacheNodeIds(value: Boolean): this.type
  def setCheckpointInterval(value: Int): this.type
  // Impurity and feature-subset strategy are selected by name; accepted names
  // are not listed here.
  def setImpurity(value: String): this.type
  def setSubsamplingRate(value: Double): this.type
  def setSeed(value: Long): this.type
  def setFeatureSubsetStrategy(value: String): this.type
}

/**
 * Fitted model produced by `RandomForestRegressor`.
 */
class RandomForestRegressionModel extends RegressionModel[Vector, RandomForestRegressionModel] with RandomForestRegressorParams {
  // The member trees are exposed as individual decision tree models.
  def trees: Array[DecisionTreeRegressionModel]
  // NOTE(review): presumably one weight per entry of `trees` — confirm.
  def treeWeights: Array[Double]
  def numFeatures: Int
  def totalNumNodes: Int
  def toDebugString: String
  def featureImportances: Vector
}

Gradient Boosted Tree Regressor

Sequential ensemble method where each tree corrects errors from previous trees.

/**
 * Gradient-boosted tree regressor.
 *
 * Estimator whose fitted model type is [[GBTRegressionModel]]. Every setter
 * returns `this.type`, so calls can be chained.
 *
 * NOTE(review): confirm against the targeted Spark version that `setValidationTol`
 * is a public setter on the Scala API, and whether `setImpurity` has any effect
 * for this estimator.
 */
class GBTRegressor extends Regressor[Vector, GBTRegressor, GBTRegressionModel] {
  // Loss is selected by name; accepted names are not listed here.
  def setLossType(value: String): this.type
  def setMaxIter(value: Int): this.type
  def setStepSize(value: Double): this.type
  def setMaxDepth(value: Int): this.type
  def setMaxBins(value: Int): this.type
  def setMinInstancesPerNode(value: Int): this.type
  def setMinInfoGain(value: Double): this.type
  def setMaxMemoryInMB(value: Int): this.type
  def setCacheNodeIds(value: Boolean): this.type
  def setCheckpointInterval(value: Int): this.type
  def setImpurity(value: String): this.type
  def setSubsamplingRate(value: Double): this.type
  def setSeed(value: Long): this.type
  def setFeatureSubsetStrategy(value: String): this.type
  def setValidationTol(value: Double): this.type
  // Takes a column name.
  def setValidationIndicatorCol(value: String): this.type
}

/**
 * Fitted model produced by `GBTRegressor`.
 */
class GBTRegressionModel extends RegressionModel[Vector, GBTRegressionModel] with GBTRegressorParams {
  // The member trees are exposed as individual decision tree models.
  def trees: Array[DecisionTreeRegressionModel]
  // NOTE(review): presumably one weight per entry of `trees` — confirm.
  def treeWeights: Array[Double]
  def numFeatures: Int
  def totalNumNodes: Int
  def toDebugString: String
  def featureImportances: Vector
}

Isotonic Regression

Non-parametric regression that fits a monotonic function to the data.

/**
 * Isotonic regression for monotonic relationships.
 *
 * Unlike the other estimators in this document, Spark's isotonic regression is
 * not part of the `Regressor` / `RegressionModel` hierarchy: it extends
 * `Estimator` directly and shares its parameters through `IsotonicRegressionBase`.
 * The column setters are therefore declared on the class itself. Every setter
 * returns `this.type`, so calls can be chained.
 */
class IsotonicRegression extends Estimator[IsotonicRegressionModel] with IsotonicRegressionBase {
  // Column-name setters.
  def setLabelCol(value: String): this.type
  def setFeaturesCol(value: String): this.type
  def setPredictionCol(value: String): this.type
  def setWeightCol(value: String): this.type

  // `true` requests an increasing fit, `false` a decreasing (antitonic) one.
  def setIsotonic(value: Boolean): this.type
  // Index of the feature to use when the features column holds a vector.
  def setFeatureIndex(value: Int): this.type

  def fit(dataset: Dataset[_]): IsotonicRegressionModel
}

/**
 * Fitted model produced by `IsotonicRegression`.
 *
 * Extends `Model` directly rather than `RegressionModel`; prediction takes a
 * single `Double` feature value.
 */
class IsotonicRegressionModel extends Model[IsotonicRegressionModel] with IsotonicRegressionBase {
  // Boundaries in increasing order for which predictions are known.
  def boundaries: Vector
  // Predictions associated with the boundaries at the same index.
  def predictions: Vector
  def numFeatures: Int
  def predict(value: Double): Double
}

Survival Regression

Accelerated failure time model for survival analysis with censored data.

/**
 * Accelerated Failure Time survival regression.
 *
 * Estimator whose fitted model type is [[AFTSurvivalRegressionModel]]. Every
 * setter returns `this.type`, so calls can be chained.
 */
class AFTSurvivalRegression extends Regressor[Vector, AFTSurvivalRegression, AFTSurvivalRegressionModel] {
  // The *Col setters take column names.
  def setCensorCol(value: String): this.type
  def setQuantileProbabilities(value: Array[Double]): this.type
  def setQuantilesCol(value: String): this.type
  def setMaxIter(value: Int): this.type
  def setTol(value: Double): this.type
  def setFitIntercept(value: Boolean): this.type
  def setAggregationDepth(value: Int): this.type
}

/**
 * Fitted model produced by `AFTSurvivalRegression`.
 */
class AFTSurvivalRegressionModel extends RegressionModel[Vector, AFTSurvivalRegressionModel] with AFTSurvivalRegressionParams {
  def coefficients: Vector
  def intercept: Double
  def scale: Double
  // NOTE(review): presumably one output entry per configured quantile
  // probability — confirm.
  def predictQuantiles(features: Vector): Vector
}

Factorization Machine Regressor

Factorization machine for regression tasks modeling feature interactions.

/**
 * Factorization Machine regressor.
 *
 * Estimator whose fitted model type is [[FMRegressionModel]]. Every setter
 * returns `this.type`, so calls can be chained.
 */
class FMRegressor extends Regressor[Vector, FMRegressor, FMRegressionModel] {
  def setFactorSize(value: Int): this.type
  def setFitIntercept(value: Boolean): this.type
  def setFitLinear(value: Boolean): this.type
  def setRegParam(value: Double): this.type
  def setMiniBatchFraction(value: Double): this.type
  def setInitStd(value: Double): this.type
  def setMaxIter(value: Int): this.type
  def setStepSize(value: Double): this.type
  def setTol(value: Double): this.type
  // Solver is selected by name; accepted names are not listed here.
  def setSolver(value: String): this.type
  def setSeed(value: Long): this.type
}

/**
 * Fitted model produced by `FMRegressor`.
 */
class FMRegressionModel extends RegressionModel[Vector, FMRegressionModel] with FMRegressorParams {
  def intercept: Double
  def linear: Vector
  // `Matrix` comes from org.apache.spark.ml.linalg (see the Types section).
  def factors: Matrix
}

Shared Regression Components

Base Classes and Traits

/**
 * Base regressor abstraction.
 *
 * In Spark, `Regressor` specializes `org.apache.spark.ml.Predictor`, which is
 * itself an `Estimator[M]`, and takes its column parameters from `PredictorParams`.
 *
 * @tparam FeaturesType type of the input features, e.g. `Vector`
 * @tparam E            concrete estimator type, so setters can return it for chaining
 * @tparam M            concrete model type produced by `fit`
 */
abstract class Regressor[
  FeaturesType,
  E <: Regressor[FeaturesType, E, M],
  M <: RegressionModel[FeaturesType, M]
] extends Predictor[FeaturesType, E, M] with PredictorParams {
  /** Fits a model of type `M` to the given dataset. */
  def fit(dataset: Dataset[_]): M
}

/**
 * Base regression model.
 *
 * In Spark, `RegressionModel` specializes `org.apache.spark.ml.PredictionModel`,
 * which is itself a `Model[M]`, and takes its column parameters from
 * `PredictorParams`.
 *
 * @tparam FeaturesType type of the input features, e.g. `Vector`
 * @tparam M            concrete model type
 */
abstract class RegressionModel[FeaturesType, M <: RegressionModel[FeaturesType, M]]
  extends PredictionModel[FeaturesType, M] with PredictorParams {
  /** Predicts the label for a single feature value. */
  def predict(features: FeaturesType): Double
}

Types

// Regression-specific imports
import org.apache.spark.ml.regression._
import org.apache.spark.ml.linalg.{Vector, Matrix}
import org.apache.spark.sql.{DataFrame, Dataset}

// Parameter traits
import org.apache.spark.ml.param.shared._

// Model summary types
import org.apache.spark.ml.regression.{
  LinearRegressionSummary,
  LinearRegressionTrainingSummary,
  GeneralizedLinearRegressionSummary,
  GeneralizedLinearRegressionTrainingSummary
}

// Tree model components (shared with classification)
import org.apache.spark.ml.tree.{Node, InternalNode, LeafNode}