or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

classification.md, clustering.md, evaluation-tuning.md, feature-engineering.md, index.md, linear-algebra.md, pipeline-components.md, recommendation.md, regression.md

docs/regression.md

0

# Regression

1

2

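Supervised learning algorithms for predicting continuous numerical values: linear and generalized linear models, tree-based methods (decision trees, random forests, gradient-boosted trees), isotonic regression, survival analysis, and factorization machines, together with model summaries for residual analysis.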

3

4

## Capabilities

5

6

### Linear Regression

7

8

Linear regression with elastic-net (combined L1/L2) regularization and detailed statistical summaries of the fitted model.

9

10

```scala { .api }

11

/**

12

* Linear regression with regularization support

13

*/

14

class LinearRegression extends Regressor[Vector, LinearRegression, LinearRegressionModel] {

15

def setMaxIter(value: Int): this.type

16

def setRegParam(value: Double): this.type

17

def setElasticNetParam(value: Double): this.type

18

def setTol(value: Double): this.type

19

def setFitIntercept(value: Boolean): this.type

20

def setStandardization(value: Boolean): this.type

21

def setWeightCol(value: String): this.type

22

def setSolver(value: String): this.type

23

def setAggregationDepth(value: Int): this.type

24

def setLoss(value: String): this.type

25

def setEpsilon(value: Double): this.type

26

}

27

28

class LinearRegressionModel extends RegressionModel[Vector, LinearRegressionModel] with LinearRegressionParams {

29

def coefficients: Vector

30

def intercept: Double

31

def scale: Double

32

def summary: LinearRegressionTrainingSummary

33

def hasSummary: Boolean

34

def evaluate(dataset: Dataset[_]): LinearRegressionSummary

35

}

36

37

class LinearRegressionSummary {

38

def predictions: DataFrame

39

def predictionCol: String

40

def labelCol: String

41

def featuresCol: String

42

def explainedVariance: Double

43

def meanAbsoluteError: Double

44

def meanSquaredError: Double

45

def rootMeanSquaredError: Double

46

def r2: Double

47

def residuals: DataFrame

48

}

49

50

class LinearRegressionTrainingSummary extends LinearRegressionSummary {

51

def totalIterations: Int

52

def objectiveHistory: Array[Double]

53

def devianceResiduals: Array[Double]

54

def coefficientStandardErrors: Array[Double]

55

def tValues: Array[Double]

56

def pValues: Array[Double]

57

}

58

```

59

60

**Usage Example:**

61

62

```scala

63

import org.apache.spark.ml.regression.LinearRegression

64

65

val lr = new LinearRegression()

66

.setMaxIter(20)

67

.setRegParam(0.3)

68

.setElasticNetParam(0.8)

69

70

val lrModel = lr.fit(trainingData)

71

val predictions = lrModel.transform(testData)

72

73

// Print coefficients and intercept

74

println(s"Coefficients: ${lrModel.coefficients}")

75

println(s"Intercept: ${lrModel.intercept}")

76

77

// Summarize the model over the training set

78

val trainingSummary = lrModel.summary

79

println(s"RMSE: ${trainingSummary.rootMeanSquaredError}")

80

println(s"R2: ${trainingSummary.r2}")

81

```

82

83

### Generalized Linear Regression

84

85

Generalized linear models supporting various exponential family distributions and link functions.

86

87

```scala { .api }

88

/**

89

* Generalized Linear Regression with multiple family distributions

90

*/

91

class GeneralizedLinearRegression extends Regressor[Vector, GeneralizedLinearRegression, GeneralizedLinearRegressionModel] {

92

def setFamily(value: String): this.type

93

def setLink(value: String): this.type

94

def setFitIntercept(value: Boolean): this.type

95

def setMaxIter(value: Int): this.type

96

def setTol(value: Double): this.type

97

def setRegParam(value: Double): this.type

98

def setWeightCol(value: String): this.type

99

def setSolver(value: String): this.type

100

def setLinkPredictionCol(value: String): this.type

101

def setVariancePower(value: Double): this.type

102

def setLinkPower(value: Double): this.type

103

def setOffsetCol(value: String): this.type

104

}

105

106

class GeneralizedLinearRegressionModel extends RegressionModel[Vector, GeneralizedLinearRegressionModel] with GeneralizedLinearRegressionParams {

107

def coefficients: Vector

108

def intercept: Double

109

def summary: GeneralizedLinearRegressionTrainingSummary

110

def hasSummary: Boolean

111

def evaluate(dataset: Dataset[_]): GeneralizedLinearRegressionSummary

112

}

113

114

class GeneralizedLinearRegressionSummary {

115

def predictions: DataFrame

116

def predictionCol: String

117

def labelCol: String

118

def featuresCol: String

119

def rank: Long

120

def degreesOfFreedom: Long

121

def residualDegreeOfFreedom: Long

122

def residualDegreeOfFreedomNull: Long

123

def aic: Double

124

def deviance: Double

125

def nullDeviance: Double

126

def dispersion: Double

127

}

128

```

129

130

### Decision Tree Regressor

131

132

Tree-based regression algorithm using recursive binary splits for continuous target variables.

133

134

```scala { .api }

135

/**

136

* Decision tree regressor with configurable tree parameters

137

*/

138

class DecisionTreeRegressor extends Regressor[Vector, DecisionTreeRegressor, DecisionTreeRegressionModel] {

139

def setMaxDepth(value: Int): this.type

140

def setMaxBins(value: Int): this.type

141

def setMinInstancesPerNode(value: Int): this.type

142

def setMinInfoGain(value: Double): this.type

143

def setMaxMemoryInMB(value: Int): this.type

144

def setCacheNodeIds(value: Boolean): this.type

145

def setCheckpointInterval(value: Int): this.type

146

def setImpurity(value: String): this.type

147

def setSeed(value: Long): this.type

148

def setVarianceCol(value: String): this.type

149

}

150

151

class DecisionTreeRegressionModel extends RegressionModel[Vector, DecisionTreeRegressionModel] with DecisionTreeRegressorParams {

152

def rootNode: Node

153

def depth: Int

154

def numNodes: Int

155

def toDebugString: String

156

def featureImportances: Vector

157

}

158

```

159

160

### Random Forest Regressor

161

162

Ensemble regression method combining multiple decision trees with bootstrap aggregating.

163

164

```scala { .api }

165

/**

166

* Random Forest regressor using ensemble of decision trees

167

*/

168

class RandomForestRegressor extends Regressor[Vector, RandomForestRegressor, RandomForestRegressionModel] {

169

def setNumTrees(value: Int): this.type

170

def setMaxDepth(value: Int): this.type

171

def setMaxBins(value: Int): this.type

172

def setMinInstancesPerNode(value: Int): this.type

173

def setMinInfoGain(value: Double): this.type

174

def setMaxMemoryInMB(value: Int): this.type

175

def setCacheNodeIds(value: Boolean): this.type

176

def setCheckpointInterval(value: Int): this.type

177

def setImpurity(value: String): this.type

178

def setSubsamplingRate(value: Double): this.type

179

def setSeed(value: Long): this.type

180

def setFeatureSubsetStrategy(value: String): this.type

181

}

182

183

class RandomForestRegressionModel extends RegressionModel[Vector, RandomForestRegressionModel] with RandomForestRegressorParams {

184

def trees: Array[DecisionTreeRegressionModel]

185

def treeWeights: Array[Double]

186

def numFeatures: Int

187

def totalNumNodes: Int

188

def toDebugString: String

189

def featureImportances: Vector

190

}

191

```

192

193

### Gradient Boosted Tree Regressor

194

195

Sequential ensemble method where each tree corrects errors from previous trees.

196

197

```scala { .api }

198

/**

199

* Gradient-boosted tree regressor

200

*/

201

class GBTRegressor extends Regressor[Vector, GBTRegressor, GBTRegressionModel] {

202

def setLossType(value: String): this.type

203

def setMaxIter(value: Int): this.type

204

def setStepSize(value: Double): this.type

205

def setMaxDepth(value: Int): this.type

206

def setMaxBins(value: Int): this.type

207

def setMinInstancesPerNode(value: Int): this.type

208

def setMinInfoGain(value: Double): this.type

209

def setMaxMemoryInMB(value: Int): this.type

210

def setCacheNodeIds(value: Boolean): this.type

211

def setCheckpointInterval(value: Int): this.type

212

def setImpurity(value: String): this.type

213

def setSubsamplingRate(value: Double): this.type

214

def setSeed(value: Long): this.type

215

def setFeatureSubsetStrategy(value: String): this.type

216

def setValidationTol(value: Double): this.type

217

def setValidationIndicatorCol(value: String): this.type

218

}

219

220

class GBTRegressionModel extends RegressionModel[Vector, GBTRegressionModel] with GBTRegressorParams {

221

def trees: Array[DecisionTreeRegressionModel]

222

def treeWeights: Array[Double]

223

def numFeatures: Int

224

def totalNumNodes: Int

225

def toDebugString: String

226

def featureImportances: Vector

227

}

228

```

229

230

### Isotonic Regression

231

232

Non-parametric regression that fits a monotonic function to the data.

233

234

```scala { .api }

235

/**

236

* Isotonic regression for monotonic relationships

237

*/

238

class IsotonicRegression extends Estimator[IsotonicRegressionModel] with IsotonicRegressionBase {

239

def setIsotonic(value: Boolean): this.type

240

def setFeatureIndex(value: Int): this.type

241

def setWeightCol(value: String): this.type

242

}

243

244

class IsotonicRegressionModel extends Model[IsotonicRegressionModel] with IsotonicRegressionBase {

245

def boundaries: Vector

246

def predictions: Vector

247

def numFeatures: Int

248

}

249

```

250

251

### Survival Regression

252

253

Accelerated failure time model for survival analysis with censored data.

254

255

```scala { .api }

256

/**

257

* Accelerated Failure Time survival regression

258

*/

259

class AFTSurvivalRegression extends Regressor[Vector, AFTSurvivalRegression, AFTSurvivalRegressionModel] {

260

def setCensorCol(value: String): this.type

261

def setQuantileProbabilities(value: Array[Double]): this.type

262

def setQuantilesCol(value: String): this.type

263

def setMaxIter(value: Int): this.type

264

def setTol(value: Double): this.type

265

def setFitIntercept(value: Boolean): this.type

266

def setAggregationDepth(value: Int): this.type

267

}

268

269

class AFTSurvivalRegressionModel extends RegressionModel[Vector, AFTSurvivalRegressionModel] with AFTSurvivalRegressionParams {

270

def coefficients: Vector

271

def intercept: Double

272

def scale: Double

273

def predictQuantiles(features: Vector): Vector

274

}

275

```

276

277

### Factorization Machine Regressor

278

279

Factorization machine for regression tasks modeling feature interactions.

280

281

```scala { .api }

282

/**

283

* Factorization Machine regressor

284

*/

285

class FMRegressor extends Regressor[Vector, FMRegressor, FMRegressionModel] {

286

def setFactorSize(value: Int): this.type

287

def setFitIntercept(value: Boolean): this.type

288

def setFitLinear(value: Boolean): this.type

289

def setRegParam(value: Double): this.type

290

def setMiniBatchFraction(value: Double): this.type

291

def setInitStd(value: Double): this.type

292

def setMaxIter(value: Int): this.type

293

def setStepSize(value: Double): this.type

294

def setTol(value: Double): this.type

295

def setSolver(value: String): this.type

296

def setSeed(value: Long): this.type

297

}

298

299

class FMRegressionModel extends RegressionModel[Vector, FMRegressionModel] with FMRegressorParams {

300

def intercept: Double

301

def linear: Vector

302

def factors: Matrix

303

}

304

```

305

306

## Shared Regression Components

307

308

### Base Classes and Traits

309

310

```scala { .api }

311

/**

312

* Base regressor abstraction

313

*/

314

abstract class Regressor[

315

FeaturesType,

316

E <: Regressor[FeaturesType, E, M],

317

M <: RegressionModel[FeaturesType, M]

318

] extends Predictor[FeaturesType, E, M] with PredictorParams {

319

def fit(dataset: Dataset[_]): M

320

}

321

322

/**

323

* Base regression model

324

*/

325

abstract class RegressionModel[FeaturesType, M <: RegressionModel[FeaturesType, M]]

326

extends PredictionModel[FeaturesType, M] with PredictorParams {

327

def predict(features: FeaturesType): Double

328

}

329

```

330

331

## Types

332

333

```scala { .api }

334

// Regression-specific imports

335

import org.apache.spark.ml.regression._

336

import org.apache.spark.ml.linalg.{Vector, Matrix}

337

import org.apache.spark.sql.{DataFrame, Dataset}

338

339

// Parameter traits

340

import org.apache.spark.ml.param.shared._

341

342

// Model summary types

343

import org.apache.spark.ml.regression.{

344

LinearRegressionSummary,

345

LinearRegressionTrainingSummary,

346

GeneralizedLinearRegressionSummary,

347

GeneralizedLinearRegressionTrainingSummary

348

}

349

350

// Tree model components (shared with classification)

351

import org.apache.spark.ml.tree.{Node, InternalNode, LeafNode}

352

```