or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

classification.md clustering.md data-utilities.md features.md index.md model-selection.md regression.md text.md

docs/regression.md

0

# Regression Analysis

1

2

Diagnostic visualizers for regression model evaluation, providing insights into prediction accuracy, residual patterns, model assumptions, and outlier detection. These tools help assess regression model performance and guide model improvement.

3

4

## Capabilities

5

6

### Residuals Analysis

7

8

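Residuals plots for evaluating regression model assumptions and detecting heteroscedasticity, non-linearity, and outliers. Essential for validating linear regression assumptions and identifying model inadequacies. Note that the residuals histogram and the Q-Q plot cannot be drawn together: Yellowbrick raises a `YellowbrickValueError` when both `hist` and `qqplot` are enabled, so set one of them to `False`.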

9

10

```python { .api }

11

class ResidualsPlot(RegressionScoreVisualizer):

12

"""

13

Residuals plot visualizer for regression models.

14

15

Parameters:

16

- estimator: scikit-learn regressor

17

- hist: bool, whether to draw histogram of residuals

18

- qqplot: bool, whether to draw Q-Q plot of residuals

19

"""

20

def __init__(self, estimator, hist=True, qqplot=False, **kwargs): ...

21

def fit(self, X, y, **kwargs): ...

22

def score(self, X, y, **kwargs): ...

23

def show(self, **kwargs): ...

24

25

def residuals_plot(estimator, X_train, y_train, X_test=None, y_test=None, **kwargs):

26

"""

27

Functional API for residuals plot visualization.

28

29

Parameters:

30

- estimator: scikit-learn regressor

31

- X_train: training features

32

- y_train: training target values

33

- X_test: test features (optional)

34

- y_test: test target values (optional)

35

36

Returns:

37

ResidualsPlot visualizer instance

38

"""

39

```

40

41

**Usage Example:**

42

43

```python

44

from yellowbrick.regressor import ResidualsPlot, residuals_plot

45

from sklearn.linear_model import LinearRegression

46

from sklearn.model_selection import train_test_split

47

48

# Class-based API

49

model = LinearRegression()

50

visualizer = ResidualsPlot(model, hist=True, qqplot=True)

51

visualizer.fit(X_train, y_train)

52

visualizer.score(X_test, y_test)

53

visualizer.show()

54

55

# Functional API

56

residuals_plot(model, X_train, y_train, X_test, y_test, hist=True)

57

```

58

59

### Prediction Error Analysis

60

61

Prediction error plots showing the relationship between predicted and actual values, helping assess overall model accuracy and identify systematic prediction errors.

62

63

```python { .api }

64

class PredictionError(RegressionScoreVisualizer):

65

"""

66

Prediction error visualizer for regression models.

67

68

Parameters:

69

- estimator: scikit-learn regressor

70

- identity: bool, whether to draw identity line (perfect prediction)

71

- bestfit: bool, whether to draw best fit line through predictions

72

- alpha: float, transparency of scatter points

73

"""

74

def __init__(self, estimator, identity=True, bestfit=True, alpha=0.75, **kwargs): ...

75

def fit(self, X, y, **kwargs): ...

76

def score(self, X, y, **kwargs): ...

77

def show(self, **kwargs): ...

78

79

def prediction_error(estimator, X_train, y_train, X_test=None, y_test=None, **kwargs):

80

"""

81

Functional API for prediction error visualization.

82

83

Parameters:

84

- estimator: scikit-learn regressor

85

- X_train: training features

86

- y_train: training target values

87

- X_test: test features (optional)

88

- y_test: test target values (optional)

89

90

Returns:

91

PredictionError visualizer instance

92

"""

93

```

94

95

### Alpha Selection

96

97

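Regularization parameter (alpha) selection for regularized regression models like Ridge, Lasso, and Elastic Net. Helps find the optimal regularization strength through cross-validation. Note that `AlphaSelection` expects a cross-validating estimator whose class name ends in `CV` (for example `RidgeCV`, `LassoCV`, or `ElasticNetCV`) and raises a `YellowbrickTypeError` otherwise; for plain `Ridge`, `Lasso`, or `ElasticNet` estimators, use `ManualAlphaSelection` instead.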

98

99

```python { .api }

100

class AlphaSelection(RegressionScoreVisualizer):

101

"""

102

Alpha selection visualizer for regularized regression models.

103

104

Parameters:

105

- estimator: scikit-learn regularized regressor (Ridge, Lasso, etc.)

106

- alphas: array-like, alpha values to test

107

- cv: int or cross-validation generator, cross-validation strategy

108

- scoring: str, scoring metric for evaluation

109

- normalize_error_bars: bool, whether to normalize error bars

110

"""

111

def __init__(self, estimator, alphas=None, cv=None, scoring='neg_mean_squared_error', normalize_error_bars=False, **kwargs): ...

112

def fit(self, X, y, **kwargs): ...

113

def show(self, **kwargs): ...

114

115

class ManualAlphaSelection(RegressionScoreVisualizer):

116

"""

117

Manual alpha selection visualizer with user-specified alpha values.

118

119

Parameters:

120

- estimator: scikit-learn regularized regressor

121

- alphas: array-like, specific alpha values to evaluate

122

- cv: int or cross-validation generator

123

- scoring: str, scoring metric

124

"""

125

def __init__(self, estimator, alphas, cv=None, scoring='neg_mean_squared_error', **kwargs): ...

126

def fit(self, X, y, **kwargs): ...

127

def show(self, **kwargs): ...

128

```

129

130

**Usage Example:**

131

132

```python

133

from yellowbrick.regressor import AlphaSelection

134

from sklearn.linear_model import Ridge

135

import numpy as np

136

137

# Alpha selection for Ridge regression

138

alphas = np.logspace(-3, 3, 50)

139

model = Ridge()

140

alpha_viz = AlphaSelection(model, alphas=alphas, cv=5)

141

alpha_viz.fit(X, y)

142

alpha_viz.show()

143

144

# Get optimal alpha

145

optimal_alpha = alpha_viz.alpha_

146

```

147

148

### Cook's Distance

149

150

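Cook's distance analysis for identifying influential observations that disproportionately affect regression model parameters. Helps detect outliers and high-leverage points. Note that Yellowbrick's `CooksDistance` fits its own internal linear regression and is constructed without an estimator argument (`CooksDistance()`); after `fit`, it exposes `distance_`, `p_values_`, `influence_threshold_`, and `outlier_percentage_`.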

151

152

```python { .api }

153

class CooksDistance(RegressionScoreVisualizer):

154

"""

155

Cook's distance visualizer for influence analysis.

156

157

Parameters:

158

- estimator: scikit-learn regressor

159

- draw_threshold: bool, whether to draw influence threshold line

160

- linefmt: str, format string for threshold line

161

"""

162

def __init__(self, estimator, draw_threshold=True, linefmt='r--', **kwargs): ...

163

def fit(self, X, y, **kwargs): ...

164

def show(self, **kwargs): ...

165

```

166

167

**Usage Example:**

168

169

```python

170

from yellowbrick.regressor import CooksDistance

171

from sklearn.linear_model import LinearRegression

172

173

# Analyze influential observations

174

model = LinearRegression()

175

cooks_viz = CooksDistance(model)

176

cooks_viz.fit(X, y)

177

cooks_viz.show()

178

179

# Access Cook's distance values

180

distances = cooks_viz.distance_

181

influential_points = cooks_viz.outliers_

182

```

183

184

## Base Classes

185

186

```python { .api }

187

class RegressionScoreVisualizer(ScoreVisualizer):

188

"""

189

Base class for regression scoring visualizers.

190

Provides common functionality for regression model evaluation.

191

"""

192

def __init__(self, estimator, **kwargs): ...

193

def fit(self, X, y, **kwargs): ...

194

def score(self, X, y, **kwargs): ...

195

```

196

197

## Usage Patterns

198

199

### Comprehensive Regression Diagnostics

200

201

```python

202

from yellowbrick.regressor import ResidualsPlot, PredictionError, CooksDistance

203

from sklearn.linear_model import LinearRegression

204

from sklearn.model_selection import train_test_split

205

206

# Prepare data and model

207

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

208

model = LinearRegression()

209

210

# Residuals analysis - check assumptions

211

residuals_viz = ResidualsPlot(model, hist=True, qqplot=True)

212

residuals_viz.fit(X_train, y_train)

213

residuals_viz.score(X_test, y_test)

214

residuals_viz.show()

215

216

# Prediction accuracy assessment

217

pred_error_viz = PredictionError(model)

218

pred_error_viz.fit(X_train, y_train)

219

pred_error_viz.score(X_test, y_test)

220

pred_error_viz.show()

221

222

# Influence analysis

223

cooks_viz = CooksDistance(model)

224

cooks_viz.fit(X_train, y_train)

225

cooks_viz.show()

226

```

227

228

### Regularized Regression Tuning

229

230

```python

231

from yellowbrick.regressor import AlphaSelection

232

from sklearn.linear_model import Ridge, Lasso, ElasticNet

233

import numpy as np

234

235

# Ridge regression alpha selection

236

ridge_alphas = np.logspace(-3, 3, 50)

237

ridge_model = Ridge()

238

ridge_alpha_viz = AlphaSelection(ridge_model, alphas=ridge_alphas, cv=10)

239

ridge_alpha_viz.fit(X, y)

240

ridge_alpha_viz.show()

241

242

# Lasso regression alpha selection

243

lasso_alphas = np.logspace(-4, 1, 50)

244

lasso_model = Lasso()

245

lasso_alpha_viz = AlphaSelection(lasso_model, alphas=lasso_alphas, cv=10)

246

lasso_alpha_viz.fit(X, y)

247

lasso_alpha_viz.show()

248

249

# ElasticNet alpha selection

250

elastic_alphas = np.logspace(-4, 1, 20)

251

elastic_model = ElasticNet(l1_ratio=0.5)

252

elastic_alpha_viz = AlphaSelection(elastic_model, alphas=elastic_alphas, cv=10)

253

elastic_alpha_viz.fit(X, y)

254

elastic_alpha_viz.show()

255

```

256

257

### Model Comparison

258

259

```python

260

from yellowbrick.regressor import PredictionError

261

from sklearn.linear_model import LinearRegression, Ridge, Lasso

262

from sklearn.ensemble import RandomForestRegressor

263

import matplotlib.pyplot as plt

264

265

# Compare multiple regression models

266

models = {

267

'Linear Regression': LinearRegression(),

268

'Ridge': Ridge(alpha=1.0),

269

'Lasso': Lasso(alpha=0.1),

270

'Random Forest': RandomForestRegressor(n_estimators=100)

271

}

272

273

fig, axes = plt.subplots(2, 2, figsize=(12, 10))

274

axes = axes.ravel()

275

276

for idx, (name, model) in enumerate(models.items()):

277

viz = PredictionError(model, ax=axes[idx])

278

viz.fit(X_train, y_train)

279

viz.score(X_test, y_test)

280

viz.finalize()

281

282

plt.tight_layout()

283

plt.show()

284

```

285

286

### Pipeline Integration

287

288

```python

289

from yellowbrick.regressor import ResidualsPlot

290

from sklearn.pipeline import Pipeline

291

from sklearn.preprocessing import StandardScaler

292

from sklearn.linear_model import LinearRegression

293

294

# Create pipeline with preprocessing

295

pipeline = Pipeline([

296

('scaler', StandardScaler()),

297

('regressor', LinearRegression())

298

])

299

300

# Visualize pipeline results

301

viz = ResidualsPlot(pipeline)

302

viz.fit(X_train, y_train)

303

viz.score(X_test, y_test)

304

viz.show()

305

```

306

307

### Advanced Residuals Analysis

308

309

```python

310

from yellowbrick.regressor import ResidualsPlot

311

from sklearn.linear_model import LinearRegression

312

import matplotlib.pyplot as plt

313

314

# Create comprehensive residuals analysis

315

model = LinearRegression()

316

317

# Standard residuals plot

318

fig, axes = plt.subplots(1, 3, figsize=(15, 5))

319

320

# Fitted values vs residuals

321

viz1 = ResidualsPlot(model, ax=axes[0], hist=False, qqplot=False)

322

viz1.fit(X_train, y_train)

323

viz1.score(X_test, y_test)

324

viz1.finalize()

325

326

# Histogram of residuals

327

viz2 = ResidualsPlot(model, ax=axes[1], hist=True, qqplot=False)

328

viz2.fit(X_train, y_train)

329

viz2.score(X_test, y_test)

330

viz2.finalize()

331

332

# Q-Q plot of residuals

333

viz3 = ResidualsPlot(model, ax=axes[2], hist=False, qqplot=True)

334

viz3.fit(X_train, y_train)

335

viz3.score(X_test, y_test)

336

viz3.finalize()

337

338

plt.tight_layout()

339

plt.show()

340

```