Tessl Tile for pypi/yellowbrick@1.5.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

classification.md clustering.md data-utilities.md features.md index.md model-selection.md regression.md text.md

regression.mddocs/

0
# Regression Analysis
1

2
Diagnostic visualizers for regression model evaluation, providing insights into prediction accuracy, residual patterns, model assumptions, and outlier detection. These tools help assess regression model performance and guide model improvement.
3

4
## Capabilities
5

6
### Residuals Analysis
7

8
Residuals plots for evaluating regression model assumptions, detecting heteroscedasticity, non-linearity, and outliers. Essential for validating linear regression assumptions and identifying model inadequacies.
9

10
```python { .api }
11
class ResidualsPlot(RegressionScoreVisualizer):
12
    """
13
    Residuals plot visualizer for regression models.
14
    
15
    Parameters:
16
    - estimator: scikit-learn regressor
17
    - hist: bool, whether to draw histogram of residuals
18
    - qqplot: bool, whether to draw Q-Q plot of residuals (cannot be enabled together with hist)
19
    """
20
    def __init__(self, estimator, hist=True, qqplot=False, **kwargs): ...
21
    def fit(self, X, y, **kwargs): ...
22
    def score(self, X, y, **kwargs): ...
23
    def show(self, **kwargs): ...
24

25
def residuals_plot(estimator, X_train, y_train, X_test=None, y_test=None, **kwargs):
26
    """
27
    Functional API for residuals plot visualization.
28
    
29
    Parameters:
30
    - estimator: scikit-learn regressor
31
    - X_train: training features
32
    - y_train: training target values
33
    - X_test: test features (optional)
34
    - y_test: test target values (optional)
35
    
36
    Returns:
37
    ResidualsPlot visualizer instance
38
    """
39
```
40

41
**Usage Example:**
42

43
```python
44
from yellowbrick.regressor import ResidualsPlot, residuals_plot
45
from sklearn.linear_model import LinearRegression
46
from sklearn.model_selection import train_test_split
47

48
# Class-based API
49
model = LinearRegression()
50
visualizer = ResidualsPlot(model, hist=False, qqplot=True)  # hist and qqplot cannot both be enabled
51
visualizer.fit(X_train, y_train)
52
visualizer.score(X_test, y_test)
53
visualizer.show()
54

55
# Functional API
56
residuals_plot(model, X_train, y_train, X_test, y_test, hist=True)
57
```
58

59
### Prediction Error Analysis
60

61
Prediction error plots showing the relationship between predicted and actual values, helping assess overall model accuracy and identify systematic prediction errors.
62

63
```python { .api }
64
class PredictionError(RegressionScoreVisualizer):
65
    """
66
    Prediction error visualizer for regression models.
67
    
68
    Parameters:
69
    - estimator: scikit-learn regressor
70
    - identity: bool, whether to draw identity line (perfect prediction)
71
    - bestfit: bool, whether to draw best fit line through predictions
72
    - alpha: float, transparency of scatter points
73
    """
74
    def __init__(self, estimator, identity=True, bestfit=True, alpha=0.75, **kwargs): ...
75
    def fit(self, X, y, **kwargs): ...
76
    def score(self, X, y, **kwargs): ...
77
    def show(self, **kwargs): ...
78

79
def prediction_error(estimator, X_train, y_train, X_test=None, y_test=None, **kwargs):
80
    """
81
    Functional API for prediction error visualization.
82
    
83
    Parameters:
84
    - estimator: scikit-learn regressor
85
    - X_train: training features
86
    - y_train: training target values
87
    - X_test: test features (optional)
88
    - y_test: test target values (optional)
89
    
90
    Returns:
91
    PredictionError visualizer instance
92
    """
93
```
94

95
### Alpha Selection
96

97
Regularization parameter (alpha) selection for regularized regression models like Ridge, Lasso, and Elastic Net. Helps find optimal regularization strength through cross-validation.
98

99
```python { .api }
100
class AlphaSelection(RegressionScoreVisualizer):
101
    """
102
    Alpha selection visualizer for regularized regression models.
103
    
104
    Parameters:
105
    - estimator: scikit-learn "CV" regularized regressor (RidgeCV, LassoCV, ElasticNetCV, etc.)
106
    - ax: matplotlib Axes, optional axes to draw on
107
    - is_fitted: bool or "auto", whether the estimator is already fitted
108
    
109
    Candidate alphas and the CV strategy are set on the estimator itself, not on the visualizer.
110
    """
111
    def __init__(self, estimator, ax=None, is_fitted='auto', **kwargs): ...
112
    def fit(self, X, y, **kwargs): ...
113
    def show(self, **kwargs): ...
114

115
class ManualAlphaSelection(RegressionScoreVisualizer):
116
    """
117
    Manual alpha selection visualizer with user-specified alpha values.
118
    
119
    Parameters:
120
    - estimator: scikit-learn regularized regressor
121
    - alphas: array-like, specific alpha values to evaluate
122
    - cv: int or cross-validation generator
123
    - scoring: str, scoring metric
124
    """
125
    def __init__(self, estimator, ax=None, alphas=None, cv=None, scoring=None, **kwargs): ...
126
    def fit(self, X, y, **kwargs): ...
127
    def show(self, **kwargs): ...
128
```
129

130
**Usage Example:**
131

132
```python
133
from yellowbrick.regressor import AlphaSelection
134
from sklearn.linear_model import RidgeCV
134
import numpy as np
135

136
# Alpha selection for Ridge regression (AlphaSelection requires a "CV" estimator)
137
alphas = np.logspace(-3, 3, 50)
138
model = RidgeCV(alphas=alphas)
139
alpha_viz = AlphaSelection(model)
141
alpha_viz.fit(X, y)
142
alpha_viz.show()
143

144
# Get optimal alpha
145
optimal_alpha = alpha_viz.alpha_
146
```
147

148
### Cook's Distance
149

150
Cook's distance analysis for identifying influential observations that disproportionately affect regression model parameters. Helps detect outliers and leverage points.
151

152
```python { .api }
153
class CooksDistance(Visualizer):
154
    """
155
    Cook's distance visualizer for influence analysis (fits an internal LinearRegression).
156
    
157
    Parameters:
158
    - ax: matplotlib Axes, optional axes to draw on
159
    - draw_threshold: bool, whether to draw influence threshold line
160
    - linefmt, markerfmt: str, matplotlib format strings for the stem lines and markers
161
    """
162
    def __init__(self, ax=None, draw_threshold=True, linefmt='C0-', markerfmt=',', **kwargs): ...
163
    def fit(self, X, y, **kwargs): ...
164
    def show(self, **kwargs): ...
165
```
166

167
**Usage Example:**
168

169
```python
170
from yellowbrick.regressor import CooksDistance
171
from sklearn.linear_model import LinearRegression
172

173
# Analyze influential observations
174
# CooksDistance takes no estimator; it fits its own internal LinearRegression
175
cooks_viz = CooksDistance()
176
cooks_viz.fit(X, y)
177
cooks_viz.show()
178

179
# Access Cook's distance values
180
distances = cooks_viz.distance_
181
influential_points = cooks_viz.distance_ > cooks_viz.influence_threshold_
182
```
183

184
## Base Classes
185

186
```python { .api }
187
class RegressionScoreVisualizer(ScoreVisualizer):
188
    """
189
    Base class for regression scoring visualizers.
190
    Provides common functionality for regression model evaluation.
191
    """
192
    def __init__(self, estimator, **kwargs): ...
193
    def fit(self, X, y, **kwargs): ...
194
    def score(self, X, y, **kwargs): ...
195
```
196

197
## Usage Patterns
198

199
### Comprehensive Regression Diagnostics
200

201
```python
202
from yellowbrick.regressor import ResidualsPlot, PredictionError, CooksDistance
203
from sklearn.linear_model import LinearRegression
204
from sklearn.model_selection import train_test_split
205

206
# Prepare data and model
207
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
208
model = LinearRegression()
209

210
# Residuals analysis - check assumptions
211
residuals_viz = ResidualsPlot(model, hist=False, qqplot=True)  # hist and qqplot cannot both be enabled
212
residuals_viz.fit(X_train, y_train)
213
residuals_viz.score(X_test, y_test)
214
residuals_viz.show()
215

216
# Prediction accuracy assessment
217
pred_error_viz = PredictionError(model)
218
pred_error_viz.fit(X_train, y_train)
219
pred_error_viz.score(X_test, y_test)
220
pred_error_viz.show()
221

222
# Influence analysis
223
cooks_viz = CooksDistance()  # takes no estimator; fits its own internal LinearRegression
224
cooks_viz.fit(X_train, y_train)
225
cooks_viz.show()
226
```
227

228
### Regularized Regression Tuning
229

230
```python
231
from yellowbrick.regressor import AlphaSelection
232
from sklearn.linear_model import RidgeCV, LassoCV, ElasticNetCV
232
import numpy as np
233

234
# Ridge regression alpha selection (AlphaSelection requires a "CV" estimator)
235
ridge_alphas = np.logspace(-3, 3, 50)
236
ridge_model = RidgeCV(alphas=ridge_alphas)
237
ridge_alpha_viz = AlphaSelection(ridge_model)
238
ridge_alpha_viz.fit(X, y)
239
ridge_alpha_viz.show()
240

241
# Lasso regression alpha selection
242
lasso_alphas = np.logspace(-4, 1, 50)
243
lasso_model = LassoCV(alphas=lasso_alphas, cv=10)
244
lasso_alpha_viz = AlphaSelection(lasso_model)
245
lasso_alpha_viz.fit(X, y)
246
lasso_alpha_viz.show()
247

248
# ElasticNet alpha selection
249
elastic_alphas = np.logspace(-4, 1, 20)
250
elastic_model = ElasticNetCV(l1_ratio=0.5, alphas=elastic_alphas, cv=10)
251
elastic_alpha_viz = AlphaSelection(elastic_model)
253
elastic_alpha_viz.fit(X, y)
254
elastic_alpha_viz.show()
255
```
256

257
### Model Comparison
258

259
```python
260
from yellowbrick.regressor import PredictionError
261
from sklearn.linear_model import LinearRegression, Ridge, Lasso
262
from sklearn.ensemble import RandomForestRegressor
263
import matplotlib.pyplot as plt
264

265
# Compare multiple regression models
266
models = {
267
    'Linear Regression': LinearRegression(),
268
    'Ridge': Ridge(alpha=1.0),
269
    'Lasso': Lasso(alpha=0.1),
270
    'Random Forest': RandomForestRegressor(n_estimators=100)
271
}
272

273
fig, axes = plt.subplots(2, 2, figsize=(12, 10))
274
axes = axes.ravel()
275

276
for idx, (name, model) in enumerate(models.items()):
277
    viz = PredictionError(model, ax=axes[idx])
278
    viz.fit(X_train, y_train)
279
    viz.score(X_test, y_test)
280
    viz.finalize()
281

282
plt.tight_layout()
283
plt.show()
284
```
285

286
### Pipeline Integration
287

288
```python
289
from yellowbrick.regressor import ResidualsPlot
290
from sklearn.pipeline import Pipeline
291
from sklearn.preprocessing import StandardScaler
292
from sklearn.linear_model import LinearRegression
293

294
# Create pipeline with preprocessing
295
pipeline = Pipeline([
296
    ('scaler', StandardScaler()),
297
    ('regressor', LinearRegression())
298
])
299

300
# Visualize pipeline results
301
viz = ResidualsPlot(pipeline)
302
viz.fit(X_train, y_train)
303
viz.score(X_test, y_test)
304
viz.show()
305
```
306

307
### Advanced Residuals Analysis
308

309
```python
310
from yellowbrick.regressor import ResidualsPlot
311
from sklearn.linear_model import LinearRegression
312
import matplotlib.pyplot as plt
313

314
# Create comprehensive residuals analysis
315
model = LinearRegression()
316

317
# Standard residuals plot
318
fig, axes = plt.subplots(1, 3, figsize=(15, 5))
319

320
# Fitted values vs residuals
321
viz1 = ResidualsPlot(model, ax=axes[0], hist=False, qqplot=False)
322
viz1.fit(X_train, y_train)
323
viz1.score(X_test, y_test)
324
viz1.finalize()
325

326
# Histogram of residuals
327
viz2 = ResidualsPlot(model, ax=axes[1], hist=True, qqplot=False)
328
viz2.fit(X_train, y_train)
329
viz2.score(X_test, y_test)
330
viz2.finalize()
331

332
# Q-Q plot of residuals
333
viz3 = ResidualsPlot(model, ax=axes[2], hist=False, qqplot=True)
334
viz3.fit(X_train, y_train)
335
viz3.score(X_test, y_test)
336
viz3.finalize()
337

338
plt.tight_layout()
339
plt.show()
340
```

Version

Tile

Files

regression.mddocs/

regression.mddocs/