# Regression Algorithms

Ensemble regression methods including stacking for improved prediction performance and linear regression with gradient descent optimization.

## Capabilities

### Linear Regression

Linear regression with gradient descent optimization and configurable learning parameters.

```python { .api }
class LinearRegression:
    def __init__(self, eta=0.01, epochs=50, minibatches=None, random_seed=None,
                 print_progress=0):
        """
        Linear regression with gradient descent.

        Parameters:
        - eta: float, learning rate
        - epochs: int, number of training epochs
        - minibatches: int, number of minibatches for SGD (None for batch GD)
        - random_seed: int, random seed for reproducibility
        - print_progress: int, print cost every n epochs (0 for no output)
        """

    def fit(self, X, y):
        """
        Fit linear regression model.

        Parameters:
        - X: array-like, feature matrix (shape: [n_samples, n_features])
        - y: array-like, target values (shape: [n_samples])

        Returns:
        - self: fitted estimator
        """

    def predict(self, X):
        """
        Make predictions using fitted model.

        Parameters:
        - X: array-like, feature matrix

        Returns:
        - predictions: array, predicted values
        """

    w_:     # Fitted weights/coefficients
    b_:     # Fitted bias/intercept
    cost_:  # Training cost history
```

### Stacking Regressor

Meta-learning ensemble regressor that combines multiple base regressors using a meta-regressor.

```python { .api }
class StackingRegressor:
    def __init__(self, regressors, meta_regressor, verbose=0,
                 use_features_in_secondary=False):
        """
        Stacking ensemble regressor.

        Parameters:
        - regressors: list, base regression models
        - meta_regressor: regressor, meta-learning model
        - verbose: int, verbosity level
        - use_features_in_secondary: bool, include original features in meta-learning
        """

    def fit(self, X, y):
        """Fit stacking regressor"""

    def predict(self, X):
        """Make predictions using meta-regressor"""

    def get_params(self, deep=True):
        """Get parameters for this estimator"""

    def set_params(self, **params):
        """Set parameters for this estimator"""
```

### Stacking CV Regressor

Cross-validation stacking regressor that uses cross-validation to generate meta-features.

```python { .api }
class StackingCVRegressor:
    def __init__(self, regressors, meta_regressor, cv=2, shuffle=True,
                 random_state=0, verbose=0, use_features_in_secondary=False,
                 store_train_meta_features=False, use_clones=True, n_jobs=1):
        """
        Cross-validation stacking regressor.

        Parameters:
        - regressors: list, base regression models
        - meta_regressor: regressor, meta-learning model
        - cv: int, number of cross-validation folds
        - shuffle: bool, shuffle data before cross-validation
        - random_state: int, random state for cross-validation
        - verbose: int, verbosity level
        - use_features_in_secondary: bool, include original features
        - store_train_meta_features: bool, store training meta-features
        - use_clones: bool, clone base regressors
        - n_jobs: int, number of parallel jobs
        """

    def fit(self, X, y, groups=None):
        """Fit CV stacking regressor"""

    def predict(self, X):
        """Make predictions using meta-regressor"""

    def predict_meta_features(self, X):
        """Generate meta-features from base regressors"""
```

## Usage Examples

### Linear Regression Example

```python
from mlxtend.regressor import LinearRegression
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np

# Generate regression dataset
X, y = make_regression(n_samples=100, n_features=1, noise=10, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Train linear regression
lr = LinearRegression(eta=0.01, epochs=100, print_progress=10)
lr.fit(X_train, y_train)

# Make predictions
y_pred = lr.predict(X_test)

# Plot results
plt.figure(figsize=(10, 6))
plt.scatter(X_test, y_test, alpha=0.6, label='True values')
plt.scatter(X_test, y_pred, alpha=0.6, label='Predictions')
plt.plot(X_test, y_pred, 'r--', alpha=0.8)
plt.xlabel('Feature')
plt.ylabel('Target')
plt.title('Linear Regression Results')
plt.legend()
plt.show()

# Plot cost history
plt.figure(figsize=(8, 6))
plt.plot(lr.cost_)
plt.xlabel('Epochs')
plt.ylabel('Cost')
plt.title('Training Cost History')
plt.show()
```

### Stacking Regressor Example

```python
from mlxtend.regressor import StackingCVRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression as SklearnLR
from sklearn.svm import SVR
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import numpy as np

# Generate dataset
X, y = make_regression(n_samples=1000, n_features=10, noise=0.1, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Create base regressors
rf = RandomForestRegressor(n_estimators=50, random_state=42)
svr = SVR(kernel='rbf', C=0.1, gamma=0.1)
lr_sklearn = SklearnLR()

# Create meta-regressor (use the imported sklearn alias; bare
# `LinearRegression` is not imported in this example)
meta_regressor = SklearnLR()

# Create stacking regressor
stacking_regressor = StackingCVRegressor(
    regressors=[rf, svr, lr_sklearn],
    meta_regressor=meta_regressor,
    cv=5,
    use_features_in_secondary=True
)

# Fit and predict
stacking_regressor.fit(X_train, y_train)
y_pred_stacking = stacking_regressor.predict(X_test)

# Compare with individual regressors
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)

# Calculate errors
mse_stacking = mean_squared_error(y_test, y_pred_stacking)
mse_rf = mean_squared_error(y_test, y_pred_rf)

print(f"Stacking Regressor MSE: {mse_stacking:.4f}")
print(f"Random Forest MSE: {mse_rf:.4f}")
print(f"Improvement: {((mse_rf - mse_stacking) / mse_rf * 100):.2f}%")
```

### Comparing Different Stacking Strategies

```python
from mlxtend.regressor import StackingRegressor, StackingCVRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import Ridge
from sklearn.datasets import make_regression
from sklearn.model_selection import cross_val_score
import numpy as np

# Generate dataset
X, y = make_regression(n_samples=500, n_features=8, noise=0.1, random_state=42)

# Create base regressors
base_regressors = [
    RandomForestRegressor(n_estimators=30, random_state=42),
    GradientBoostingRegressor(n_estimators=30, random_state=42),
    Ridge(alpha=1.0)
]

# Create meta-regressor
meta_regressor = Ridge(alpha=0.1)

# Compare stacking strategies
regressors = {
    'Regular Stacking': StackingRegressor(base_regressors, meta_regressor),
    'CV Stacking': StackingCVRegressor(base_regressors, meta_regressor, cv=5),
    'CV Stacking + Features': StackingCVRegressor(
        base_regressors, meta_regressor, cv=5, use_features_in_secondary=True
    )
}

# Evaluate each strategy
for name, regressor in regressors.items():
    scores = cross_val_score(regressor, X, y, cv=5, scoring='neg_mean_squared_error')
    rmse_scores = np.sqrt(-scores)
    print(f"{name}:")
    print(f"  RMSE: {rmse_scores.mean():.4f} (+/- {rmse_scores.std() * 2:.4f})")
```