or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

advanced.md, clustering.md, daal4py-mb.md, decomposition.md, ensemble.md, index.md, linear-models.md, metrics-model-selection.md, neighbors.md, patching-config.md, stats-manifold.md, svm.md

docs/ensemble.md

0

# Ensemble Methods

1

2

Intel-accelerated ensemble algorithms including Random Forest and Extra Trees for both classification and regression. These implementations provide significant performance improvements through optimized tree construction and parallel processing.

3

4

## Capabilities

5

6

### Random Forest Classifier

7

8

Intel-optimized Random Forest for classification with accelerated tree building and prediction.

9

10

```python { .api }

11

class RandomForestClassifier:
    """
    Random Forest classifier with Intel optimization.

    Ensemble of decision trees with optimized parallel tree construction
    and Intel hardware acceleration for improved performance.

    This is an API signature listing: method bodies are intentionally
    omitted and only the call signatures and defaults are shown.
    """

    def __init__(
        self,
        n_estimators=100,
        criterion='gini',
        max_depth=None,
        min_samples_split=2,
        min_samples_leaf=1,
        min_weight_fraction_leaf=0.0,
        max_features='sqrt',
        max_leaf_nodes=None,
        min_impurity_decrease=0.0,
        bootstrap=True,
        oob_score=False,
        n_jobs=None,
        random_state=None,
        verbose=0,
        warm_start=False,
        class_weight=None,
        ccp_alpha=0.0,
        max_samples=None
    ):
        """Initialize Random Forest Classifier with Intel optimization."""

    def fit(self, X, y, sample_weight=None):
        """
        Build forest of trees from training set.

        Parameters:
            X (array-like): Training data
            y (array-like): Target values
            sample_weight (array-like): Sample weights (optional, defaults to None)

        Returns:
            self: Fitted estimator
        """

    def predict(self, X):
        """Predict class for samples."""

    def predict_proba(self, X):
        """Predict class probabilities."""

    def predict_log_proba(self, X):
        """Predict class log-probabilities."""

    def score(self, X, y, sample_weight=None):
        """Return mean accuracy."""

    # Attributes
    estimators_: ...  # Collection of fitted sub-estimators
    classes_: ...  # Class labels
    n_classes_: ...  # Number of classes
    feature_importances_: ...  # Feature importances
    n_features_in_: ...  # Number of features
    oob_score_: ...  # Out-of-bag score

74

```

75

76

### Random Forest Regressor

77

78

Intel-optimized Random Forest for regression tasks.

79

80

```python { .api }

81

class RandomForestRegressor:
    """
    Random Forest regressor with Intel optimization.

    Ensemble of decision trees optimized for regression with
    Intel hardware acceleration.

    This is an API signature listing: method bodies are intentionally
    omitted and only the call signatures and defaults are shown.
    """

    def __init__(
        self,
        n_estimators=100,
        criterion='squared_error',
        max_depth=None,
        min_samples_split=2,
        min_samples_leaf=1,
        min_weight_fraction_leaf=0.0,
        max_features=1.0,
        max_leaf_nodes=None,
        min_impurity_decrease=0.0,
        bootstrap=True,
        oob_score=False,
        n_jobs=None,
        random_state=None,
        verbose=0,
        warm_start=False,
        ccp_alpha=0.0,
        max_samples=None
    ):
        """Initialize Random Forest Regressor with Intel optimization."""

    def fit(self, X, y, sample_weight=None):
        """Build forest of trees."""

    def predict(self, X):
        """Predict regression target."""

    def score(self, X, y, sample_weight=None):
        """Return R² score."""

    # Attributes
    estimators_: ...
    feature_importances_: ...
    n_features_in_: ...
    oob_score_: ...

125

```

126

127

### Extra Trees Classifier

128

129

Extremely Randomized Trees classifier with Intel optimization.

130

131

```python { .api }

132

class ExtraTreesClassifier:
    """
    Extra Trees classifier with Intel optimization.

    Ensemble method using extremely randomized trees with
    optimized tree construction algorithms.

    This is an API signature listing: method bodies are intentionally
    omitted and only the call signatures and defaults are shown.
    """

    def __init__(
        self,
        n_estimators=100,
        criterion='gini',
        max_depth=None,
        min_samples_split=2,
        min_samples_leaf=1,
        min_weight_fraction_leaf=0.0,
        max_features='sqrt',
        max_leaf_nodes=None,
        min_impurity_decrease=0.0,
        bootstrap=False,
        oob_score=False,
        n_jobs=None,
        random_state=None,
        verbose=0,
        warm_start=False,
        class_weight=None,
        ccp_alpha=0.0,
        max_samples=None
    ):
        """Initialize Extra Trees Classifier."""

    def fit(self, X, y, sample_weight=None):
        """Build forest of extremely randomized trees."""

    def predict(self, X):
        """Predict class for samples."""

    def predict_proba(self, X):
        """Predict class probabilities."""

    # Attributes similar to RandomForestClassifier

173

```

174

175

### Extra Trees Regressor

176

177

Extremely Randomized Trees regressor with Intel optimization.

178

179

```python { .api }

180

class ExtraTreesRegressor:
    """
    Extra Trees regressor with Intel optimization.

    Regression ensemble using extremely randomized trees
    with Intel hardware acceleration.

    This is an API signature listing: method bodies are intentionally
    omitted and only the call signatures and defaults are shown.
    """

    def __init__(
        self,
        n_estimators=100,
        criterion='squared_error',
        max_depth=None,
        min_samples_split=2,
        min_samples_leaf=1,
        min_weight_fraction_leaf=0.0,
        max_features=1.0,
        max_leaf_nodes=None,
        min_impurity_decrease=0.0,
        bootstrap=False,
        oob_score=False,
        n_jobs=None,
        random_state=None,
        verbose=0,
        warm_start=False,
        ccp_alpha=0.0,
        max_samples=None
    ):
        """Initialize Extra Trees Regressor."""

    def fit(self, X, y, sample_weight=None):
        """Build forest of extremely randomized trees."""

    def predict(self, X):
        """Predict regression target."""

    # Attributes similar to RandomForestRegressor

217

```

218

219

## Usage Examples

220

221

### Random Forest Classification

222

223

```python

224

import numpy as np
from sklearnex.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# Generate classification dataset
X, y = make_classification(
    n_samples=1000, n_features=20, n_informative=10,
    n_redundant=10, n_classes=3, random_state=42
)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train Random Forest
rf = RandomForestClassifier(
    n_estimators=100,
    max_depth=10,
    random_state=42,
    n_jobs=-1
)
rf.fit(X_train, y_train)

# Make predictions on the held-out split
y_pred = rf.predict(X_test)
y_proba = rf.predict_proba(X_test)
accuracy = rf.score(X_test, y_test)

print(f"Accuracy: {accuracy:.3f}")
print(f"Number of trees: {len(rf.estimators_)}")
print(f"Feature importances shape: {rf.feature_importances_.shape}")

# Top 5 most important features: argsort is ascending, so take the last
# five indices and reverse them to list the most important first.
feature_importance = rf.feature_importances_
top_features = np.argsort(feature_importance)[-5:][::-1]
print(f"Top 5 features: {top_features}")

258

```

259

260

### Random Forest Regression

261

262

```python

263

import numpy as np
from sklearnex.ensemble import RandomForestRegressor
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

# Generate regression dataset
X, y = make_regression(
    n_samples=1000, n_features=15, noise=0.1, random_state=42
)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train Random Forest Regressor.
# oob_score=True requests the out-of-bag estimate read from oob_score_ below.
rf_reg = RandomForestRegressor(
    n_estimators=100,
    max_depth=15,
    min_samples_split=5,
    random_state=42,
    oob_score=True
)
rf_reg.fit(X_train, y_train)

# Evaluate model on the held-out split
y_pred = rf_reg.predict(X_test)
r2_score = rf_reg.score(X_test, y_test)
oob_score = rf_reg.oob_score_

print(f"R² Score: {r2_score:.3f}")
print(f"Out-of-bag Score: {oob_score:.3f}")
print(f"Feature importances sum: {rf_reg.feature_importances_.sum():.3f}")

292

```

293

294

### Comparing Ensemble Methods

295

296

```python

297

import time
import numpy as np
from sklearnex.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score

# Generate dataset
X, y = make_classification(
    n_samples=2000, n_features=30, n_informative=15,
    n_classes=4, random_state=42
)

# Compare Random Forest vs Extra Trees
models = {
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
    'Extra Trees': ExtraTreesClassifier(n_estimators=100, random_state=42)
}

for name, model in models.items():
    # perf_counter() is the monotonic, high-resolution clock intended for
    # measuring elapsed intervals; time.time() can jump if the system clock
    # is adjusted.
    start_time = time.perf_counter()

    # Cross-validation
    scores = cross_val_score(model, X, y, cv=5, scoring='accuracy')

    # The timed span covers the whole 5-fold cross-validation (five fits
    # plus scoring), not a single training run, so it is labelled as such.
    cv_time = time.perf_counter() - start_time

    print(f"{name}:")
    print(f" Mean CV Accuracy: {scores.mean():.3f} (+/- {scores.std() * 2:.3f})")
    print(f" Cross-validation Time: {cv_time:.2f} seconds")

    # Fit for feature importance analysis
    model.fit(X, y)
    print(f" Feature Importance Range: {model.feature_importances_.min():.4f} - {model.feature_importances_.max():.4f}")
    print()

331

```

332

333

### Performance Comparison with Standard Scikit-learn

334

335

```python

336

import time
import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier as StandardRF
from sklearnex.ensemble import RandomForestClassifier as IntelRF

# Generate large dataset
X, y = make_classification(
    n_samples=10000, n_features=50, n_informative=25,
    n_classes=5, random_state=42
)

# Intel-optimized Random Forest.
# perf_counter() is the monotonic clock intended for elapsed-time
# measurement; only construction and fit() are inside the timed span.
start_time = time.perf_counter()
intel_rf = IntelRF(n_estimators=100, random_state=42, n_jobs=-1)
intel_rf.fit(X, y)
intel_time = time.perf_counter() - start_time
# NOTE: scored on the same data used for fitting, so this is training
# accuracy rather than an estimate of generalization performance.
intel_accuracy = intel_rf.score(X, y)

print("Intel Random Forest:")
print(f" Training Time: {intel_time:.2f} seconds")
print(f" Accuracy: {intel_accuracy:.3f}")

# Standard scikit-learn Random Forest (for comparison), configured and
# timed the same way as the Intel run above.
start_time = time.perf_counter()
standard_rf = StandardRF(n_estimators=100, random_state=42, n_jobs=-1)
standard_rf.fit(X, y)
standard_time = time.perf_counter() - start_time
standard_accuracy = standard_rf.score(X, y)

print("\nStandard Random Forest:")
print(f" Training Time: {standard_time:.2f} seconds")
print(f" Accuracy: {standard_accuracy:.3f}")
# Speedup is the ratio of standard to Intel training time (>1 means the
# Intel implementation was faster in this run).
print(f" Speedup: {standard_time / intel_time:.1f}x")

372

```

373

374

## Performance Notes

375

376

- Significant speedups on datasets with >1000 samples and >10 features

377

- Tree construction is highly optimized with Intel acceleration

378

- Parallel processing scales well with available CPU cores

379

- Memory usage comparable to standard scikit-learn implementations

380

- Feature importance calculations are accelerated

381

- Out-of-bag scoring benefits from optimization when enabled