or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

advanced.mdclustering.mddaal4py-mb.mddecomposition.mdensemble.mdindex.mdlinear-models.mdmetrics-model-selection.mdneighbors.mdpatching-config.mdstats-manifold.mdsvm.md

neighbors.mddocs/

0

# Nearest Neighbors

1

2

High-performance implementations of k-nearest neighbors algorithms with Intel hardware acceleration. These algorithms provide significant speedups for classification, regression, and unsupervised neighbor searches on large datasets.

3

4

## Capabilities

5

6

### K-Nearest Neighbors Classifier

7

8

Intel-accelerated k-nearest neighbors classifier with optimized distance calculations and neighbor search algorithms.

9

10

```python { .api }

11

class KNeighborsClassifier:

12

"""

13

K-nearest neighbors classifier with Intel optimization.

14

15

Provides significant speedup over standard scikit-learn implementation

16

through vectorized distance computations and Intel hardware acceleration.

17

"""

18

19

def __init__(

20

self,

21

n_neighbors=5,

22

weights='uniform',

23

algorithm='auto',

24

leaf_size=30,

25

p=2,

26

metric='minkowski',

27

metric_params=None,

28

n_jobs=None

29

):

30

"""

31

Initialize k-nearest neighbors classifier.

32

33

Parameters:

34

n_neighbors (int): Number of neighbors to use

35

weights (str): Weight function ('uniform', 'distance')

36

algorithm (str): Algorithm to compute nearest neighbors

37

leaf_size (int): Leaf size for tree algorithms

38

p (int): Power parameter for Minkowski metric

39

metric (str): Distance metric to use

40

metric_params (dict): Additional parameters for distance metric

41

n_jobs (int): Number of parallel jobs

42

"""

43

44

def fit(self, X, y):

45

"""

46

Fit the k-nearest neighbors classifier.

47

48

Parameters:

49

X (array-like): Training data of shape (n_samples, n_features)

50

y (array-like): Target values of shape (n_samples,)

51

52

Returns:

53

self: Fitted estimator

54

"""

55

56

def predict(self, X):

57

"""

58

Predict class labels for samples.

59

60

Parameters:

61

X (array-like): Test samples of shape (n_samples, n_features)

62

63

Returns:

64

array: Predicted class labels

65

"""

66

67

def predict_proba(self, X):

68

"""

69

Return probability estimates for test data.

70

71

Parameters:

72

X (array-like): Test samples

73

74

Returns:

75

array: Class probabilities of shape (n_samples, n_classes)

76

"""

77

78

def kneighbors(self, X=None, n_neighbors=None, return_distance=True):

79

"""

80

Find k-neighbors of a point.

81

82

Parameters:

83

X (array-like): Query points, or None for training data

84

n_neighbors (int): Number of neighbors to get

85

return_distance (bool): Whether to return distances

86

87

Returns:

88

tuple: (distances, indices) or just indices

89

"""

90

91

def score(self, X, y, sample_weight=None):

92

"""

93

Return mean accuracy on given test data and labels.

94

95

Parameters:

96

X (array-like): Test samples

97

y (array-like): True labels

98

sample_weight (array-like): Sample weights

99

100

Returns:

101

float: Mean accuracy score

102

"""

103

104

# Attributes available after fitting

105

classes_: ... # Unique class labels

106

effective_metric_: ... # Effective distance metric used

107

effective_metric_params_: ... # Effective additional metric parameters

108

n_features_in_: ... # Number of features in training data

109

n_samples_fit_: ... # Number of samples in training data

110

```

111

112

### K-Nearest Neighbors Regressor

113

114

Intel-accelerated k-nearest neighbors regressor for continuous target prediction.

115

116

```python { .api }

117

class KNeighborsRegressor:

118

"""

119

K-nearest neighbors regressor with Intel optimization.

120

121

Efficient regression based on k-nearest neighbors with optimized

122

distance calculations and neighbor averaging.

123

"""

124

125

def __init__(

126

self,

127

n_neighbors=5,

128

weights='uniform',

129

algorithm='auto',

130

leaf_size=30,

131

p=2,

132

metric='minkowski',

133

metric_params=None,

134

n_jobs=None

135

):

136

"""

137

Initialize k-nearest neighbors regressor.

138

139

Parameters:

140

n_neighbors (int): Number of neighbors to use

141

weights (str): Weight function ('uniform', 'distance')

142

algorithm (str): Algorithm to compute nearest neighbors

143

leaf_size (int): Leaf size for tree algorithms

144

p (int): Power parameter for Minkowski metric

145

metric (str): Distance metric to use

146

metric_params (dict): Additional parameters for distance metric

147

n_jobs (int): Number of parallel jobs

148

"""

149

150

def fit(self, X, y):

151

"""

152

Fit the k-nearest neighbors regressor.

153

154

Parameters:

155

X (array-like): Training data of shape (n_samples, n_features)

156

y (array-like): Target values of shape (n_samples,) or (n_samples, n_outputs)

157

158

Returns:

159

self: Fitted estimator

160

"""

161

162

def predict(self, X):

163

"""

164

Predict target values for samples.

165

166

Parameters:

167

X (array-like): Test samples of shape (n_samples, n_features)

168

169

Returns:

170

array: Predicted target values

171

"""

172

173

def kneighbors(self, X=None, n_neighbors=None, return_distance=True):

174

"""

175

Find k-neighbors of a point.

176

177

Parameters:

178

X (array-like): Query points, or None for training data

179

n_neighbors (int): Number of neighbors to get

180

return_distance (bool): Whether to return distances

181

182

Returns:

183

tuple: (distances, indices) or just indices

184

"""

185

186

def score(self, X, y, sample_weight=None):

187

"""

188

Return coefficient of determination R^2 of prediction.

189

190

Parameters:

191

X (array-like): Test samples

192

y (array-like): True values

193

sample_weight (array-like): Sample weights

194

195

Returns:

196

float: R^2 score

197

"""

198

199

# Attributes available after fitting

200

effective_metric_: ... # Effective distance metric used

201

effective_metric_params_: ... # Effective additional metric parameters

202

n_features_in_: ... # Number of features in training data

203

n_samples_fit_: ... # Number of samples in training data

204

```

205

206

### Nearest Neighbors (Unsupervised)

207

208

Intel-accelerated unsupervised nearest neighbors for neighbor queries without target labels.

209

210

```python { .api }

211

class NearestNeighbors:

212

"""

213

Unsupervised nearest neighbors with Intel optimization.

214

215

Efficient neighbor search algorithm for finding nearest neighbors

216

without requiring target labels.

217

"""

218

219

def __init__(

220

self,

221

n_neighbors=5,

222

radius=1.0,

223

algorithm='auto',

224

leaf_size=30,

225

metric='minkowski',

226

p=2,

227

metric_params=None,

228

n_jobs=None

229

):

230

"""

231

Initialize nearest neighbors searcher.

232

233

Parameters:

234

n_neighbors (int): Number of neighbors to use

235

radius (float): Range of parameter space for radius neighbors

236

algorithm (str): Algorithm to compute nearest neighbors

237

leaf_size (int): Leaf size for tree algorithms

238

metric (str): Distance metric to use

239

p (int): Power parameter for Minkowski metric

240

metric_params (dict): Additional parameters for distance metric

241

n_jobs (int): Number of parallel jobs

242

"""

243

244

def fit(self, X, y=None):

245

"""

246

Fit the nearest neighbors estimator.

247

248

Parameters:

249

X (array-like): Training data of shape (n_samples, n_features)

250

y: Ignored, present for API consistency

251

252

Returns:

253

self: Fitted estimator

254

"""

255

256

def kneighbors(self, X=None, n_neighbors=None, return_distance=True):

257

"""

258

Find k-neighbors of a point.

259

260

Parameters:

261

X (array-like): Query points, or None for training data

262

n_neighbors (int): Number of neighbors to get

263

return_distance (bool): Whether to return distances

264

265

Returns:

266

tuple: (distances, indices) or just indices

267

"""

268

269

def radius_neighbors(self, X=None, radius=None, return_distance=True, sort_results=False):

270

"""

271

Find neighbors within given radius.

272

273

Parameters:

274

X (array-like): Query points, or None for training data

275

radius (float): Limiting distance of neighbors to return

276

return_distance (bool): Whether to return distances

277

sort_results (bool): Whether to sort results by distance

278

279

Returns:

280

tuple: (distances, indices) arrays of neighbors

281

"""

282

283

def kneighbors_graph(self, X=None, n_neighbors=None, mode='connectivity'):

284

"""

285

Compute k-neighbors connectivity graph.

286

287

Parameters:

288

X (array-like): Query points, or None for training data

289

n_neighbors (int): Number of neighbors for each sample

290

mode (str): Type of returned matrix ('connectivity', 'distance')

291

292

Returns:

293

sparse matrix: Graph representing k-neighbors connectivity

294

"""

295

296

def radius_neighbors_graph(self, X=None, radius=None, mode='connectivity', sort_results=False):

297

"""

298

Compute radius-based neighbors connectivity graph.

299

300

Parameters:

301

X (array-like): Query points, or None for training data

302

radius (float): Radius of neighborhoods

303

mode (str): Type of returned matrix ('connectivity', 'distance')

304

sort_results (bool): Whether to sort results by distance

305

306

Returns:

307

sparse matrix: Graph representing radius neighbors connectivity

308

"""

309

310

# Attributes available after fitting

311

effective_metric_: ... # Effective distance metric used

312

effective_metric_params_: ... # Effective additional metric parameters

313

n_features_in_: ... # Number of features in training data

314

n_samples_fit_: ... # Number of samples in training data

315

```

316

317

### Local Outlier Factor

318

319

Intel-accelerated Local Outlier Factor for unsupervised outlier detection.

320

321

```python { .api }

322

class LocalOutlierFactor:

323

"""

324

Local Outlier Factor with Intel optimization.

325

326

Efficient unsupervised outlier detection using local density

327

deviation of given data point with respect to its neighbors.

328

"""

329

330

def __init__(

331

self,

332

n_neighbors=20,

333

algorithm='auto',

334

leaf_size=30,

335

metric='minkowski',

336

p=2,

337

metric_params=None,

338

contamination='auto',

339

novelty=False,

340

n_jobs=None

341

):

342

"""

343

Initialize Local Outlier Factor.

344

345

Parameters:

346

n_neighbors (int): Number of neighbors to use

347

algorithm (str): Algorithm to compute nearest neighbors

348

leaf_size (int): Leaf size for tree algorithms

349

metric (str): Distance metric to use

350

p (int): Power parameter for Minkowski metric

351

metric_params (dict): Additional parameters for distance metric

352

contamination (str or float): Proportion of outliers in data set

353

novelty (bool): Whether to use LOF for novelty detection

354

n_jobs (int): Number of parallel jobs

355

"""

356

357

def fit(self, X, y=None):

358

"""

359

Fit the Local Outlier Factor detector.

360

361

Parameters:

362

X (array-like): Training data of shape (n_samples, n_features)

363

y: Ignored, present for API consistency

364

365

Returns:

366

self: Fitted estimator

367

"""

368

369

def fit_predict(self, X, y=None):

370

"""

371

Fit detector and return binary labels (1 for inliers, -1 for outliers).

372

373

Parameters:

374

X (array-like): Training data

375

y: Ignored

376

377

Returns:

378

array: Binary labels for each sample

379

"""

380

381

def decision_function(self, X):

382

"""

383

Shifted opposite of Local Outlier Factor of X.

384

385

Parameters:

386

X (array-like): Query samples

387

388

Returns:

389

array: Shifted opposite of LOF scores

390

"""

391

392

def score_samples(self, X):

393

"""

394

Opposite of Local Outlier Factor of X.

395

396

Parameters:

397

X (array-like): Query samples

398

399

Returns:

400

array: Opposite of LOF scores

401

"""

402

403

def predict(self, X):

404

"""

405

Predict raw anomaly score of X using fitted detector.

406

407

Only available when novelty=True.

408

409

Parameters:

410

X (array-like): Query samples

411

412

Returns:

413

array: Anomaly scores for each sample

414

"""

415

416

def kneighbors(self, X=None, n_neighbors=None, return_distance=True):

417

"""

418

Find k-neighbors of a point.

419

420

Parameters:

421

X (array-like): Query points, or None for training data

422

n_neighbors (int): Number of neighbors to get

423

return_distance (bool): Whether to return distances

424

425

Returns:

426

tuple: (distances, indices) or just indices

427

"""

428

429

# Attributes available after fitting

430

negative_outlier_factor_: ... # Opposite of LOF of training samples

431

n_neighbors_: ... # Actual number of neighbors used

432

offset_: ... # Offset used to obtain binary labels

433

effective_metric_: ... # Effective distance metric used

434

effective_metric_params_: ... # Effective additional metric parameters

435

n_features_in_: ... # Number of features in training data

436

n_samples_fit_: ... # Number of samples in training data

437

```

438

439

## Usage Examples

440

441

### K-Nearest Neighbors Classification

442

443

```python

444

import numpy as np

445

from sklearnex.neighbors import KNeighborsClassifier

446

from sklearn.datasets import make_classification

447

from sklearn.model_selection import train_test_split

448

449

# Generate sample data

450

X, y = make_classification(n_samples=1000, n_features=20, n_informative=10,

451

n_redundant=10, n_classes=3, random_state=42)

452

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

453

454

# Create and fit KNN classifier

455

knn = KNeighborsClassifier(n_neighbors=5, weights='distance')

456

knn.fit(X_train, y_train)

457

458

# Make predictions

459

y_pred = knn.predict(X_test)

460

y_proba = knn.predict_proba(X_test)

461

462

print(f"Accuracy: {knn.score(X_test, y_test):.3f}")

463

print(f"Classes: {knn.classes_}")

464

print(f"Prediction shape: {y_pred.shape}")

465

print(f"Probability shape: {y_proba.shape}")

466

467

# Find neighbors for specific points

468

distances, indices = knn.kneighbors(X_test[:5], n_neighbors=3)

469

print(f"Neighbor distances shape: {distances.shape}")

470

print(f"Neighbor indices shape: {indices.shape}")

471

```

472

473

### K-Nearest Neighbors Regression

474

475

```python

476

import numpy as np

477

from sklearnex.neighbors import KNeighborsRegressor

478

from sklearn.datasets import make_regression

479

from sklearn.model_selection import train_test_split

480

from sklearn.metrics import mean_squared_error, r2_score

481

482

# Generate sample data

483

X, y = make_regression(n_samples=1000, n_features=10, noise=0.1, random_state=42)

484

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

485

486

# Create and fit KNN regressor

487

knn_reg = KNeighborsRegressor(n_neighbors=5, weights='distance')

488

knn_reg.fit(X_train, y_train)

489

490

# Make predictions

491

y_pred = knn_reg.predict(X_test)

492

493

print(f"R² Score: {knn_reg.score(X_test, y_test):.3f}")

494

print(f"MSE: {mean_squared_error(y_test, y_pred):.3f}")

495

print(f"Features in training: {knn_reg.n_features_in_}")

496

print(f"Samples in training: {knn_reg.n_samples_fit_}")

497

498

# Multi-output regression example

499

y_multi = np.column_stack([y, y * 0.5 + np.random.normal(0, 0.1, len(y))])

500

X_train, X_test, y_train_multi, y_test_multi = train_test_split(X, y_multi, test_size=0.2, random_state=42)

501

502

knn_multi = KNeighborsRegressor(n_neighbors=3)

503

knn_multi.fit(X_train, y_train_multi)

504

y_pred_multi = knn_multi.predict(X_test)

505

506

print(f"Multi-output prediction shape: {y_pred_multi.shape}")

507

```

508

509

### Unsupervised Nearest Neighbors

510

511

```python

512

import numpy as np

513

from sklearnex.neighbors import NearestNeighbors

514

from sklearn.datasets import make_blobs

515

516

# Generate sample data

517

X, _ = make_blobs(n_samples=500, centers=5, n_features=10, random_state=42)

518

519

# Create and fit nearest neighbors model

520

nn = NearestNeighbors(n_neighbors=10, metric='euclidean')

521

nn.fit(X)

522

523

# Find k-nearest neighbors

524

distances, indices = nn.kneighbors(X[:5])

525

print(f"Distances shape: {distances.shape}")

526

print(f"Indices shape: {indices.shape}")

527

528

# Find radius neighbors

529

radius_distances, radius_indices = nn.radius_neighbors(X[:3], radius=2.0)

530

print(f"Radius neighbors found for first 3 points:")

531

for i, (dists, idxs) in enumerate(zip(radius_distances, radius_indices)):

532

print(f" Point {i}: {len(idxs)} neighbors within radius 2.0")

533

534

# Create connectivity graphs

535

knn_graph = nn.kneighbors_graph(X[:10], n_neighbors=3, mode='connectivity')

536

print(f"KNN graph shape: {knn_graph.shape}")

537

print(f"KNN graph density: {knn_graph.nnz / (knn_graph.shape[0] * knn_graph.shape[1]):.3f}")

538

539

radius_graph = nn.radius_neighbors_graph(X[:10], radius=1.5, mode='distance')

540

print(f"Radius graph shape: {radius_graph.shape}")

541

print(f"Radius graph density: {radius_graph.nnz / (radius_graph.shape[0] * radius_graph.shape[1]):.3f}")

542

```

543

544

### Local Outlier Factor for Outlier Detection

545

546

```python

547

import numpy as np

548

from sklearnex.neighbors import LocalOutlierFactor

549

from sklearn.datasets import make_blobs

550

551

# Generate normal data with some outliers

552

X_inliers, _ = make_blobs(n_samples=200, centers=1, cluster_std=0.5, random_state=42)

553

X_outliers = np.random.uniform(low=-6, high=6, size=(20, 2))

554

X = np.concatenate([X_inliers, X_outliers])

555

556

# Outlier detection (unsupervised)

557

lof = LocalOutlierFactor(n_neighbors=20, contamination=0.1)

558

y_pred = lof.fit_predict(X)

559

560

# Count inliers and outliers

561

n_outliers = np.sum(y_pred == -1)

562

n_inliers = np.sum(y_pred == 1)

563

564

print(f"Outliers detected: {n_outliers}")

565

print(f"Inliers detected: {n_inliers}")

566

print(f"Outlier factor shape: {lof.negative_outlier_factor_.shape}")

567

568

# Get outlier scores

569

outlier_scores = lof.negative_outlier_factor_

570

print(f"Score range: [{outlier_scores.min():.3f}, {outlier_scores.max():.3f}]")

571

572

# Novelty detection example

573

X_train = X_inliers # Train only on inliers

574

X_test = np.concatenate([X_inliers[:50], X_outliers[:10]]) # Test on mix

575

576

lof_novelty = LocalOutlierFactor(n_neighbors=20, novelty=True, contamination=0.1)

577

lof_novelty.fit(X_train)

578

579

# Predict on new data

580

y_pred_novelty = lof_novelty.predict(X_test)

581

decision_scores = lof_novelty.decision_function(X_test)

582

583

print(f"Novelty detection - Outliers: {np.sum(y_pred_novelty == -1)}")

584

print(f"Decision scores range: [{decision_scores.min():.3f}, {decision_scores.max():.3f}]")

585

```

586

587

### Performance Comparison

588

589

```python

590

import time

591

import numpy as np

592

from sklearn.datasets import make_classification

593

594

# Generate large dataset

595

X, y = make_classification(n_samples=50000, n_features=20, n_informative=15,

596

n_redundant=5, n_classes=5, random_state=42)

597

X_train, X_test = X[:40000], X[40000:]

598

y_train, y_test = y[:40000], y[40000:]

599

600

# Intel-optimized version

601

from sklearnex.neighbors import KNeighborsClassifier as IntelKNN

602

603

start_time = time.time()

604

intel_knn = IntelKNN(n_neighbors=5)

605

intel_knn.fit(X_train, y_train)

606

intel_pred = intel_knn.predict(X_test)

607

intel_time = time.time() - start_time

608

609

print(f"Intel KNN time: {intel_time:.2f} seconds")

610

print(f"Intel KNN accuracy: {intel_knn.score(X_test, y_test):.3f}")

611

612

# Standard scikit-learn version (for comparison)

613

from sklearn.neighbors import KNeighborsClassifier as StandardKNN

614

615

start_time = time.time()

616

standard_knn = StandardKNN(n_neighbors=5)

617

standard_knn.fit(X_train, y_train)

618

standard_pred = standard_knn.predict(X_test)

619

standard_time = time.time() - start_time

620

621

print(f"Standard KNN time: {standard_time:.2f} seconds")

622

print(f"Standard KNN accuracy: {standard_knn.score(X_test, y_test):.3f}")

623

print(f"Speedup: {standard_time / intel_time:.1f}x")

624

625

# Verify results are identical

626

print(f"Predictions identical: {np.allclose(intel_pred, standard_pred)}")

627

```

628

629

## Performance Notes

630

631

- K-nearest neighbors algorithms show significant speedups on datasets with >5000 samples

632

- Distance calculations benefit most from Intel optimization, especially with high-dimensional data

633

- LOF shows substantial improvements on datasets with >10000 samples

634

- Memory usage is comparable to standard scikit-learn versions

635

- All algorithms maintain identical results to scikit-learn implementations