A comprehensive Python library for detecting anomalous/outlying objects in multivariate data with 45+ algorithms.
—
Ensemble and combination methods leverage multiple base detectors to improve detection performance through diversity and aggregation strategies. Ensemble methods often provide more robust and reliable outlier detection than individual models.
Feature Bagging combines multiple base detectors, each trained on a different subset of the features. This approach increases diversity and reduces the impact of irrelevant features on outlier detection.
class FeatureBagging:
def __init__(self, base_estimator=None, n_estimators=10, max_features=1.0,
bootstrap_features=False, check_detector=True, check_estimator=False,
n_jobs=1, random_state=None, combination='average',
verbose=0, estimator_params=None, contamination=0.1):
"""
Parameters:
- base_estimator: Base detector (default: LOF)
- n_estimators (int): Number of estimators in ensemble
- max_features (int or float): Number/fraction of features per estimator
- bootstrap_features (bool): Whether to use bootstrap sampling for features
- n_jobs (int): Number of parallel jobs
- combination (str): Method to combine scores ('average', 'max')
- contamination (float): Proportion of outliers in dataset
- estimator_params (dict): Parameters for base estimator
"""

Usage example:
from pyod.models.feature_bagging import FeatureBagging
from pyod.models.lof import LOF
from pyod.utils.data import generate_data
X_train, X_test, y_train, y_test = generate_data(
n_train=500, n_test=200, n_features=10, contamination=0.1, random_state=42
)
# Use LOF as base estimator
clf = FeatureBagging(
base_estimator=LOF(),
n_estimators=10,
max_features=0.7,
contamination=0.1,
n_jobs=2
)
clf.fit(X_train)
y_pred = clf.predict(X_test)

LSCP combines multiple detectors by selecting the most competent detector for each data point based on local performance. This adaptive approach leverages detector diversity more effectively than applying a single global combination rule to every point.
class LSCP:
def __init__(self, detector_list, local_region_size=30, local_max_features=1.0,
n_bins=10, random_state=None, contamination=0.1):
"""
Parameters:
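- detector_list (list): List of base detectors to combine (at least two); LSCP.fit() fits each of them on the training data, so they do not need to be fitted beforehand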
- local_region_size (int): Size of local region for competence estimation
- local_max_features (float): Maximum fraction of features used when constructing the local region
- n_bins (int): Number of bins for histogram-based selection
- contamination (float): Proportion of outliers in dataset
"""

Usage example:
from pyod.models.lscp import LSCP
from pyod.models.lof import LOF
from pyod.models.iforest import IForest
from pyod.models.ocsvm import OCSVM
# Train base detectors
lof = LOF()
iforest = IForest()
ocsvm = OCSVM()
lof.fit(X_train)
iforest.fit(X_train)
ocsvm.fit(X_train)
# Combine with LSCP
clf = LSCP(
detector_list=[lof, iforest, ocsvm],
local_region_size=30,
contamination=0.1
)
clf.fit(X_train)
y_pred = clf.predict(X_test)

PyOD provides several functions for combining outlier scores from multiple detectors:
def average(scores):
"""
Simple average combination of multiple outlier score matrices.
Parameters:
- scores (array): Score matrix of shape (n_samples, n_detectors)
Returns:
- combined_scores (array): Combined outlier scores
"""
def maximization(scores):
"""
Maximization combination: take maximum score across detectors.
Parameters:
- scores (array): Score matrix of shape (n_samples, n_detectors)
Returns:
- combined_scores (array): Combined outlier scores
"""
def aom(scores, n_buckets=5, method='static'):
"""
Average of Maximum: divide detectors into buckets and average the maximum scores.
Parameters:
- scores (array): Score matrix of shape (n_samples, n_detectors)
- n_buckets (int): Number of buckets to divide detectors
- method (str): Bucketing method ('static', 'dynamic')
Returns:
- combined_scores (array): Combined outlier scores
"""
def moa(scores, n_buckets=5, method='static'):
"""
Maximum of Average: take maximum of averaged scores from each bucket.
Parameters:
- scores (array): Score matrix of shape (n_samples, n_detectors)
- n_buckets (int): Number of buckets to divide detectors
- method (str): Bucketing method ('static', 'dynamic')
Returns:
- combined_scores (array): Combined outlier scores
"""
def median(scores):
"""
Median combination of multiple outlier score matrices.
Parameters:
- scores (array): Score matrix of shape (n_samples, n_detectors)
Returns:
- combined_scores (array): Combined outlier scores
"""

from pyod.models.combination import average, maximization, aom, moa
from pyod.models.lof import LOF
from pyod.models.iforest import IForest
from pyod.models.ocsvm import OCSVM
import numpy as np
# Train multiple detectors
detectors = [LOF(), IForest(), OCSVM()]
for detector in detectors:
detector.fit(X_train)
# Get scores from all detectors
train_scores = np.zeros((len(X_train), len(detectors)))
test_scores = np.zeros((len(X_test), len(detectors)))
for i, detector in enumerate(detectors):
train_scores[:, i] = detector.decision_scores_
test_scores[:, i] = detector.decision_function(X_test)
# Combine scores using different methods
combined_avg = average(test_scores)
combined_max = maximization(test_scores)
combined_aom = aom(test_scores, n_buckets=3)
combined_moa = moa(test_scores, n_buckets=3)

from pyod.models.lscp import LSCP
from pyod.models.lof import LOF
from pyod.models.iforest import IForest
from pyod.models.knn import KNN
from pyod.models.ecod import ECOD
# Create diverse set of detectors
detectors = [
LOF(n_neighbors=20),
LOF(n_neighbors=40), # Different parameters
IForest(n_estimators=100),
KNN(n_neighbors=5, method='mean'),
ECOD()
]
# Fit detectors
for detector in detectors:
detector.fit(X_train)
# Use LSCP for adaptive combination
clf = LSCP(
detector_list=detectors,
local_region_size=40,
contamination=0.1
)
clf.fit(X_train)
y_pred = clf.predict(X_test)

from pyod.models.feature_bagging import FeatureBagging
from pyod.models.lof import LOF
from pyod.models.iforest import IForest
# Create ensembles of different base detectors
lof_ensemble = FeatureBagging(
base_estimator=LOF(n_neighbors=20),
n_estimators=10,
max_features=0.8,
contamination=0.1
)
iforest_ensemble = FeatureBagging(
base_estimator=IForest(n_estimators=50),
n_estimators=5,
max_features=0.9,
contamination=0.1
)
# Fit ensembles
lof_ensemble.fit(X_train)
iforest_ensemble.fit(X_train)
# Combine ensemble scores
lof_scores = lof_ensemble.decision_function(X_test)
iforest_scores = iforest_ensemble.decision_function(X_test)
ensemble_scores = np.column_stack([lof_scores, iforest_scores])
final_scores = average(ensemble_scores)

Install with Tessl CLI:
npx tessl i tessl/pypi-pyod