A comprehensive Python library for detecting anomalous/outlying objects in multivariate data with 45+ algorithms.
npx @tessl/cli install tessl/pypi-pyod@2.0.0

A comprehensive Python library for detecting anomalous/outlying objects in multivariate data. PyOD provides 45+ detection algorithms ranging from classical methods like Local Outlier Factor (LOF) to cutting-edge approaches like ECOD and deep learning models, all with a unified scikit-learn-compatible interface.
pip install pyod==2.0.5

import pyod

Models are imported directly from individual files:
from pyod.models.lof import LOF
from pyod.models.iforest import IForest
from pyod.models.ecod import ECOD

Utilities:
from pyod.utils.data import generate_data, evaluate_print
from pyod.utils.utility import standardizer, score_to_label

from pyod.models.lof import LOF
from pyod.utils.data import generate_data, evaluate_print
# Generate sample data
X_train, X_test, y_train, y_test = generate_data(
n_train=200, n_test=100, n_features=2,
contamination=0.1, random_state=42
)
# Initialize and fit detector
clf = LOF(contamination=0.1)
clf.fit(X_train)
# Access fitted results
y_train_pred = clf.labels_ # Training labels (0: inlier, 1: outlier)
y_train_scores = clf.decision_scores_ # Training anomaly scores
threshold = clf.threshold_ # Decision threshold
# Predict on new data
y_test_pred = clf.predict(X_test)
y_test_scores = clf.decision_function(X_test)
y_test_proba = clf.predict_proba(X_test)
# Evaluate results
evaluate_print('LOF', y_test, y_test_scores)

PyOD follows a consistent architecture based on the BaseDetector abstract class:
All detectors inherit from BaseDetector and implement the same core methods, ensuring consistent behavior across different algorithms. This design enables easy model comparison, ensemble creation, and integration into machine learning pipelines.
Traditional outlier detection algorithms including Local Outlier Factor, Isolation Forest, One-Class SVM, k-Nearest Neighbors, and statistical methods. These algorithms form the foundation of anomaly detection with proven effectiveness across various domains.
class LOF:
def __init__(self, n_neighbors=20, algorithm='auto', leaf_size=30,
metric='minkowski', p=2, metric_params=None,
contamination=0.1, n_jobs=1, novelty=True, **kwargs): ...
class IForest:
def __init__(self, n_estimators=100, max_samples='auto', contamination=0.1, **kwargs): ...
class OCSVM:
def __init__(self, kernel='rbf', degree=3, gamma='scale', contamination=0.1, **kwargs): ...
class KNN:
def __init__(self, contamination=0.1, n_neighbors=5, method='largest', **kwargs): ...State-of-the-art outlier detection algorithms including ECOD, COPOD, SUOD, and other recent advances. These methods often provide better performance and scalability compared to classical approaches.
class ECOD:
def __init__(self, contamination=0.1, n_jobs=1): ...
class COPOD:
def __init__(self, contamination=0.1, n_jobs=1): ...
class SUOD:
def __init__(self, base_estimators=None, n_jobs=1, contamination=0.1, **kwargs): ...

Neural network-based outlier detection including autoencoders, variational autoencoders, Deep SVDD, and generative adversarial models. These models excel with high-dimensional data and complex patterns.
class AutoEncoder:
def __init__(self, hidden_neurons=[64, 32, 32, 64], contamination=0.1, **kwargs): ...
class VAE:
def __init__(self, encoder_neurons=[32, 16], decoder_neurons=[16, 32], contamination=0.1, **kwargs): ...
class DeepSVDD:
def __init__(self, hidden_neurons=[64, 32], contamination=0.1, **kwargs): ...

Combination methods that leverage multiple base detectors to improve detection performance through diversity and aggregation strategies.
class FeatureBagging:
def __init__(self, base_estimator=None, n_estimators=10, contamination=0.1, **kwargs): ...
class LSCP:
def __init__(self, detector_list, local_region_size=30, contamination=0.1, **kwargs): ...

Comprehensive utilities for data generation, preprocessing, evaluation, and visualization to support the complete outlier detection workflow.
def generate_data(n_train=200, n_test=100, n_features=2, contamination=0.1, **kwargs):
"""Generate synthetic datasets for outlier detection"""
def evaluate_print(clf_name, y, y_scores):
"""Print comprehensive evaluation metrics"""
def standardizer(X, X_t=None, method='minmax', keep_scalar=False):
"""Standardize datasets using various methods"""class BaseDetector:
"""Abstract base class for all outlier detection algorithms."""
def __init__(self, contamination=0.1):
"""
Parameters:
- contamination (float): Proportion of outliers in dataset (0 < contamination <= 0.5)
"""
def fit(self, X, y=None):
"""
Fit detector on training data.
Parameters:
- X (array-like): Training data of shape (n_samples, n_features)
- y: Ignored (present for API consistency)
Returns:
- self: Fitted estimator
"""
def predict(self, X, return_confidence=False):
"""
Binary prediction on test data.
Parameters:
- X (array-like): Test data of shape (n_samples, n_features)
- return_confidence (bool): Whether to return confidence scores
Returns:
- y_pred (array): Binary labels (0: inlier, 1: outlier)
"""
def decision_function(self, X):
"""
Raw anomaly scores on test data.
Parameters:
- X (array-like): Test data of shape (n_samples, n_features)
Returns:
- scores (array): Anomaly scores (higher = more anomalous)
"""
def predict_proba(self, X, method='linear', return_confidence=False):
"""
Probability of being an outlier.
Parameters:
- X (array-like): Test data of shape (n_samples, n_features)
- method (str): Probability conversion method ('linear' or 'unify')
- return_confidence (bool): If True, also return confidence scores
Returns:
- proba (array): Probability matrix of shape (n_samples, 2)
"""
# Fitted attributes (available after calling fit())
decision_scores_: array # Outlier scores of training data
labels_: array # Binary labels of training data
threshold_: float # Decision threshold