PyOD is a comprehensive Python library for detecting anomalous/outlying objects in multivariate data, offering 45+ detection algorithms.
—
Neural network-based outlier detection methods that excel with high-dimensional data and complex patterns. PyOD provides a comprehensive PyTorch-based framework with 12+ deep learning models for anomaly detection.
Uses reconstruction error from a neural autoencoder as the outlier score. Normal data should reconstruct well, while outliers will have high reconstruction errors.
class AutoEncoder:
    """Autoencoder-based outlier detector.

    Uses reconstruction error from a neural autoencoder as the outlier
    score: normal samples reconstruct well, outliers reconstruct poorly.
    """

    def __init__(self, hidden_neuron_list=None, hidden_activation='relu',
                 output_activation='sigmoid', loss='mse', optimizer='adam',
                 epochs=100, batch_size=32, dropout_rate=0.2,
                 l2_regularizer=0.1, validation_size=0.1, preprocessing=True,
                 verbose=1, random_state=None, contamination=0.1,
                 device=None):
        """
        Parameters:
        - hidden_neuron_list (list): Number of neurons per hidden layer;
          defaults to [64, 32] (None sentinel avoids a shared mutable default)
        - hidden_activation (str): Activation function for hidden layers
        - output_activation (str): Activation function for output layer
        - loss (str): Loss function ('mse', 'mae')
        - optimizer (str): Optimizer ('adam', 'sgd', 'rmsprop')
        - epochs (int): Number of training epochs
        - batch_size (int): Training batch size
        - dropout_rate (float): Dropout rate for regularization
        - l2_regularizer (float): L2 regularization strength
        - validation_size (float): Fraction of data for validation
        - preprocessing (bool): Whether to preprocess/standardize input data
        - verbose (int): Verbosity level
        - random_state (int): Seed for reproducibility
        - contamination (float): Proportion of outliers in dataset
        - device (str): PyTorch device ('cpu', 'cuda'); None selects automatically
        """
        # Copy the default so instances never share one mutable list.
        self.hidden_neuron_list = (
            [64, 32] if hidden_neuron_list is None else hidden_neuron_list
        )
        self.hidden_activation = hidden_activation
        self.output_activation = output_activation
        self.loss = loss
        self.optimizer = optimizer
        self.epochs = epochs
        self.batch_size = batch_size
        self.dropout_rate = dropout_rate
        self.l2_regularizer = l2_regularizer
        self.validation_size = validation_size
        self.preprocessing = preprocessing
        self.verbose = verbose
        self.random_state = random_state
        self.contamination = contamination
        self.device = device
# Usage example:
# Train an AutoEncoder detector on synthetic benchmark data.
from pyod.models.auto_encoder import AutoEncoder
from pyod.utils.data import generate_data

# 10% of the generated points are outliers.
X_train, X_test, y_train, y_test = generate_data(
    n_train=500,
    n_test=200,
    n_features=10,
    contamination=0.1,
    random_state=42,
)

# Symmetric encoder-decoder architecture with a 16-unit bottleneck.
clf = AutoEncoder(
    hidden_neuron_list=[64, 32, 16, 32, 64],
    epochs=100,
    batch_size=32,
    contamination=0.1,
)
clf.fit(X_train)
y_pred = clf.predict(X_test)
# Uses the reconstruction probability from a variational autoencoder, incorporating uncertainty in the latent representation for more robust anomaly detection.
class VAE:
    """Variational autoencoder outlier detector.

    Scores anomalies with the reconstruction probability of a variational
    autoencoder, incorporating uncertainty in the latent representation.
    """

    def __init__(self, encoder_neurons=None, decoder_neurons=None,
                 latent_dim=2, hidden_activation='relu', output_activation='sigmoid',
                 loss='mse', optimizer='adam', epochs=100, batch_size=32,
                 dropout_rate=0.2, l2_regularizer=0.1, validation_size=0.1,
                 preprocessing=True, verbose=1, random_state=None,
                 contamination=0.1, gamma=1.0, capacity=0.0):
        """
        Parameters:
        - encoder_neurons (list): Neurons in encoder layers; defaults to [32, 16]
        - decoder_neurons (list): Neurons in decoder layers; defaults to [16, 32]
        - latent_dim (int): Dimensionality of latent space
        - gamma (float): Weight for KL divergence loss
        - capacity (float): Capacity parameter for β-VAE
        - Other parameters same as AutoEncoder
        """
        # None sentinels avoid shared mutable list defaults across instances.
        self.encoder_neurons = [32, 16] if encoder_neurons is None else encoder_neurons
        self.decoder_neurons = [16, 32] if decoder_neurons is None else decoder_neurons
        self.latent_dim = latent_dim
        self.hidden_activation = hidden_activation
        self.output_activation = output_activation
        self.loss = loss
        self.optimizer = optimizer
        self.epochs = epochs
        self.batch_size = batch_size
        self.dropout_rate = dropout_rate
        self.l2_regularizer = l2_regularizer
        self.validation_size = validation_size
        self.preprocessing = preprocessing
        self.verbose = verbose
        self.random_state = random_state
        self.contamination = contamination
        self.gamma = gamma
        self.capacity = capacity
# Trains a neural network to map normal data to a hypersphere with minimal volume. Points far from the hypersphere center are considered outliers.
class DeepSVDD:
    """Deep Support Vector Data Description.

    Trains a neural network to map normal data to a minimal-volume
    hypersphere; points far from the hypersphere center are outliers.
    """

    def __init__(self, c=None, use_ae=False, hidden_neurons=None,
                 hidden_activation='relu', output_activation='linear',
                 optimizer='adam', epochs=100, batch_size=32, dropout_rate=0.2,
                 l2_regularizer=0.1, validation_size=0.1, preprocessing=True,
                 verbose=1, random_state=None, contamination=0.1):
        """
        Parameters:
        - c (array): Center of hypersphere (computed automatically if None)
        - use_ae (bool): Whether to pre-train with autoencoder
        - hidden_neurons (list): Number of neurons per hidden layer;
          defaults to [64, 32] (None sentinel avoids a shared mutable default)
        - Other parameters same as AutoEncoder
        """
        self.c = c
        self.use_ae = use_ae
        # Copy the default so instances never share one mutable list.
        self.hidden_neurons = [64, 32] if hidden_neurons is None else hidden_neurons
        self.hidden_activation = hidden_activation
        self.output_activation = output_activation
        self.optimizer = optimizer
        self.epochs = epochs
        self.batch_size = batch_size
        self.dropout_rate = dropout_rate
        self.l2_regularizer = l2_regularizer
        self.validation_size = validation_size
        self.preprocessing = preprocessing
        self.verbose = verbose
        self.random_state = random_state
        self.contamination = contamination
# Uses generative adversarial networks with active learning to improve outlier detection by generating synthetic outliers.
class SO_GAAL:
    """Single-Objective Generative Adversarial Active Learning detector.

    Uses a GAN with active learning to improve outlier detection by
    generating synthetic outliers.
    """

    def __init__(self, contamination=0.1, stop_epochs=20, lr_d=0.01, lr_g=0.0001,
                 decay=1e-6, momentum=0.9, verbose=0):
        """
        Parameters:
        - contamination (float): Proportion of outliers in dataset
        - stop_epochs (int): Number of epochs for early stopping
        - lr_d (float): Learning rate for discriminator
        - lr_g (float): Learning rate for generator
        - decay (float): Learning rate decay
        - momentum (float): Momentum for optimization
        - verbose (int): Verbosity level
        """
        # Store configuration so instances are introspectable.
        self.contamination = contamination
        self.stop_epochs = stop_epochs
        self.lr_d = lr_d
        self.lr_g = lr_g
        self.decay = decay
        self.momentum = momentum
        self.verbose = verbose
# Extends SO_GAAL with multiple objectives to improve the diversity and quality of generated outliers.
class MO_GAAL:
    """Multiple-Objective Generative Adversarial Active Learning detector.

    Extends SO_GAAL with multiple sub-generators to improve the diversity
    and quality of generated outliers.
    """

    def __init__(self, k=10, stop_epochs=20, lr_d=0.01, lr_g=0.0001,
                 decay=1e-6, momentum=0.9, contamination=0.1, verbose=0):
        """
        Parameters:
        - k (int): Number of sub-generators
        - stop_epochs (int): Number of epochs for early stopping
        - lr_d (float): Learning rate for discriminator
        - lr_g (float): Learning rate for generator
        - decay (float): Learning rate decay
        - momentum (float): Momentum for optimization
        - contamination (float): Proportion of outliers in dataset
        - verbose (int): Verbosity level
        """
        # Store configuration so instances are introspectable.
        self.k = k
        self.stop_epochs = stop_epochs
        self.lr_d = lr_d
        self.lr_g = lr_g
        self.decay = decay
        self.momentum = momentum
        self.contamination = contamination
        self.verbose = verbose
# Bidirectional generative adversarial network that learns to map data to latent space and back, using reconstruction errors for anomaly detection.
class ALAD:
    """Adversarially Learned Anomaly Detection.

    Bidirectional GAN that learns to map data to latent space and back,
    using reconstruction errors for anomaly detection.
    """

    def __init__(self, contamination=0.1, preprocessing=True, lr_d=0.0001,
                 lr_g=0.0001, decay=1e-6, momentum=0.9, epoch_num=500,
                 verbose=0, device=None):
        """
        Parameters:
        - contamination (float): Proportion of outliers in dataset
        - preprocessing (bool): Whether to preprocess data
        - lr_d (float): Learning rate for discriminator
        - lr_g (float): Learning rate for generator
        - decay (float): Learning rate decay
        - momentum (float): Momentum for optimization
        - epoch_num (int): Number of training epochs
        - verbose (int): Verbosity level
        - device (str): PyTorch device ('cpu', 'cuda')
        """
        # Store configuration so instances are introspectable.
        self.contamination = contamination
        self.preprocessing = preprocessing
        self.lr_d = lr_d
        self.lr_g = lr_g
        self.decay = decay
        self.momentum = momentum
        self.epoch_num = epoch_num
        self.verbose = verbose
        self.device = device
# Uses a GAN trained on normal data and detects anomalies based on reconstruction error and discrimination scores.
class AnoGAN:
    """GAN-based anomaly detector.

    Uses a GAN trained on normal data and detects anomalies based on
    reconstruction error and discrimination scores.
    """

    def __init__(self, contamination=0.1, preprocessing=True, lr_d=0.0001,
                 lr_g=0.0001, decay=1e-6, momentum=0.9, epoch_num=500,
                 verbose=0, device=None):
        """
        Parameters:
        - contamination (float): Proportion of outliers in dataset
        - preprocessing (bool): Whether to preprocess data
        - lr_d (float): Learning rate for discriminator
        - lr_g (float): Learning rate for generator
        - decay (float): Learning rate decay
        - momentum (float): Momentum for optimization
        - epoch_num (int): Number of training epochs
        - verbose (int): Verbosity level
        - device (str): PyTorch device ('cpu', 'cuda')
        """
        # Store configuration so instances are introspectable.
        self.contamination = contamination
        self.preprocessing = preprocessing
        self.lr_d = lr_d
        self.lr_g = lr_g
        self.decay = decay
        self.momentum = momentum
        self.epoch_num = epoch_num
        self.verbose = verbose
        self.device = device
# Deep anomaly detection network that uses deviation loss to explicitly optimize for anomaly detection rather than reconstruction.
class DevNet:
    """Deviation network anomaly detector.

    Uses deviation loss to explicitly optimize for anomaly detection
    rather than reconstruction.
    """

    def __init__(self, contamination=0.1, preprocessing=True, lr_d=0.0001,
                 epochs=100, verbose=0, device=None):
        """
        Parameters:
        - contamination (float): Proportion of outliers in dataset
        - preprocessing (bool): Whether to preprocess data
        - lr_d (float): Learning rate
        - epochs (int): Number of training epochs
        - verbose (int): Verbosity level
        - device (str): PyTorch device ('cpu', 'cuda')
        """
        # Store configuration so instances are introspectable.
        self.contamination = contamination
        self.preprocessing = preprocessing
        self.lr_d = lr_d
        self.epochs = epochs
        self.verbose = verbose
        self.device = device
# Combines the benefits of Isolation Forest with deep learning by using neural networks to create better splitting criteria.
class DIF:
    """Deep Isolation Forest.

    Combines the benefits of Isolation Forest with deep learning by using
    neural networks to create better splitting criteria.
    """

    def __init__(self, n_ensemble=50, n_estimators=6, max_samples=256,
                 max_depth=8, contamination=0.1, random_state=None,
                 device=None):
        """
        Parameters:
        - n_ensemble (int): Number of ensemble models
        - n_estimators (int): Number of estimators per ensemble
        - max_samples (int): Maximum samples per estimator
        - max_depth (int): Maximum depth of isolation trees
        - contamination (float): Proportion of outliers in dataset
        - random_state (int): Seed for reproducibility
        - device (str): PyTorch device ('cpu', 'cuda')
        """
        # Store configuration so instances are introspectable.
        self.n_ensemble = n_ensemble
        self.n_estimators = n_estimators
        self.max_samples = max_samples
        self.max_depth = max_depth
        self.contamination = contamination
        self.random_state = random_state
        self.device = device


class AE1SVM:
    """Autoencoder + One-Class SVM combination."""

    def __init__(self, contamination=0.1, preprocessing=True, **kwargs):
        self.contamination = contamination
        self.preprocessing = preprocessing
        self.kwargs = kwargs  # extra options forwarded to the underlying models


class XGBOD:
    """Extreme Gradient Boosting Outlier Detection."""

    def __init__(self, contamination=0.1, max_depth=3, learning_rate=0.1, **kwargs):
        self.contamination = contamination
        self.max_depth = max_depth
        self.learning_rate = learning_rate
        self.kwargs = kwargs  # extra options forwarded to the underlying booster
# Deep learning models require more careful parameter tuning and longer training times:
# Full autoencoder configuration: deep models need explicit tuning.
from pyod.models.auto_encoder import AutoEncoder
from pyod.utils.data import generate_data
import numpy as np

# Generate higher-dimensional data for deep learning.
X_train, X_test, y_train, y_test = generate_data(
    n_train=1000,
    n_test=300,
    n_features=20,
    contamination=0.1,
    random_state=42,
)

# Symmetric encoder-decoder architecture with an 8-unit bottleneck.
clf = AutoEncoder(
    hidden_neuron_list=[32, 16, 8, 16, 32],
    hidden_activation='relu',
    output_activation='sigmoid',
    loss='mse',
    optimizer='adam',
    epochs=100,
    batch_size=32,
    dropout_rate=0.1,
    l2_regularizer=0.01,
    validation_size=0.1,
    preprocessing=True,
    contamination=0.1,
    verbose=1,
)

# Fit with early stopping based on validation loss.
clf.fit(X_train)

# Binary outlier labels and raw anomaly scores for the test split.
y_pred = clf.predict(X_test)
scores = clf.decision_function(X_test)
# Deep learning models benefit significantly from GPU acceleration:
# Enable GPU if available.
import torch

device = 'cuda' if torch.cuda.is_available() else 'cpu'
clf = AutoEncoder(
    hidden_neuron_list=[64, 32, 16, 32, 64],
    epochs=100,
    device=device,  # Specify device for PyTorch models
    contamination=0.1,
)
# Install with Tessl CLI
npx tessl i tessl/pypi-pyod