Python library for backtesting and analyzing trading strategies at scale
—
Look-ahead analysis tools for generating labels from future price movements, enabling machine learning model training on financial time series data. The labels module provides various methods to create target variables for supervised learning applications in quantitative finance.
Generators for statistical measures computed over future time windows, commonly used for regression and forecasting tasks.
class FMEAN:
    """
    Label generator for the mean of future values.

    Takes the mean of the values inside a forward-looking window; intended
    for targets such as the future average price or return.
    """

    @classmethod
    def run(cls, close, window, **kwargs):
        """
        Compute future-mean labels.

        Parameters:
        - close: pd.Series or pd.DataFrame holding the price data
        - window: int, size of the forward-looking window
        - **kwargs: extra options; `pct_change` (bool, default False) is not
          a named parameter and is supplied through here to switch to
          percentage change

        Returns:
        FMEAN: label generator whose result is exposed as `fmean`
        """
class FSTD:
    """
    Label generator for the standard deviation of future values.

    Measures the dispersion of the values inside a forward-looking window;
    intended for volatility prediction and risk modeling.
    """

    @classmethod
    def run(cls, close, window, **kwargs):
        """
        Compute future-standard-deviation labels.

        Parameters:
        - close: pd.Series or pd.DataFrame holding the price data
        - window: int, size of the forward-looking window
        - **kwargs: extra options supplied by keyword:
          `pct_change` (bool, default False) to use percentage change,
          `ddof` (int, default 1) for the degrees of freedom

        Returns:
        FSTD: label generator whose result is exposed as `fstd`
        """
class FMIN:
    """
    Label generator for the minimum of future values.

    Picks the lowest value inside a forward-looking window; intended for
    support level prediction and drawdown analysis.
    """

    @classmethod
    def run(cls, close, window, **kwargs):
        """
        Compute future-minimum labels.

        Parameters:
        - close: pd.Series or pd.DataFrame holding the price data
        - window: int, size of the forward-looking window
        - **kwargs: extra options; `pct_change` (bool, default False)
          expresses the minimum as a percentage change from the current value

        Returns:
        FMIN: label generator whose result is exposed as `fmin`
        """
class FMAX:
    """
    Label generator for the maximum of future values.

    Picks the highest value inside a forward-looking window; intended for
    resistance level prediction and profit target analysis.
    """

    @classmethod
    def run(cls, close, window, **kwargs):
        """
        Compute future-maximum labels.

        Parameters:
        - close: pd.Series or pd.DataFrame holding the price data
        - window: int, size of the forward-looking window
        - **kwargs: extra options; `pct_change` (bool, default False)
          expresses the maximum as a percentage change from the current value

        Returns:
        FMAX: label generator whose result is exposed as `fmax`
        """


# Simple labeling methods for basic classification and regression tasks.
class FIXLB:
    """
    Label generator that emits a fixed value.

    Every time period receives the same constant label, which is useful for
    baseline models and control experiments.
    """

    @classmethod
    def run(cls, shape, value=1, **kwargs):
        """
        Produce fixed labels.

        Parameters:
        - shape: tuple (n_rows, n_cols) giving the output shape
        - value: scalar used as the label everywhere (default: 1)
        - **kwargs: extra options; `dtype` sets the data type of the labels

        Returns:
        FIXLB: label generator holding the fixed labels
        """
class MEANLB:
    """
    Label generator driven by deviations from the mean.

    Labels are derived from how far values depart from a mean, which is
    useful for mean reversion strategies and anomaly detection.
    """

    @classmethod
    def run(cls, close, window, threshold=0, **kwargs):
        """
        Produce mean-based labels.

        Parameters:
        - close: pd.Series or pd.DataFrame holding the price data
        - window: int, rolling window used for the mean
        - threshold: float, threshold applied when generating labels
          (default: 0)
        - **kwargs: extra options; `above` (bool, default True) labels
          values that are above the mean

        Returns:
        MEANLB: label generator holding the mean-based labels
        """


# Advanced labeling methods for ranking and relative performance analysis.
class LEXLB:
    """
    Label generator based on lexicographic ordering.

    Ranks observations by several criteria compared lexicographically, which
    is useful for multi-objective optimization and ranking problems.
    """

    @classmethod
    def run(cls, *args, **kwargs):
        """
        Produce lexicographic labels.

        Parameters:
        - *args: sequence of arrays to compare lexicographically
        - **kwargs: extra options; `descending` (bool, default False)
          selects descending order

        Returns:
        LEXLB: label generator holding the lexicographic rankings
        """


# Sophisticated trend analysis and classification for directional predictions.
class TRENDLB:
    """
    Label generator based on price trends.

    Examines price trends over various time horizons and labels trend
    direction, strength, and continuation patterns.
    """

    @classmethod
    def run(cls, close, window=20, mode='binary', **kwargs):
        """
        Produce trend-based labels.

        Parameters:
        - close: pd.Series or pd.DataFrame holding the price data
        - window: int, trend analysis window (default: 20)
        - mode: str, trend mode (see TrendMode enum; default: 'binary')
          NOTE(review): the default is a lowercase string while TrendMode is
          an IntEnum with CamelCase members - confirm which spellings are
          accepted.
        - **kwargs: extra options supplied by keyword:
          `min_pct_change` (float, default 0.01) minimum change for a trend,
          `smooth_window` (int, default None) smoothing window for the trend

        Returns:
        TRENDLB: label generator holding the trend labels
        """
class TrendMode(IntEnum):
    """
    Modes available to TRENDLB for computing trends.

    Each member names a different way of calculating and categorizing
    trends in financial time series data.
    """

    # Simple up/down binary classification
    Binary = 0
    # Binary with continuation signals
    BinaryCont = 1
    # Binary with continuation and saturation
    BinaryContSat = 2
    # Percentage change-based trends
    PctChange = 3
    # Normalized percentage change trends
    PctChangeNorm = 4


# Specialized generators for binary classification tasks in trading applications.
class BOLB:
    """
    Label generator for binary outcomes.

    Produces two-class labels for classification tasks such as
    profitable/unprofitable trades or directional movements.
    """

    @classmethod
    def run(cls, close, window, threshold=0, **kwargs):
        """
        Produce binary outcome labels.

        Parameters:
        - close: pd.Series or pd.DataFrame holding the price data
        - window: int, forward-looking window over which the outcome is taken
        - threshold: float, threshold for the binary classification
          (default: 0)
        - **kwargs: extra options supplied by keyword:
          `return_type` (str) type of return calculation ('simple', 'log'),
          `min_periods` (int) minimum periods for a valid calculation

        Returns:
        BOLB: label generator holding the binary outcome labels
        """


import vectorbt as vbt
import pandas as pd
# Download AAPL data for 2020-01-01 through 2023-01-01 and keep "Close"
data = vbt.YFData.download("AAPL", start="2020-01-01", end="2023-01-01")
close = data.get("Close")

# Future mean (window=5) and its label values
future_mean = vbt.FMEAN.run(close, window=5)
mean_labels = future_mean.fmean

# Future standard deviation (window=10) and its label values
future_std = vbt.FSTD.run(close, window=10)
std_labels = future_std.fstd

# Future minimum (window=20), expressed as % change
future_min = vbt.FMIN.run(close, window=20, pct_change=True)
min_labels = future_min.fmin  # Future minimum % change

# Future maximum (window=20), expressed as % change
future_max = vbt.FMAX.run(close, window=20, pct_change=True)
max_labels = future_max.fmax  # Future maximum % change

# Generate trend-based labels with different modes
# Binary mode
trend_binary = vbt.TRENDLB.run(close, window=20, mode='binary')
binary_trends = trend_binary.trend

# Percentage-change mode with a 2% minimum change
trend_pct = vbt.TRENDLB.run(close, window=20, mode='pct_change', min_pct_change=0.02)
pct_trends = trend_pct.trend

# Binary-with-continuation mode, smoothed over a 5-period window
trend_smooth = vbt.TRENDLB.run(close, window=20, mode='binary_cont', smooth_window=5)
smooth_trends = trend_smooth.trend

# Binary outcome labels for profitable trades
# 10-day forward window, 5% profit threshold, simple returns
profitable_trades = vbt.BOLB.run(close, window=10, threshold=0.05, return_type='simple')
profit_labels = profitable_trades.labels  # True for profitable periods

# Mean reversion labels: 20-day rolling mean, 2% deviation threshold,
# labelled when above the mean
mean_reversion = vbt.MEANLB.run(close, window=20, threshold=0.02, above=True)
reversion_labels = mean_reversion.labels  # True when above mean

# Download multiple assets
symbols = ["AAPL", "GOOGL", "MSFT", "TSLA"]
data = vbt.YFData.download(symbols, start="2020-01-01", end="2023-01-01")
close = data.get("Close")

# Future return labels, one entry per symbol (window=5, percentage change)
future_returns = {
    symbol: vbt.FMEAN.run(close[symbol], window=5, pct_change=True).fmean
    for symbol in symbols
}

# Trend labels, one entry per symbol (window=20, binary mode)
trend_labels = {
    symbol: vbt.TRENDLB.run(close[symbol], window=20, mode='binary').trend
    for symbol in symbols
}

# Combine into DataFrames keyed by symbol
future_returns_df = pd.DataFrame(future_returns)
trend_labels_df = pd.DataFrame(trend_labels)

# Generate labels for different time horizons
# Future maximum return over three horizons
# NOTE(review): the dict-of-columns construction below presumably expects
# `close` to be a single-asset Series - confirm before reusing it with the
# multi-asset frame.
short_term = vbt.FMAX.run(close, window=5, pct_change=True)    # 5-day max return
medium_term = vbt.FMAX.run(close, window=20, pct_change=True)  # 20-day max return
long_term = vbt.FMAX.run(close, window=60, pct_change=True)    # 60-day max return

# One column of labels per horizon
max_by_horizon = {
    'short_max': short_term.fmax,
    'medium_max': medium_term.fmax,
    'long_max': long_term.fmax,
}
horizon_labels = pd.DataFrame(max_by_horizon)

# Flag a horizon as profitable when its max return exceeds its threshold
horizon_labels['short_profitable'] = horizon_labels['short_max'].gt(0.03)
horizon_labels['medium_profitable'] = horizon_labels['medium_max'].gt(0.10)
horizon_labels['long_profitable'] = horizon_labels['long_max'].gt(0.25)

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
# Generate features (indicators)
ma_20 = vbt.MA.run(close, 20).ma
ma_50 = vbt.MA.run(close, 50).ma
rsi = vbt.RSI.run(close, 14).rsi
macd = vbt.MACD.run(close)

# Create feature matrix
features = pd.DataFrame({
    'ma_ratio': ma_20 / ma_50,
    'rsi': rsi,
    'macd': macd.macd,
    'macd_signal': macd.signal,
    'returns_5d': close.pct_change(5),
    'volatility': close.rolling(20).std(),
})

# Generate labels
target = vbt.BOLB.run(
    close,
    window=10,
    threshold=0.05,  # 5% profit in next 10 days
    return_type='simple'
).labels

# Prepare data for ML: drop rows with missing features, then missing labels
X = features.dropna()
y = target.reindex(X.index).dropna()

# Align X and y on the rows that survived both drops
common_index = X.index.intersection(y.index)
X = X.loc[common_index]
y = y.loc[common_index]

# Train-test split.
# The rows form a time series and every label looks 10 periods ahead, so the
# default shuffled split would mix rows from the test period into training
# and inflate the test score. Split chronologically instead: the first 80%
# of rows train the model and the final 20% evaluate it.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False
)

# Train model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate
train_score = model.score(X_train, y_train)
test_score = model.score(X_test, y_test)
print(f"Train Score: {train_score:.3f}")
print(f"Test Score: {test_score:.3f}")


class CustomVolatilityLabel:
    """Custom label for volatility regime classification."""

    @classmethod
    def run(cls, close, short_window=5, long_window=20, threshold=1.5,
            extreme_factor=1.5):
        """
        Classify each row into a volatility regime.

        The rolling standard deviation of `close` over `short_window` is
        divided by the one over `long_window`, and the ratio is bucketed:
        0 (low), 1 (ratio > threshold, high) or
        2 (ratio > threshold * extreme_factor, very high).

        Parameters:
        - close: pd.Series or pd.DataFrame, price data
        - short_window: int, rolling window for short-term volatility
        - long_window: int, rolling window for long-term volatility
        - threshold: float, ratio above which a row is labelled 1
        - extreme_factor: float, multiple of `threshold` above which a row
          is labelled 2 (default: 1.5)

        Returns:
        Integer labels with the same index (and columns, for a DataFrame)
        as `close`. Rows where the ratio is NaN, such as the warm-up rows
        of the rolling windows, compare False and are labelled 0.
        """
        # Calculate short and long-term volatility
        short_vol = close.rolling(short_window).std()
        long_vol = close.rolling(long_window).std()

        # Volatility ratio
        vol_ratio = short_vol / long_vol

        # Classify regime. Built from comparisons rather than a fresh
        # pd.Series so that DataFrame input (several assets) also works.
        labels = (vol_ratio > threshold).astype("int64")  # 0 low, 1 high
        # The top tier is applied last so it overrides the lower one.
        return labels.mask(vol_ratio > threshold * extreme_factor, 2)


# Use custom label generator
vol_labels = CustomVolatilityLabel.run(close)

# Install with Tessl CLI
npx tessl i tessl/pypi-vectorbt