Python library for backtesting and analyzing trading strategies at scale
—
Unified data acquisition and management system supporting multiple financial data providers with automatic synchronization, caching, and preprocessing capabilities. The data module provides consistent interfaces for accessing market data from various sources.
Access to Yahoo Finance historical and real-time market data with automatic caching and data validation.
class YFData:
"""
Yahoo Finance data provider with caching and update capabilities.
Provides access to historical OHLCV data, dividends, stock splits,
and basic fundamental data from Yahoo Finance.
"""
@classmethod
def download(cls, symbols, start=None, end=None, **kwargs):
"""
Download historical data from Yahoo Finance.
Parameters:
- symbols: str or list, ticker symbols to download
- start: str or datetime, start date (default: 1 year ago)
- end: str or datetime, end date (default: today)
- period: str, period instead of start/end ('1d', '5d', '1mo', etc.)
- interval: str, data interval ('1d', '1h', '5m', etc.)
- auto_adjust: bool, adjust OHLC for splits/dividends (default: True)
- prepost: bool, include pre/post market data (default: False)
- threads: bool, use threading for multiple symbols (default: True)
Returns:
YFData: Data instance with downloaded data
"""
def get(self, column=None):
"""
Get data columns.
Parameters:
- column: str, column name ('Open', 'High', 'Low', 'Close', 'Volume')
Returns:
pd.DataFrame or pd.Series: Requested data
"""
def update(self, **kwargs):
"""
Update data with latest available data.
Returns:
YFData: Updated data instance
"""
def save(self, path):
"""Save data to file."""
@classmethod
def load(cls, path):
"""Load data from file."""
Access to Binance cryptocurrency exchange data including spot and futures markets.
class BinanceData:
"""
Binance exchange data provider for cryptocurrency markets.
Supports spot and futures data with various intervals and
comprehensive symbol coverage.
"""
@classmethod
def download(cls, symbols, start=None, end=None, **kwargs):
"""
Download data from Binance.
Parameters:
- symbols: str or list, trading pairs (e.g., 'BTCUSDT')
- start: str or datetime, start date
- end: str or datetime, end date
- interval: str, kline interval ('1m', '5m', '1h', '1d', etc.)
- market: str, market type ('spot', 'futures')
Returns:
BinanceData: Data instance with downloaded data
"""
def get(self, column=None):
"""Get data columns."""
def update(self, **kwargs):
"""Update with latest data."""
Universal cryptocurrency exchange data access through the CCXT library supporting 100+ exchanges.
class CCXTData:
"""
Universal cryptocurrency exchange data via CCXT library.
Provides unified access to data from 100+ cryptocurrency exchanges
with consistent interface and automatic rate limiting.
"""
@classmethod
def download(cls, symbols, start=None, end=None, exchange='binance', **kwargs):
"""
Download data from CCXT-supported exchange.
Parameters:
- symbols: str or list, trading pairs
- start: str or datetime, start date
- end: str or datetime, end date
- exchange: str, exchange name (e.g., 'binance', 'coinbase')
- timeframe: str, timeframe ('1m', '5m', '1h', '1d', etc.)
Returns:
CCXTData: Data instance with exchange data
"""
def get_exchanges(self):
"""Get list of supported exchanges."""
def get_symbols(self, exchange):
"""Get available symbols for exchange."""
Access to Alpaca trading API for US equities and ETFs with commission-free trading integration.
class AlpacaData:
"""
Alpaca trading API data provider.
Provides access to US equity and ETF data with real-time and
historical data capabilities.
"""
@classmethod
def download(cls, symbols, start=None, end=None, **kwargs):
"""
Download data from Alpaca.
Parameters:
- symbols: str or list, US equity symbols
- start: str or datetime, start date
- end: str or datetime, end date
- timeframe: str, bar timeframe ('1Min', '5Min', '1Hour', '1Day')
- api_key: str, Alpaca API key
- secret_key: str, Alpaca secret key
- paper: bool, use paper trading endpoint (default: True)
Returns:
AlpacaData: Data instance with Alpaca data
"""
Core data management functionality providing the foundation for all data sources.
class Data:
    """
    Base data management class.

    Provides common functionality for data storage, manipulation,
    and preprocessing across all data sources.
    """

    def __init__(self, data, **kwargs):
        """
        Initialize data instance.

        Parameters:
        - data: pd.DataFrame, market data
        - symbols: list, symbol names
        - wrapper: ArrayWrapper, data wrapper configuration
        """

    def get(self, column=None, **kwargs):
        """
        Get data columns with optional preprocessing.

        Parameters:
        - column: str or list, column names to retrieve

        Returns:
        pd.DataFrame or pd.Series: Requested data
        """

    def resample(self, freq, **kwargs):
        """
        Resample data to different frequency.

        Parameters:
        - freq: str, target frequency ('1H', '1D', '1W', etc.)

        Returns:
        Data: Resampled data instance
        """

    def dropna(self, **kwargs):
        """Remove missing values."""

    def fillna(self, method='ffill', **kwargs):
        """Fill missing values."""
class DataUpdater:
"""
Data updating and synchronization utilities.
Handles incremental data updates, cache management,
and data validation across multiple sources.
"""
def __init__(self, data_cls, **kwargs):
"""Initialize updater for specific data class."""
def update(self, **kwargs):
"""Update data with latest available."""
def schedule_update(self, freq, **kwargs):
"""Schedule automatic data updates."""
Tools for generating synthetic market data for strategy testing and Monte Carlo simulations.
class SyntheticData:
    """
    Base class for synthetic data generation.

    Provides framework for creating artificial market data
    with specified statistical properties.
    """

    def generate(self, n_samples, **kwargs):
        """
        Generate synthetic data.

        Parameters:
        - n_samples: int, number of samples to generate

        Returns:
        pd.DataFrame: Generated synthetic data
        """
class GBMData:
"""
Geometric Brownian Motion data generator.
Generates synthetic price data following GBM process,
commonly used for option pricing and Monte Carlo simulations.
"""
@classmethod
def generate(cls, n_samples, start_price=100, mu=0.05, sigma=0.2, **kwargs):
"""
Generate GBM price series.
Parameters:
- n_samples: int, number of time steps
- start_price: float, initial price
- mu: float, drift rate (annualized)
- sigma: float, volatility (annualized)
- dt: float, time step (default: 1/252 for daily)
- seed: int, random seed for reproducibility
Returns:
pd.Series: Generated price series
"""
Helper functions for data processing and symbol management.
def symbol_dict(*args, **kwargs):
"""
Create symbol dictionary for multi-symbol operations.
Parameters:
- args: symbol specifications
- kwargs: symbol name mappings
Returns:
dict: Symbol mapping dictionary
"""
import vectorbt as vbt
# Download single symbol
data = vbt.YFData.download("AAPL", start="2020-01-01", end="2023-01-01")
close = data.get("Close")
# Download multiple symbols
symbols = ["AAPL", "GOOGL", "MSFT"]
data = vbt.YFData.download(symbols, period="2y")
close = data.get("Close")
# Access OHLCV data
ohlcv = data.get() # All columns
volume = data.get("Volume")
# Binance spot data
btc_data = vbt.BinanceData.download(
"BTCUSDT",
start="2023-01-01",
interval="1h"
)
# Multiple exchanges via CCXT
exchanges = ["binance", "coinbase", "kraken"]
btc_prices = {}
for exchange in exchanges:
data = vbt.CCXTData.download(
"BTC/USDT",
start="2023-01-01",
exchange=exchange,
timeframe="1d"
)
btc_prices[exchange] = data.get("Close")
# Initial download with caching
data = vbt.YFData.download("AAPL", start="2020-01-01")
# Update with latest data
updated_data = data.update()
# Save and load data
data.save("aapl_data.pkl")
loaded_data = vbt.YFData.load("aapl_data.pkl")
# Automatic updates
updater = vbt.DataUpdater(vbt.YFData, symbols="AAPL")
updater.schedule_update(freq="1H") # Update hourly
# Generate GBM price series
synthetic_prices = vbt.GBMData.generate(
n_samples=252*2, # 2 years daily
start_price=100,
mu=0.08, # 8% annual drift
sigma=0.25, # 25% annual volatility
seed=42
)
# Monte Carlo simulation
n_simulations = 1000
simulations = []
for i in range(n_simulations):
sim = vbt.GBMData.generate(
n_samples=252,
start_price=100,
mu=0.05,
sigma=0.2,
seed=i
)
simulations.append(sim)
# Analyze distribution of outcomes
final_prices = [sim.iloc[-1] for sim in simulations]
# Create unified data pipeline
class MultiSourceData:
    """Example pipeline that routes symbol groups to per-source data classes."""

    def __init__(self):
        """Register the data class used for each source label."""
        self.sources = {
            'stocks': vbt.YFData,
            'crypto': vbt.BinanceData,
            # NOTE(review): AlpacaData is described on this page as a US
            # equity/ETF provider -- confirm it can actually serve the
            # symbols filed under 'futures'.
            'futures': vbt.AlpacaData,
        }

    def download_all(self, symbols_dict, **kwargs):
        """
        Download data for every recognised source in ``symbols_dict``.

        Parameters:
        - symbols_dict: dict, maps a source label to the symbols to download
        - kwargs: forwarded unchanged to each source's ``download``

        Returns:
        dict: source label -> object returned by that source's ``download``.
        Labels that are not registered in ``self.sources`` are skipped.
        """
        return {
            source: self.sources[source].download(symbols, **kwargs)
            for source, symbols in symbols_dict.items()
            if source in self.sources
        }
# Usage
pipeline = MultiSourceData()
all_data = pipeline.download_all({
'stocks': ['AAPL', 'GOOGL'],
'crypto': ['BTCUSDT'],
'futures': ['ES']
})
Install with Tessl CLI
npx tessl i tessl/pypi-vectorbt