CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-vectorbt

Python library for backtesting and analyzing trading strategies at scale

Pending
Overview
Eval results
Files

docs/data-management.md

Data Sources & Management

Unified data acquisition and management system supporting multiple financial data providers with automatic synchronization, caching, and preprocessing capabilities. The data module provides consistent interfaces for accessing market data from various sources.

Capabilities

Yahoo Finance Data

Access to Yahoo Finance historical and real-time market data with automatic caching and data validation.

class YFData:
    """
    Yahoo Finance data provider with caching and update capabilities.

    Provides access to historical OHLCV data, dividends, stock splits,
    and basic fundamental data from Yahoo Finance.

    Instances are obtained from the `download()` or `load()` class methods,
    read with `get()`, refreshed with `update()` and persisted with `save()`.
    """

    @classmethod
    def download(cls, symbols, start=None, end=None, **kwargs):
        """
        Download historical data from Yahoo Finance.

        Parameters:
        - symbols: str or list, ticker symbols to download
        - start: str or datetime, start date (default: 1 year ago)
        - end: str or datetime, end date (default: today)

        Keyword options (accepted through **kwargs):
        - period: str, period instead of start/end ('1d', '5d', '1mo', etc.)
        - interval: str, data interval ('1d', '1h', '5m', etc.)
        - auto_adjust: bool, adjust OHLC for splits/dividends (default: True)
        - prepost: bool, include pre/post market data (default: False)
        - threads: bool, use threading for multiple symbols (default: True)

        Returns:
        YFData: Data instance with downloaded data
        """

    def get(self, column=None):
        """
        Get data columns.

        Parameters:
        - column: str, column name ('Open', 'High', 'Low', 'Close', 'Volume');
          when omitted (None), all columns are returned

        Returns:
        pd.DataFrame or pd.Series: Requested data
        """

    def update(self, **kwargs):
        """
        Update data with latest available data.

        Parameters:
        - kwargs: additional update options (not listed in this reference)

        Returns:
        YFData: Updated data instance
        """

    def save(self, path):
        """
        Save data to file.

        Parameters:
        - path: file path to write to (e.g., 'aapl_data.pkl')
        """

    @classmethod
    def load(cls, path):
        """
        Load data from file.

        Parameters:
        - path: file path previously written by `save()`

        Returns:
        YFData: Data instance restored from the file
        """

Binance Data

Access to Binance cryptocurrency exchange data including spot and futures markets.

class BinanceData:
    """
    Binance exchange data provider for cryptocurrency markets.

    Supports spot and futures data with various intervals and
    comprehensive symbol coverage.
    """

    @classmethod
    def download(cls, symbols, start=None, end=None, **kwargs):
        """
        Download data from Binance.

        Parameters:
        - symbols: str or list, trading pairs (e.g., 'BTCUSDT')
        - start: str or datetime, start date
        - end: str or datetime, end date

        Keyword options (accepted through **kwargs):
        - interval: str, kline interval ('1m', '5m', '1h', '1d', etc.)
        - market: str, market type ('spot', 'futures')

        Returns:
        BinanceData: Data instance with downloaded data
        """

    def get(self, column=None):
        """
        Get data columns.

        Parameters:
        - column: str, name of the column to retrieve (default: None)
        """

    def update(self, **kwargs):
        """
        Update with latest data.

        Parameters:
        - kwargs: additional update options (not listed in this reference)
        """

CCXT Exchange Data

Universal cryptocurrency exchange data access through the CCXT library supporting 100+ exchanges.

class CCXTData:
    """
    Universal cryptocurrency exchange data via CCXT library.

    Provides unified access to data from 100+ cryptocurrency exchanges
    with consistent interface and automatic rate limiting.
    """

    @classmethod
    def download(cls, symbols, start=None, end=None, exchange='binance', **kwargs):
        """
        Download data from CCXT-supported exchange.

        Parameters:
        - symbols: str or list, trading pairs (e.g., 'BTC/USDT')
        - start: str or datetime, start date
        - end: str or datetime, end date
        - exchange: str, exchange name (e.g., 'binance', 'coinbase');
          default: 'binance'

        Keyword options (accepted through **kwargs):
        - timeframe: str, timeframe ('1m', '5m', '1h', '1d', etc.)

        Returns:
        CCXTData: Data instance with exchange data
        """

    def get_exchanges(self):
        """
        Get list of supported exchanges.

        Returns:
        list: Supported exchanges
        """

    def get_symbols(self, exchange):
        """
        Get available symbols for exchange.

        Parameters:
        - exchange: str, exchange name (e.g., 'binance')

        Returns:
        Symbols available on the given exchange
        """

Alpaca Data

Access to the Alpaca trading API for US equities and ETFs, with commission-free trading integration.

class AlpacaData:
    """
    Alpaca trading API data provider.

    Provides access to US equity and ETF data with real-time and
    historical data capabilities.
    """

    @classmethod
    def download(cls, symbols, start=None, end=None, **kwargs):
        """
        Download data from Alpaca.

        Parameters:
        - symbols: str or list, US equity symbols
        - start: str or datetime, start date
        - end: str or datetime, end date

        Keyword options (accepted through **kwargs):
        - timeframe: str, bar timeframe ('1Min', '5Min', '1Hour', '1Day')
        - api_key: str, Alpaca API key
        - secret_key: str, Alpaca secret key
        - paper: bool, use paper trading endpoint (default: True)

        Returns:
        AlpacaData: Data instance with Alpaca data
        """

Base Data Classes

Core data management functionality providing the foundation for all data sources.

class Data:
    """
    Base data management class.

    Provides common functionality for data storage, manipulation,
    and preprocessing across all data sources.
    """

    def __init__(self, data, **kwargs):
        """
        Initialize data instance.

        Parameters:
        - data: pd.DataFrame, market data

        Keyword options (accepted through **kwargs):
        - symbols: list, symbol names
        - wrapper: ArrayWrapper, data wrapper configuration
        """

    def get(self, column=None, **kwargs):
        """
        Get data columns with optional preprocessing.

        Parameters:
        - column: str or list, column names to retrieve (default: None)
        - kwargs: additional options (not listed in this reference)

        Returns:
        pd.DataFrame or pd.Series: Requested data
        """

    def resample(self, freq, **kwargs):
        """
        Resample data to different frequency.

        Parameters:
        - freq: str, target frequency ('1H', '1D', '1W', etc.)
        - kwargs: additional options (not listed in this reference)

        Returns:
        Data: Resampled data instance
        """

    def dropna(self, **kwargs):
        """
        Remove missing values.

        Parameters:
        - kwargs: additional options (not listed in this reference)
        """

    def fillna(self, method='ffill', **kwargs):
        """
        Fill missing values.

        Parameters:
        - method: str, fill method (default: 'ffill')
        - kwargs: additional options (not listed in this reference)
        """

class DataUpdater:
    """
    Data updating and synchronization utilities.

    Handles incremental data updates, cache management,
    and data validation across multiple sources.
    """

    def __init__(self, data_cls, **kwargs):
        """
        Initialize updater for specific data class.

        Parameters:
        - data_cls: data class to keep up to date (e.g., YFData)
        - kwargs: additional options (the usage example passes symbols="AAPL")
        """

    def update(self, **kwargs):
        """
        Update data with latest available.

        Parameters:
        - kwargs: additional update options (not listed in this reference)
        """

    def schedule_update(self, freq, **kwargs):
        """
        Schedule automatic data updates.

        Parameters:
        - freq: str, update frequency (e.g., '1H' for hourly)
        - kwargs: additional scheduling options (not listed in this reference)
        """

Synthetic Data Generation

Tools for generating synthetic market data for strategy testing and Monte Carlo simulations.

class SyntheticData:
    """
    Base class for synthetic data generation.

    Provides framework for creating artificial market data
    with specified statistical properties.
    """

    def generate(self, n_samples, **kwargs):
        """
        Generate synthetic data.

        Parameters:
        - n_samples: int, number of samples to generate
        - kwargs: generator-specific options (not listed in this reference)

        Returns:
        pd.DataFrame: Generated synthetic data
        """

class GBMData:
    """
    Geometric Brownian Motion data generator.

    Generates synthetic price data following GBM process,
    commonly used for option pricing and Monte Carlo simulations.
    """

    @classmethod
    def generate(cls, n_samples, start_price=100, mu=0.05, sigma=0.2, **kwargs):
        """
        Generate GBM price series.

        Parameters:
        - n_samples: int, number of time steps
        - start_price: float, initial price (default: 100)
        - mu: float, drift rate (annualized; default: 0.05)
        - sigma: float, volatility (annualized; default: 0.2)

        Keyword options (accepted through **kwargs):
        - dt: float, time step (default: 1/252 for daily)
        - seed: int, random seed for reproducibility

        Returns:
        pd.Series: Generated price series
        """

Utility Functions

Helper functions for data processing and symbol management.

def symbol_dict(*args, **kwargs):
    """
    Create symbol dictionary for multi-symbol operations.

    Parameters:
    - args: symbol specifications (positional)
    - kwargs: symbol name mappings (keyword)

    Returns:
    dict: Symbol mapping dictionary
    """

Usage Examples

Basic Data Download

import vectorbt as vbt

# Download a single symbol over an explicit start/end date range
data = vbt.YFData.download("AAPL", start="2020-01-01", end="2023-01-01")
close = data.get("Close")  # only the 'Close' column

# Download multiple symbols in one call, using `period` instead of start/end
symbols = ["AAPL", "GOOGL", "MSFT"]
data = vbt.YFData.download(symbols, period="2y")
close = data.get("Close")

# Access OHLCV data (taken from the multi-symbol download above)
ohlcv = data.get()  # All columns
volume = data.get("Volume")

Cryptocurrency Data

# Binance spot data: hourly BTCUSDT klines starting 2023-01-01
btc_data = vbt.BinanceData.download("BTCUSDT", start="2023-01-01", interval="1h")

# Multiple exchanges via CCXT: daily BTC/USDT closes keyed by exchange name
exchanges = ["binance", "coinbase", "kraken"]
btc_prices = {
    exchange: vbt.CCXTData.download(
        "BTC/USDT",
        start="2023-01-01",
        exchange=exchange,
        timeframe="1d",
    ).get("Close")
    for exchange in exchanges
}

Data Updates and Caching

# Initial download with caching (no end date given)
data = vbt.YFData.download("AAPL", start="2020-01-01")

# Update with latest data; update() returns the updated data instance
updated_data = data.update()

# Save and load data: save() is an instance method, load() a class method
data.save("aapl_data.pkl")
loaded_data = vbt.YFData.load("aapl_data.pkl")

# Automatic updates: the updater is given the data class plus its options
updater = vbt.DataUpdater(vbt.YFData, symbols="AAPL")
updater.schedule_update(freq="1H")  # Update hourly

Synthetic Data Generation

# Single GBM path: 2 years of daily steps, 8% annual drift, 25% annual volatility
synthetic_prices = vbt.GBMData.generate(
    n_samples=252*2,
    start_price=100,
    mu=0.08,
    sigma=0.25,
    seed=42,
)

# Monte Carlo simulation: one 252-step path per seed, seeds 0..n_simulations-1
n_simulations = 1000
simulations = [
    vbt.GBMData.generate(
        n_samples=252,
        start_price=100,
        mu=0.05,
        sigma=0.2,
        seed=path_seed,
    )
    for path_seed in range(n_simulations)
]

# Distribution of outcomes: the last value of every simulated path
final_prices = [path.iloc[-1] for path in simulations]

Multi-Source Data Pipeline

# Create unified data pipeline
class MultiSourceData:
    """Dispatch symbol lists to the data class registered for each source name."""

    def __init__(self):
        # Source name -> data class exposing a `download(symbols, **kwargs)` method.
        # NOTE(review): 'futures' is routed to AlpacaData; confirm that provider
        # actually serves the instruments intended for this key.
        self.sources = dict(
            stocks=vbt.YFData,
            crypto=vbt.BinanceData,
            futures=vbt.AlpacaData,
        )

    def download_all(self, symbols_dict, **kwargs):
        """
        Download data for every known source in `symbols_dict`.

        Parameters:
        - symbols_dict: dict, source name -> symbols to download from it
        - kwargs: passed unchanged to each source's `download()` call

        Returns:
        dict: source name -> object returned by that source's `download()`;
        source names not present in `self.sources` are skipped silently
        """
        return {
            name: self.sources[name].download(tickers, **kwargs)
            for name, tickers in symbols_dict.items()
            if name in self.sources
        }

# Usage: keys must match the names registered in MultiSourceData.sources;
# entries under any other key are skipped by download_all()
pipeline = MultiSourceData()
all_data = pipeline.download_all({
    'stocks': ['AAPL', 'GOOGL'],
    'crypto': ['BTCUSDT'],
    'futures': ['ES']
})

Install with Tessl CLI

npx tessl i tessl/pypi-vectorbt

docs

data-management.md

generic-analysis.md

index.md

indicators-signals.md

label-generation.md

portfolio-analysis.md

records-management.md

utilities-config.md

tile.json