CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-ffn

Financial functions for Python providing performance analysis, risk metrics, portfolio optimization, and data retrieval for quantitative finance

Pending
Overview
Eval results
Files

docs/statistical-analysis.md

Statistical Analysis

Advanced statistical tools including clustering algorithms, bootstrap resampling, and momentum calculations for quantitative research. Provides sophisticated analytical capabilities for financial data exploration and strategy development.

Capabilities

Clustering Analysis

Identify groups of assets with similar return patterns using machine learning clustering algorithms.

def calc_clusters(returns, n=None, plot=False):
    """
    Perform K-means clustering on assets based on return correlations.

    Groups columns of the return DataFrame by similarity of their
    correlation structure.

    Parameters:
    - returns (pd.DataFrame): Return series for multiple assets, one column per asset
    - n (int, optional): Number of clusters; None lets the algorithm choose automatically (default: None)
    - plot (bool): Whether to generate cluster visualization plots (default: False)

    Returns:
    dict: Clustering results with cluster assignments and centroids
    """

def calc_ftca(returns, threshold=0.5):
    """
    Fast Threshold Clustering Algorithm for asset grouping.

    Forms clusters by grouping assets whose pairwise correlation
    exceeds the given threshold.

    Parameters:
    - returns (pd.DataFrame): Return series for multiple assets, one column per asset
    - threshold (float): Correlation threshold above which assets are grouped together (default: 0.5)

    Returns:
    dict: Cluster assignments and statistics
    """

Momentum Analysis

Calculate momentum-based signals and probabilistic momentum indicators.

def calc_prob_mom(returns, other_returns):
    """
    Calculate probabilistic momentum comparing two return series.

    Scores the likelihood that the primary series is outperforming the
    comparison series.

    Parameters:
    - returns (pd.Series): Primary return series
    - other_returns (pd.Series): Comparison return series (assumed aligned on the same index)

    Returns:
    float: Probabilistic momentum score
    """

Bootstrap Resampling

Statistical resampling techniques for robust metric estimation and confidence intervals.

def resample_returns(returns, func, seed=0, num_trials=100):
    """
    Bootstrap resample returns and calculate statistics with confidence intervals.

    Repeatedly resamples the input returns, applies `func` to each
    resampled dataset, and aggregates the resulting statistic.

    Parameters:
    - returns (pd.Series or pd.DataFrame): Return series to resample
    - func (callable): Statistic to compute on each resampled dataset; receives data shaped like `returns`
    - seed (int): Random seed for reproducibility (default: 0)
    - num_trials (int): Number of bootstrap trials (default: 100)

    Returns:
    dict: Bootstrap results including mean, std, and confidence intervals
    """

Rolling Analysis

Apply functions over rolling windows for time-varying analysis.

def rollapply(data, window, fn):
    """
    Apply function over rolling window.

    Evaluates `fn` on each consecutive window of `window` observations,
    producing a time-indexed series of results.

    Parameters:
    - data (pd.Series or pd.DataFrame): Input data
    - window (int): Rolling window size in observations
    - fn (callable): Function applied to each window; receives a slice of `data`

    Returns:
    pd.Series or pd.DataFrame: Rolling function results
    """

Statistical Utilities

Data transformation and statistical processing functions.

def winsorize(x, axis=0, limits=0.01):
    """
    Winsorize values to reduce impact of outliers.

    Clips extreme values at the given tail fraction(s) instead of
    removing them, preserving sample size.

    Parameters:
    - x (array-like): Input data
    - axis (int): Axis along which to winsorize (default: 0)
    - limits (float or tuple): Winsorization limits as tail fraction(s); a single float applies to both tails (default: 0.01)

    Returns:
    array-like: Winsorized data
    """

def rescale(x, min=0.0, max=1.0, axis=0):
    """
    Rescale values to fit within specified range.

    Linearly maps the data so its smallest value becomes `min` and its
    largest becomes `max`.

    Parameters:
    - x (array-like): Input data
    - min (float): Minimum value of the target range (default: 0.0)
    - max (float): Maximum value of the target range (default: 1.0)
    - axis (int): Axis along which to rescale (default: 0)

    Returns:
    array-like: Rescaled data
    """

Time Series Analysis

Frequency analysis and period estimation utilities.

def infer_freq(data):
    """
    Infer most likely frequency from time series index.

    Examines the spacing of the index to determine the sampling
    frequency of the data.

    Parameters:
    - data (pd.Series or pd.DataFrame): Time series data with a datetime-like index

    Returns:
    str: Inferred frequency string (e.g., 'D', 'M', 'Y')
    """

def infer_nperiods(data, annualization_factor=None):
    """
    Infer number of periods for annualization based on data frequency.

    Useful for annualizing statistics (e.g., 252 for daily data) without
    hard-coding the factor.

    Parameters:
    - data (pd.Series or pd.DataFrame): Time series data with a datetime-like index
    - annualization_factor (int, optional): Explicit override; when given, frequency inference is bypassed (default: None)

    Returns:
    int: Number of periods for annualization
    """

Usage Examples

Asset Clustering Analysis

import ffn
import matplotlib.pyplot as plt

# Sector SPDR ETFs covering the major S&P 500 sectors
sector_etfs = ['XLK', 'XLF', 'XLE', 'XLV', 'XLI', 'XLP', 'XLY', 'XLU', 'XLB']
prices = ffn.get(sector_etfs, start='2020-01-01')
returns = ffn.to_returns(prices).dropna()

# Group the sectors into three clusters and plot the result
clusters = ffn.calc_clusters(returns, n=3, plot=True)
print("Cluster Assignments:")
for label, members in clusters['clusters'].items():
    print(f"Cluster {label}: {members}")

# Fast Threshold Clustering with a tighter correlation cutoff
ftca_results = ffn.calc_ftca(returns, threshold=0.6)
print(f"\nFTCA found {len(ftca_results)} clusters")

# Annualized volatility of each cluster's equal-weight average return
for idx, members in enumerate(clusters['clusters'].values()):
    avg_returns = returns[members].mean(axis=1)
    annual_vol = avg_returns.std() * 252 ** 0.5
    print(f"Cluster {idx} Volatility: {annual_vol:.3f}")

Momentum Analysis

import ffn
import pandas as pd                # needed for pd.Series below
import matplotlib.pyplot as plt    # needed for the plot calls below

# Download market data
prices = ffn.get('SPY,QQQ,IWM', start='2020-01-01')
returns = ffn.to_returns(prices).dropna()

# Pairwise probabilistic momentum between the three ETFs
spy_qqq_mom = ffn.calc_prob_mom(returns['SPY'], returns['QQQ'])
spy_iwm_mom = ffn.calc_prob_mom(returns['SPY'], returns['IWM'])
qqq_iwm_mom = ffn.calc_prob_mom(returns['QQQ'], returns['IWM'])

print(f"SPY vs QQQ Momentum: {spy_qqq_mom:.3f}")
print(f"SPY vs IWM Momentum: {spy_iwm_mom:.3f}")
print(f"QQQ vs IWM Momentum: {qqq_iwm_mom:.3f}")

# Rolling momentum analysis over a quarterly (63 trading day) window
window = 63
rolling_mom = []
for i in range(window, len(returns)):
    period_returns = returns.iloc[i - window:i]
    mom_score = ffn.calc_prob_mom(period_returns['SPY'], period_returns['QQQ'])
    rolling_mom.append(mom_score)

rolling_mom_series = pd.Series(rolling_mom, index=returns.index[window:])
rolling_mom_series.plot(title='Rolling SPY vs QQQ Momentum', figsize=(12, 6))
plt.axhline(y=0.5, color='r', linestyle='--', label='Neutral')
plt.legend()
plt.show()

Bootstrap Analysis

import ffn
import numpy as np

# Fetch AAPL prices and convert them to simple returns
prices = ffn.get('AAPL', start='2020-01-01')['AAPL']
returns = ffn.to_returns(prices).dropna()

# Bootstrap the distribution of the Sharpe ratio (2% risk-free rate)
def sharpe_stat(ret_series):
    return ffn.calc_sharpe(ret_series, rf=0.02)

sharpe_bootstrap = ffn.resample_returns(
    returns, sharpe_stat, seed=42, num_trials=1000
)

print("Bootstrap Sharpe Ratio Results:")
print(f"Mean: {sharpe_bootstrap['mean']:.3f}")
print(f"Std: {sharpe_bootstrap['std']:.3f}")
print(f"95% CI: [{sharpe_bootstrap['ci_lower']:.3f}, {sharpe_bootstrap['ci_upper']:.3f}]")

# Bootstrap the maximum drawdown via a reconstructed price index
def max_dd_stat(ret_series):
    rebuilt_prices = ffn.to_price_index(ret_series)
    return ffn.calc_max_drawdown(rebuilt_prices)

dd_bootstrap = ffn.resample_returns(
    returns, max_dd_stat, seed=42, num_trials=1000
)

print(f"\nBootstrap Max Drawdown Results:")
print(f"Mean: {dd_bootstrap['mean']:.3f}")
print(f"95% CI: [{dd_bootstrap['ci_lower']:.3f}, {dd_bootstrap['ci_upper']:.3f}]")

Rolling Window Analysis

import ffn
import matplotlib.pyplot as plt    # needed for the plotting calls below

# Download data
prices = ffn.get('SPY,TLT', start='2015-01-01')
returns = ffn.to_returns(prices).dropna()

# Rolling correlation of the two assets in each window
def rolling_corr(window_data):
    return window_data.corr().iloc[0, 1]

rolling_corr_60d = ffn.rollapply(returns, window=60, fn=rolling_corr)
rolling_corr_252d = ffn.rollapply(returns, window=252, fn=rolling_corr)

# Plot short- vs long-horizon rolling correlations together
fig, ax = plt.subplots(figsize=(12, 6))
rolling_corr_60d.plot(label='60-Day Rolling Correlation', ax=ax)
rolling_corr_252d.plot(label='252-Day Rolling Correlation', ax=ax)
plt.title('SPY-TLT Rolling Correlation')
plt.ylabel('Correlation')
plt.legend()
plt.grid(True)
plt.show()

# Rolling 1-year Sharpe ratio of SPY (2% risk-free rate)
def rolling_sharpe(window_data):
    return ffn.calc_sharpe(window_data['SPY'], rf=0.02)

rolling_sharpe_252d = ffn.rollapply(returns, window=252, fn=rolling_sharpe)
rolling_sharpe_252d.plot(title='SPY Rolling 1-Year Sharpe Ratio', figsize=(12, 6))
plt.ylabel('Sharpe Ratio')
plt.grid(True)
plt.show()

Data Preprocessing and Outlier Treatment

import ffn
import numpy as np

# Fetch a volatile single name and convert to returns
prices = ffn.get('TSLA', start='2020-01-01')['TSLA']
returns = ffn.to_returns(prices).dropna()

def report_stats(header, series):
    # Print the summary statistics used throughout this example.
    print(header)
    print(f"Mean: {series.mean():.4f}")
    print(f"Std: {series.std():.4f}")
    print(f"Skewness: {series.skew():.3f}")
    print(f"Min: {series.min():.4f}")
    print(f"Max: {series.max():.4f}")

report_stats("Original Return Statistics:", returns)

# Clip the extreme 5% in each tail rather than dropping observations
winsorized_returns = ffn.winsorize(returns, limits=0.05)  # 5% winsorization

report_stats(f"\nWinsorized Return Statistics:", winsorized_returns)

# Map the raw returns onto the [-1, 1] interval
rescaled_returns = ffn.rescale(returns, min=-1, max=1)

print(f"\nRescaled Return Range:")
print(f"Min: {rescaled_returns.min():.4f}")
print(f"Max: {rescaled_returns.max():.4f}")

# How much does outlier clipping move the Sharpe ratio?
original_sharpe = ffn.calc_sharpe(returns, rf=0.02)
winsorized_sharpe = ffn.calc_sharpe(winsorized_returns, rf=0.02)

print(f"\nSharpe Ratio Comparison:")
print(f"Original: {original_sharpe:.3f}")
print(f"Winsorized: {winsorized_sharpe:.3f}")

Advanced Statistical Analysis Pipeline

import ffn
import pandas as pd

def comprehensive_analysis(tickers, start_date='2020-01-01'):
    """Run a multi-step statistical analysis over a basket of tickers.

    Steps: (1) cluster the assets when more than two are present,
    (2) bootstrap each asset's Sharpe ratio, (3) compute 60-day rolling
    correlations for every asset pair, (4) infer the data frequency.

    Parameters:
    - tickers (list or str): Symbols to download via ffn.get
    - start_date (str): Download start date (default: '2020-01-01')

    Returns:
    dict: Results keyed by analysis step ('clusters', 'bootstrap',
          'rolling_correlations', 'frequency_analysis').
    """
    px = ffn.get(tickers, start=start_date)
    rets = ffn.to_returns(px).dropna()

    out = {}
    n_assets = len(rets.columns)

    # Step 1: clustering only makes sense with three or more assets
    if n_assets > 2:
        out['clusters'] = ffn.calc_clusters(rets, n=min(3, n_assets // 2))

    # Step 2: bootstrap the Sharpe ratio (2% risk-free) for every asset
    out['bootstrap'] = {
        name: ffn.resample_returns(
            rets[name],
            lambda x: ffn.calc_sharpe(x, rf=0.02),
            num_trials=500,
        )
        for name in rets.columns
    }

    # Step 3: 60-day rolling correlation for each unordered asset pair
    if n_assets > 1:
        pair_corrs = {}
        cols = list(rets.columns)
        for idx, first in enumerate(cols):
            for second in cols[idx + 1:]:
                pair_corrs[f"{first}_{second}"] = ffn.rollapply(
                    rets[[first, second]], 60,
                    lambda w: w.corr().iloc[0, 1],
                )
        out['rolling_correlations'] = pair_corrs

    # Step 4: sampling-frequency diagnostics
    out['frequency_analysis'] = {
        'inferred_frequency': ffn.infer_freq(rets),
        'nperiods': ffn.infer_nperiods(rets),
    }

    return out

# Run comprehensive analysis
analysis_results = comprehensive_analysis(['AAPL', 'MSFT', 'GOOGL', 'AMZN'])

print("Comprehensive Analysis Results:")
print(f"Clusters found: {len(analysis_results.get('clusters', {}).get('clusters', {}))}")
# resample_returns documents mean/std/confidence-interval keys — there is no
# 'samples' key, so report the documented mean statistic for the first asset.
first_asset, first_boot = next(iter(analysis_results['bootstrap'].items()))
print(f"Bootstrap mean Sharpe ({first_asset}): {first_boot['mean']:.3f}")
print(f"Rolling correlations tracked: {len(analysis_results.get('rolling_correlations', {}))}")
print(f"Data frequency: {analysis_results['frequency_analysis']['inferred_frequency']}")

Install with Tessl CLI

npx tessl i tessl/pypi-ffn

docs

data-retrieval.md

data-utilities.md

index.md

pandas-extensions.md

performance-analysis.md

portfolio-optimization.md

return-calculations.md

risk-metrics.md

statistical-analysis.md

tile.json