Financial functions for Python providing performance analysis, risk metrics, portfolio optimization, and data retrieval for quantitative finance
—
Advanced statistical tools, including clustering algorithms, bootstrap resampling, and momentum calculations for quantitative research. These tools provide sophisticated analytical capabilities for financial data exploration and strategy development.
Identify groups of assets with similar return patterns using machine learning clustering algorithms.
def calc_clusters(returns, n=None, plot=False):
    """
    Perform K-means clustering on assets based on return correlations.

    Parameters:
    - returns (pd.DataFrame): Return series for multiple assets
    - n (int): Number of clusters (default: None for automatic selection)
    - plot (bool): Whether to generate cluster visualization plots (default: False)

    Returns:
    dict: Clustering results with cluster assignments and centroids.
    The 'clusters' key maps each cluster id to the list of member asset
    names (as shown in the usage examples below).
    """
def calc_ftca(returns, threshold=0.5):
    """
    Fast Threshold Clustering Algorithm for asset grouping.

    Parameters:
    - returns (pd.DataFrame): Return series for multiple assets
    - threshold (float): Correlation threshold for clustering (default: 0.5)

    Returns:
    dict: Cluster assignments and statistics
    """

# Calculate momentum-based signals and probabilistic momentum indicators.
def calc_prob_mom(returns, other_returns):
    """
    Calculate probabilistic momentum comparing two return series.

    Parameters:
    - returns (pd.Series): Primary return series
    - other_returns (pd.Series): Comparison return series

    Returns:
    float: Probabilistic momentum score
    """

# Statistical resampling techniques for robust metric estimation and
# confidence intervals.
def resample_returns(returns, func, seed=0, num_trials=100):
    """
    Bootstrap resample returns and calculate statistics with confidence
    intervals.

    Parameters:
    - returns (pd.Series or pd.DataFrame): Return series to resample
    - func (callable): Function to apply to each resampled dataset
    - seed (int): Random seed for reproducibility (default: 0)
    - num_trials (int): Number of bootstrap trials (default: 100)

    Returns:
    dict: Bootstrap results including mean, std, and confidence intervals
    """

# Apply functions over rolling windows for time-varying analysis.
def rollapply(data, window, fn):
    """
    Apply function over rolling window.

    Parameters:
    - data (pd.Series or pd.DataFrame): Input data
    - window (int): Rolling window size
    - fn (callable): Function to apply to each window

    Returns:
    pd.Series or pd.DataFrame: Rolling function results
    """

# Data transformation and statistical processing functions.
def winsorize(x, axis=0, limits=0.01):
    """
    Winsorize values to reduce impact of outliers.

    Parameters:
    - x (array-like): Input data
    - axis (int): Axis along which to winsorize (default: 0)
    - limits (float or tuple): Winsorization limits as fraction (default: 0.01)

    Returns:
    array-like: Winsorized data
    """
# NOTE(review): the `min`/`max` parameter names shadow Python builtins;
# they are kept unchanged because callers pass them by keyword
# (e.g. rescale(returns, min=-1, max=1) in the examples below).
def rescale(x, min=0.0, max=1.0, axis=0):
    """
    Rescale values to fit within specified range.

    Parameters:
    - x (array-like): Input data
    - min (float): Minimum value for rescaling (default: 0.0)
    - max (float): Maximum value for rescaling (default: 1.0)
    - axis (int): Axis along which to rescale (default: 0)

    Returns:
    array-like: Rescaled data
    """

# Frequency analysis and period estimation utilities.
def infer_freq(data):
    """
    Infer most likely frequency from time series index.

    Parameters:
    - data (pd.Series or pd.DataFrame): Time series data

    Returns:
    str: Inferred frequency string (e.g., 'D', 'M', 'Y')
    """
def infer_nperiods(data, annualization_factor=None):
    """
    Infer number of periods for annualization based on data frequency.

    Parameters:
    - data (pd.Series or pd.DataFrame): Time series data
    - annualization_factor (int): Override annualization factor (default: None)

    Returns:
    int: Number of periods for annualization
    """

# Example: clustering analysis on sector ETFs.
import ffn
import matplotlib.pyplot as plt

# Download daily prices for the nine SPDR sector ETFs.
sector_etfs = ['XLK', 'XLF', 'XLE', 'XLV', 'XLI', 'XLP', 'XLY', 'XLU', 'XLB']
prices = ffn.get(sector_etfs, start='2020-01-01')
returns = ffn.to_returns(prices).dropna()

# K-means clustering on the return series; 'clusters' maps id -> assets.
clusters = ffn.calc_clusters(returns, n=3, plot=True)
print("Cluster Assignments:")
for cluster_id, assets in clusters['clusters'].items():
    print(f"Cluster {cluster_id}: {assets}")

# Fast threshold clustering with a stricter correlation cutoff.
ftca_results = ffn.calc_ftca(returns, threshold=0.6)
print(f"\nFTCA found {len(ftca_results)} clusters")

# Annualized volatility of each cluster's equal-weight average return
# (252 trading days per year).
for i, cluster in enumerate(clusters['clusters'].values()):
    cluster_returns = returns[cluster].mean(axis=1)
    cluster_vol = cluster_returns.std() * (252**0.5)
    print(f"Cluster {i} Volatility: {cluster_vol:.3f}")

import ffn
# These imports were missing in the original example: pd is used to build
# the rolling series and plt for the chart annotations.
import pandas as pd
import matplotlib.pyplot as plt

# Download broad-market ETF data.
prices = ffn.get('SPY,QQQ,IWM', start='2020-01-01')
returns = ffn.to_returns(prices).dropna()

# Pairwise probabilistic momentum scores.
spy_qqq_mom = ffn.calc_prob_mom(returns['SPY'], returns['QQQ'])
spy_iwm_mom = ffn.calc_prob_mom(returns['SPY'], returns['IWM'])
qqq_iwm_mom = ffn.calc_prob_mom(returns['QQQ'], returns['IWM'])
print(f"SPY vs QQQ Momentum: {spy_qqq_mom:.3f}")
print(f"SPY vs IWM Momentum: {spy_iwm_mom:.3f}")
print(f"QQQ vs IWM Momentum: {qqq_iwm_mom:.3f}")

# Rolling momentum analysis over a quarterly (63 trading day) window.
window = 63
rolling_mom = []
for i in range(window, len(returns)):
    period_returns = returns.iloc[i - window:i]
    mom_score = ffn.calc_prob_mom(period_returns['SPY'], period_returns['QQQ'])
    rolling_mom.append(mom_score)
rolling_mom_series = pd.Series(rolling_mom, index=returns.index[window:])
rolling_mom_series.plot(title='Rolling SPY vs QQQ Momentum', figsize=(12, 6))
plt.axhline(y=0.5, color='r', linestyle='--', label='Neutral')
plt.legend()
plt.show()

import ffn
import numpy as np

# Download and prepare single-asset data.
prices = ffn.get('AAPL', start='2020-01-01')['AAPL']
returns = ffn.to_returns(prices).dropna()

# Bootstrap the Sharpe ratio (2% risk-free rate).
def calc_sharpe_wrapper(ret_series):
    return ffn.calc_sharpe(ret_series, rf=0.02)

sharpe_bootstrap = ffn.resample_returns(returns, calc_sharpe_wrapper,
                                        seed=42, num_trials=1000)
print("Bootstrap Sharpe Ratio Results:")
print(f"Mean: {sharpe_bootstrap['mean']:.3f}")
print(f"Std: {sharpe_bootstrap['std']:.3f}")
print(f"95% CI: [{sharpe_bootstrap['ci_lower']:.3f}, {sharpe_bootstrap['ci_upper']:.3f}]")

# Bootstrap the maximum drawdown: each resampled return series is first
# converted back to a price index before the drawdown is measured.
def calc_max_dd_wrapper(ret_series):
    price_series = ffn.to_price_index(ret_series)
    return ffn.calc_max_drawdown(price_series)

dd_bootstrap = ffn.resample_returns(returns, calc_max_dd_wrapper,
                                    seed=42, num_trials=1000)
print(f"\nBootstrap Max Drawdown Results:")
print(f"Mean: {dd_bootstrap['mean']:.3f}")
print(f"95% CI: [{dd_bootstrap['ci_lower']:.3f}, {dd_bootstrap['ci_upper']:.3f}]")

import ffn
# This import was missing in the original example: plt is used below.
import matplotlib.pyplot as plt

# Download stock (SPY) and long-bond (TLT) ETF data.
prices = ffn.get('SPY,TLT', start='2015-01-01')
returns = ffn.to_returns(prices).dropna()

# Rolling correlation between the two assets at two horizons.
def rolling_corr(window_data):
    return window_data.corr().iloc[0, 1]

rolling_corr_60d = ffn.rollapply(returns, window=60, fn=rolling_corr)
rolling_corr_252d = ffn.rollapply(returns, window=252, fn=rolling_corr)

# Plot rolling correlations.
fig, ax = plt.subplots(figsize=(12, 6))
rolling_corr_60d.plot(label='60-Day Rolling Correlation', ax=ax)
rolling_corr_252d.plot(label='252-Day Rolling Correlation', ax=ax)
plt.title('SPY-TLT Rolling Correlation')
plt.ylabel('Correlation')
plt.legend()
plt.grid(True)
plt.show()

# Rolling one-year (252 trading day) Sharpe ratio for SPY.
def rolling_sharpe(window_data):
    return ffn.calc_sharpe(window_data['SPY'], rf=0.02)

rolling_sharpe_252d = ffn.rollapply(returns, window=252, fn=rolling_sharpe)
rolling_sharpe_252d.plot(title='SPY Rolling 1-Year Sharpe Ratio', figsize=(12, 6))
plt.ylabel('Sharpe Ratio')
plt.grid(True)
plt.show()

import ffn
import numpy as np

# Download a volatile single asset to make the outlier treatment visible.
prices = ffn.get('TSLA', start='2020-01-01')['TSLA']
returns = ffn.to_returns(prices).dropna()

print("Original Return Statistics:")
print(f"Mean: {returns.mean():.4f}")
print(f"Std: {returns.std():.4f}")
print(f"Skewness: {returns.skew():.3f}")
print(f"Min: {returns.min():.4f}")
print(f"Max: {returns.max():.4f}")

# Clip the most extreme 5% of returns in each tail.
winsorized_returns = ffn.winsorize(returns, limits=0.05)
print(f"\nWinsorized Return Statistics:")
print(f"Mean: {winsorized_returns.mean():.4f}")
print(f"Std: {winsorized_returns.std():.4f}")
print(f"Skewness: {winsorized_returns.skew():.3f}")
print(f"Min: {winsorized_returns.min():.4f}")
print(f"Max: {winsorized_returns.max():.4f}")

# Rescale returns to the [-1, 1] range.
rescaled_returns = ffn.rescale(returns, min=-1, max=1)
print(f"\nRescaled Return Range:")
print(f"Min: {rescaled_returns.min():.4f}")
print(f"Max: {rescaled_returns.max():.4f}")

# Compare Sharpe ratios before and after winsorization.
original_sharpe = ffn.calc_sharpe(returns, rf=0.02)
winsorized_sharpe = ffn.calc_sharpe(winsorized_returns, rf=0.02)
print(f"\nSharpe Ratio Comparison:")
print(f"Original: {original_sharpe:.3f}")
print(f"Winsorized: {winsorized_sharpe:.3f}")

import ffn
import pandas as pd

def comprehensive_analysis(tickers, start_date='2020-01-01'):
    """
    Comprehensive statistical analysis pipeline.

    Parameters:
    - tickers (list[str]): Asset symbols to download and analyze
    - start_date (str): Start date for price history (default: '2020-01-01')

    Returns:
    dict: Analysis results with keys 'clusters' (if > 2 assets),
    'bootstrap', 'rolling_correlations' (if > 1 asset), and
    'frequency_analysis'.
    """
    # Download and prepare return data.
    prices = ffn.get(tickers, start=start_date)
    returns = ffn.to_returns(prices).dropna()
    results = {}

    # 1. Clustering analysis (only meaningful with more than two assets).
    if len(returns.columns) > 2:
        clusters = ffn.calc_clusters(returns, n=min(3, len(returns.columns) // 2))
        results['clusters'] = clusters

    # 2. Bootstrap Sharpe-ratio statistics for each asset.
    bootstrap_results = {}
    for asset in returns.columns:
        asset_returns = returns[asset]
        sharpe_boot = ffn.resample_returns(asset_returns,
                                           lambda x: ffn.calc_sharpe(x, rf=0.02),
                                           num_trials=500)
        bootstrap_results[asset] = sharpe_boot
    results['bootstrap'] = bootstrap_results

    # 3. Rolling 60-day correlations for every unordered asset pair.
    if len(returns.columns) > 1:
        rolling_corrs = {}
        for i, asset1 in enumerate(returns.columns):
            for asset2 in returns.columns[i + 1:]:
                pair_returns = returns[[asset1, asset2]]
                rolling_corr = ffn.rollapply(pair_returns, 60,
                                             lambda x: x.corr().iloc[0, 1])
                rolling_corrs[f"{asset1}_{asset2}"] = rolling_corr
        results['rolling_correlations'] = rolling_corrs

    # 4. Frequency analysis.
    freq_info = {
        'inferred_frequency': ffn.infer_freq(returns),
        'nperiods': ffn.infer_nperiods(returns)
    }
    results['frequency_analysis'] = freq_info
    return results

# Run the comprehensive analysis on a basket of large-cap tech names.
analysis_results = comprehensive_analysis(['AAPL', 'MSFT', 'GOOGL', 'AMZN'])
print("Comprehensive Analysis Results:")
print(f"Clusters found: {len(analysis_results.get('clusters', {}).get('clusters', {}))}")
print(f"Bootstrap samples: {len(list(analysis_results['bootstrap'].values())[0]['samples'])}")
print(f"Rolling correlations tracked: {len(analysis_results.get('rolling_correlations', {}))}")
print(f"Data frequency: {analysis_results['frequency_analysis']['inferred_frequency']}")

# Install with Tessl CLI:
npx tessl i tessl/pypi-ffn