CtrlK
BlogDocsLog inGet started
Tessl Logo

tessl/pypi-yfinance

Download market data from Yahoo! Finance API

Overview
Eval results
Files

bulk-data.mddocs/

Bulk Data Operations

Efficient downloading and management of multiple financial instruments with threading support, various data formatting options, and bulk operations for portfolio analysis and multi-symbol research.

Capabilities

Multi-Symbol Download Function

Download historical data for multiple tickers simultaneously with threading support and flexible formatting options.

def download(tickers, start: Union[str, datetime] = None, end: Union[str, datetime] = None,
            actions: bool = False, threads: Union[bool, int] = True, 
            ignore_tz: bool = None, group_by: str = 'column', 
            auto_adjust: bool = None, back_adjust: bool = False,
            repair: bool = False, keepna: bool = False, progress: bool = True,
            period: str = None, interval: str = "1d", prepost: bool = False,
            rounding: bool = False, timeout: int = 10, session = None,
            multi_level_index: bool = True) -> Union[pd.DataFrame, None]:
    """
    Download historical data for multiple tickers.
    
    Parameters:
    - tickers: str or list, ticker symbols to download (space/comma separated string or list)
    - start: str/datetime, start date in YYYY-MM-DD format
    - end: str/datetime, end date in YYYY-MM-DD format  
    - period: str, period to download ("1d", "5d", "1mo", "3mo", "6mo", "1y", "2y", "5y", "10y", "ytd", "max")
    - interval: str, data interval ("1m", "2m", "5m", "15m", "30m", "60m", "90m", "1h", "1d", "5d", "1wk", "1mo", "3mo")
    - actions: bool, download dividend and stock split data
    - threads: bool/int, enable multithreading (True/False or number of threads)
    - ignore_tz: bool, drop timezone info when combining data across exchanges
    - group_by: str, group data by 'ticker' or 'column'
    - auto_adjust: bool, adjust OHLC prices for splits and dividends
    - back_adjust: bool, back-adjust prices instead of forward-adjust
    - repair: bool, detect and repair bad data
    - keepna: bool, keep NaN values in output
    - progress: bool, show download progress bar
    - prepost: bool, include pre and post market data
    - rounding: bool, round values to 2 decimal places
    - timeout: int, timeout for requests in seconds
    - session: requests.Session, optional session for HTTP requests (connection pooling)
    - multi_level_index: bool, use multi-level column index
    
    Returns:
    pd.DataFrame with historical data for all tickers
    """

Usage Examples

import yfinance as yf

# Download multiple stocks
data = yf.download(["AAPL", "GOOGL", "MSFT"], period="1mo")

# Download with specific date range
data = yf.download("AAPL GOOGL MSFT", start="2023-01-01", end="2023-12-31")

# Download with custom formatting
data = yf.download(["AAPL", "GOOGL"], period="6mo", 
                  group_by='ticker', threads=4, progress=True)

# Download intraday data
data = yf.download(["SPY", "QQQ"], period="5d", interval="5m")

# Download with actions (dividends/splits)
data = yf.download(["AAPL", "MSFT"], period="1y", actions=True)

Data Structure Examples

Column-grouped data (default: group_by='column'):

Price            Close              High               Low                Open               Volume
symbol           AAPL     GOOGL    AAPL     GOOGL     AAPL     GOOGL     AAPL     GOOGL     AAPL        GOOGL
date
2023-01-03       125.07   88.59    125.42   89.19     124.76   88.12     125.20   88.30     112117500   23097900

Ticker-grouped data (group_by='ticker'):

  • Returns a single DataFrame whose columns are a MultiIndex with ticker symbols at the top level (not a dictionary)
  • Each ticker's sub-frame (e.g. data['AAPL']) contains the OHLCV columns

Multi-Ticker Management Class

Manage multiple tickers with shared operations and bulk data access.

class Tickers:
    def __init__(self, tickers, session=None):
        """
        Create a Tickers object for managing multiple ticker symbols.
        
        Parameters:
        - tickers: str or list, ticker symbols (space/comma separated or list)
        - session: requests.Session, optional session for HTTP requests
        """
    
    def history(self, period: str = "1mo", interval: str = "1d",
               start: Union[str, datetime] = None, end: Union[str, datetime] = None,
               prepost: bool = False, actions: bool = True, auto_adjust: bool = True,
               repair: bool = False, threads: Union[bool, int] = True,
               group_by: str = 'column', progress: bool = True, 
               timeout: int = 10, **kwargs) -> pd.DataFrame:
        """
        Download historical data for all tickers.
        
        Parameters:
        - period: str, period to download when start/end are not given
        - interval: str, data interval (e.g. "1m", "1h", "1d", "1wk", "1mo")
        - start: str/datetime, start date in YYYY-MM-DD format
        - end: str/datetime, end date in YYYY-MM-DD format
        - prepost: bool, include pre and post market data
        - actions: bool, include dividend and stock split data
        - auto_adjust: bool, adjust OHLC prices for splits and dividends
        - repair: bool, detect and repair bad data
        - threads: bool/int, enable multithreading (True/False or number of threads)
        - group_by: str, group data by 'ticker' or 'column'
        - progress: bool, show download progress bar
        - timeout: int, timeout for requests in seconds
        - **kwargs: forwarded to the underlying download call
        
        Returns:
        pd.DataFrame with historical data formatted according to group_by parameter
        """
    
    def download(self, **kwargs) -> pd.DataFrame:
        """
        Alias for history() method with same parameters.
        """
    
    def news(self) -> dict:
        """
        Get news for all tickers.
        
        Returns:
        dict with ticker symbols as keys and news lists as values
        """
    
    def live(self, message_handler: Callable = None, verbose: bool = True):
        """
        Start real-time data streaming for all tickers.
        
        Parameters:
        - message_handler: function to handle incoming messages
        - verbose: bool, enable verbose logging
        """
    
    # Properties
    symbols: list  # List of ticker symbols
    tickers: dict  # Dictionary mapping symbols to Ticker objects

Usage Examples

# Create Tickers object
portfolio = yf.Tickers("AAPL GOOGL MSFT AMZN")

# Or with a list
portfolio = yf.Tickers(["AAPL", "GOOGL", "MSFT", "AMZN"])

# Download historical data
data = portfolio.history(period="1y")

# Access individual ticker objects
apple = portfolio.tickers['AAPL']
apple_info = apple.info

# Get news for all tickers
all_news = portfolio.news()
apple_news = all_news['AAPL']

# Start live streaming
def handle_updates(msg):
    print(f"Portfolio update: {msg}")

portfolio.live(message_handler=handle_updates)

Threading and Performance

Control threading behavior for optimal performance based on your use case.

Threading Options

# Disable threading (sequential downloads)
data = yf.download(["AAPL", "GOOGL", "MSFT"], threads=False)

# Enable threading with default thread count
data = yf.download(["AAPL", "GOOGL", "MSFT"], threads=True)

# Specify exact number of threads
data = yf.download(["AAPL", "GOOGL", "MSFT"], threads=8)

# Control progress display
data = yf.download(tickers, progress=True)  # Show progress bar
data = yf.download(tickers, progress=False)  # Silent download

Data Formatting and Processing

Control how multi-ticker data is structured and processed.

Grouping Options

# Column-based grouping (default)
data = yf.download(["AAPL", "GOOGL"], group_by='column')
# Access: data['Close']['AAPL'], data['Volume']['GOOGL']

# Ticker-based grouping  
data = yf.download(["AAPL", "GOOGL"], group_by='ticker')
# Access: data['AAPL']['Close'], data['GOOGL']['Volume']

Data Processing Options

# Handle missing data
data = yf.download(tickers, keepna=True)  # Keep NaN values
data = yf.download(tickers, keepna=False)  # Drop NaN values

# Data repair and adjustment
data = yf.download(tickers, repair=True, auto_adjust=True)

# Rounding for cleaner output
data = yf.download(tickers, rounding=True)  # Round to 2 decimal places

Error Handling and Reliability

Handle common issues when downloading multiple tickers.

Timeout and Session Management

# Custom timeout for slow connections
data = yf.download(tickers, timeout=30)

# Use custom session for connection pooling
import requests
session = requests.Session()
data = yf.download(tickers, session=session)

Missing Data Handling

# Check for missing tickers
tickers = ["AAPL", "INVALID_TICKER", "GOOGL"]
data = yf.download(tickers, period="1mo")

# Identify which tickers have data
available_tickers = data.columns.get_level_values(1).unique()
missing_tickers = set(tickers) - set(available_tickers)

print(f"Available: {list(available_tickers)}")
print(f"Missing: {list(missing_tickers)}")

Portfolio Analysis Patterns

Common patterns for portfolio and multi-asset analysis.

Returns Calculation

# Download portfolio data
portfolio = ["AAPL", "GOOGL", "MSFT", "AMZN"]
data = yf.download(portfolio, period="1y")

# Calculate daily returns
prices = data['Close']
returns = prices.pct_change().dropna()

# Calculate cumulative returns
cumulative_returns = (1 + returns).cumprod()

# Portfolio metrics
correlation_matrix = returns.corr()
volatility = returns.std() * np.sqrt(252)  # Annualized volatility

Comparative Analysis

# Normalize prices for comparison
normalized_prices = prices / prices.iloc[0]

# Rolling correlations
rolling_corr = returns.rolling(window=30).corr()

# Relative performance
benchmark = yf.download("SPY", period="1y")['Close']
relative_performance = prices.div(benchmark, axis=0)

Large Dataset Considerations

Best practices for handling many tickers or long time periods.

Batch Processing

# Process large ticker lists in batches
def download_in_batches(tickers, batch_size=20, **kwargs):
    """
    Download data for a large ticker list in batches of at most batch_size.

    Parameters:
    - tickers: list of ticker symbols
    - batch_size: int, maximum symbols per yf.download call
    - **kwargs: forwarded verbatim to yf.download (period, interval, ...)

    Returns:
    pd.DataFrame with all batches concatenated column-wise; an empty
    DataFrame when tickers is empty.
    """
    frames = [
        yf.download(tickers[i:i + batch_size], **kwargs)
        for i in range(0, len(tickers), batch_size)
    ]
    # Guard: pd.concat([]) raises ValueError, so short-circuit empty input.
    return pd.concat(frames, axis=1) if frames else pd.DataFrame()

# Usage
large_ticker_list = ["AAPL", "GOOGL", ...] # 100+ tickers
data = download_in_batches(large_ticker_list, batch_size=25, period="1y")

Memory Management

# For very large datasets, consider processing in chunks
def process_large_dataset(tickers, start_date, end_date, chunk_months=6):
    """
    Download a long date range in smaller chunks to limit peak memory use.

    Parameters:
    - tickers: str or list, ticker symbols passed through to yf.download
    - start_date: str/datetime, start of the overall range
    - end_date: str/datetime, end of the overall range (exclusive)
    - chunk_months: int, calendar months per download chunk

    Returns:
    pd.DataFrame of all chunks concatenated row-wise; an empty DataFrame
    when the range is empty.

    Note: the previous pd.date_range(..., freq=f'{chunk_months}M') approach
    anchored boundaries to month-ends, silently dropping the leading partial
    month and the trailing partial chunk (and raised on ranges shorter than
    one chunk). Walking explicit boundaries covers the full range.
    """
    start = pd.Timestamp(start_date)
    end = pd.Timestamp(end_date)

    results = []
    chunk_start = start
    while chunk_start < end:
        # Advance by whole calendar months, clamped to the requested end.
        chunk_end = min(chunk_start + pd.DateOffset(months=chunk_months), end)

        chunk_data = yf.download(tickers, start=chunk_start, end=chunk_end)
        # Process chunk_data as needed
        results.append(chunk_data)

        chunk_start = chunk_end

    # Guard: pd.concat([]) raises ValueError on an empty chunk list.
    return pd.concat(results) if results else pd.DataFrame()

Common Use Cases

Market Index Components

# Download S&P 500 components (example subset)
sp500_sample = ["AAPL", "MSFT", "GOOGL", "AMZN", "TSLA", "META", "NVDA", "JPM", "JNJ", "V"]
index_data = yf.download(sp500_sample, period="1y", group_by='ticker')

# Calculate index-like (equal-weight) performance.
# NOTE: with group_by='ticker', index_data is a DataFrame with tickers at the
# top level of the column MultiIndex -- not a dict. The original
# index_data.values() would raise TypeError (DataFrame.values is an ndarray
# attribute, not a method); select each ticker's sub-frame by key instead.
equal_weight_returns = sum(index_data[ticker]['Close'].pct_change()
                           for ticker in sp500_sample) / len(sp500_sample)

Sector Analysis

# Technology sector stocks
tech_stocks = ["AAPL", "MSFT", "GOOGL", "AMZN", "META", "NVDA", "ORCL", "CRM", "ADBE", "INTC"]
tech_data = yf.download(tech_stocks, period="1y")

# Sector performance metrics
sector_prices = tech_data['Close']
sector_returns = sector_prices.pct_change()
sector_volatility = sector_returns.std()
sector_correlation = sector_returns.corr()

International Markets

# Global indices
global_indices = ["^GSPC", "^IXIC", "^DJI", "^FTSE", "^N225", "^HSI", "000001.SS"]
global_data = yf.download(global_indices, period="1y")

# Currency pairs for international analysis
currencies = ["EURUSD=X", "GBPUSD=X", "JPYUSD=X", "AUDUSD=X"]
fx_data = yf.download(currencies, period="6mo")

Install with Tessl CLI

npx tessl i tessl/pypi-yfinance

docs

bulk-data.md

config-utils.md

index.md

live-streaming.md

market-sector.md

screening.md

search-lookup.md

ticker-data.md

tile.json