# Bulk Data Operations

Efficient downloading and management of multiple financial instruments with threading support, various data formatting options, and bulk operations for portfolio analysis and multi-symbol research.

## Capabilities

### Multi-Symbol Download Function

Download historical data for multiple tickers simultaneously with threading support and flexible formatting options.

```python { .api }
def download(tickers, start: Union[str, datetime] = None, end: Union[str, datetime] = None,
             actions: bool = False, threads: Union[bool, int] = True,
             ignore_tz: bool = None, group_by: str = 'column',
             auto_adjust: bool = None, back_adjust: bool = False,
             repair: bool = False, keepna: bool = False, progress: bool = True,
             period: str = None, interval: str = "1d", prepost: bool = False,
             rounding: bool = False, timeout: int = 10, session = None,
             multi_level_index: bool = True) -> Union[pd.DataFrame, None]:
    """
    Download historical data for multiple tickers.

    Parameters:
    - tickers: str or list, ticker symbols to download (space/comma separated string or list)
    - start: str/datetime, start date in YYYY-MM-DD format
    - end: str/datetime, end date in YYYY-MM-DD format
    - period: str, period to download ("1d", "5d", "1mo", "3mo", "6mo", "1y", "2y", "5y", "10y", "ytd", "max")
    - interval: str, data interval ("1m", "2m", "5m", "15m", "30m", "60m", "90m", "1h", "1d", "5d", "1wk", "1mo", "3mo")
    - actions: bool, download dividend and stock split data
    - threads: bool/int, enable multithreading (True/False or number of threads)
    - group_by: str, group data by 'ticker' or 'column'
    - auto_adjust: bool, adjust OHLC prices for splits and dividends
    - back_adjust: bool, back-adjust prices instead of forward-adjust
    - repair: bool, detect and repair bad data
    - keepna: bool, keep NaN values in output
    - progress: bool, show download progress bar
    - prepost: bool, include pre and post market data
    - rounding: bool, round values to 2 decimal places
    - timeout: int, timeout for requests in seconds
    - multi_level_index: bool, use multi-level column index

    Returns:
    pd.DataFrame with historical data for all tickers
    """
```

#### Usage Examples

```python
import yfinance as yf

# Download multiple stocks
data = yf.download(["AAPL", "GOOGL", "MSFT"], period="1mo")

# Download with specific date range
data = yf.download("AAPL GOOGL MSFT", start="2023-01-01", end="2023-12-31")

# Download with custom formatting
data = yf.download(["AAPL", "GOOGL"], period="6mo",
                   group_by='ticker', threads=4, progress=True)

# Download intraday data
data = yf.download(["SPY", "QQQ"], period="5d", interval="5m")

# Download with actions (dividends/splits)
data = yf.download(["AAPL", "MSFT"], period="1y", actions=True)
```

#### Data Structure Examples

**Column-grouped data** (default: `group_by='column'`):
```
            Close           High            Low             Open            Volume
symbol      AAPL    GOOGL   AAPL    GOOGL   AAPL    GOOGL   AAPL    GOOGL   AAPL       GOOGL
date
2023-01-03  125.07  88.59   125.42  89.19   124.76  88.12   125.20  88.30   112117500  23097900
```

**Ticker-grouped data** (`group_by='ticker'`):
- Returns a DataFrame whose top-level column index is the ticker symbol
- Each ticker's sub-columns contain its OHLCV data (access via `data['AAPL']`)
### Multi-Ticker Management Class

Manage multiple tickers with shared operations and bulk data access.

```python { .api }
class Tickers:
    def __init__(self, tickers, session=None):
        """
        Create a Tickers object for managing multiple ticker symbols.

        Parameters:
        - tickers: str or list, ticker symbols (space/comma separated or list)
        - session: requests.Session, optional session for HTTP requests
        """

    def history(self, period: str = "1mo", interval: str = "1d",
                start: Union[str, datetime] = None, end: Union[str, datetime] = None,
                prepost: bool = False, actions: bool = True, auto_adjust: bool = True,
                repair: bool = False, threads: Union[bool, int] = True,
                group_by: str = 'column', progress: bool = True,
                timeout: int = 10, **kwargs) -> pd.DataFrame:
        """
        Download historical data for all tickers.

        Returns:
        pd.DataFrame with historical data formatted according to group_by parameter
        """

    def download(self, **kwargs) -> pd.DataFrame:
        """
        Alias for history() method with same parameters.
        """

    def news(self) -> dict:
        """
        Get news for all tickers.

        Returns:
        dict with ticker symbols as keys and news lists as values
        """

    def live(self, message_handler: Callable = None, verbose: bool = True):
        """
        Start real-time data streaming for all tickers.

        Parameters:
        - message_handler: function to handle incoming messages
        - verbose: bool, enable verbose logging
        """

    # Properties
    symbols: list   # List of ticker symbols
    tickers: dict   # Dictionary mapping symbols to Ticker objects
```

#### Usage Examples

```python
# Create Tickers object
portfolio = yf.Tickers("AAPL GOOGL MSFT AMZN")

# Or with a list
portfolio = yf.Tickers(["AAPL", "GOOGL", "MSFT", "AMZN"])

# Download historical data
data = portfolio.history(period="1y")

# Access individual ticker objects
apple = portfolio.tickers['AAPL']
apple_info = apple.info

# Get news for all tickers
all_news = portfolio.news()
apple_news = all_news['AAPL']

# Start live streaming
def handle_updates(msg):
    print(f"Portfolio update: {msg}")

portfolio.live(message_handler=handle_updates)
```
### Threading and Performance

Control threading behavior for optimal performance based on your use case.

#### Threading Options

```python
# Disable threading (sequential downloads)
data = yf.download(["AAPL", "GOOGL", "MSFT"], threads=False)

# Enable threading with default thread count
data = yf.download(["AAPL", "GOOGL", "MSFT"], threads=True)

# Specify exact number of threads
data = yf.download(["AAPL", "GOOGL", "MSFT"], threads=8)

# Control progress display
data = yf.download(tickers, progress=True)   # Show progress bar
data = yf.download(tickers, progress=False)  # Silent download
```

### Data Formatting and Processing

Control how multi-ticker data is structured and processed.

#### Grouping Options

```python
# Column-based grouping (default)
data = yf.download(["AAPL", "GOOGL"], group_by='column')
# Access: data['Close']['AAPL'], data['Volume']['GOOGL']

# Ticker-based grouping
data = yf.download(["AAPL", "GOOGL"], group_by='ticker')
# Access: data['AAPL']['Close'], data['GOOGL']['Volume']
```

#### Data Processing Options

```python
# Handle missing data
data = yf.download(tickers, keepna=True)   # Keep NaN values
data = yf.download(tickers, keepna=False)  # Drop NaN values

# Data repair and adjustment
data = yf.download(tickers, repair=True, auto_adjust=True)

# Rounding for cleaner output
data = yf.download(tickers, rounding=True)  # Round to 2 decimal places
```

### Error Handling and Reliability

Handle common issues when downloading multiple tickers.

#### Timeout and Session Management

```python
# Custom timeout for slow connections
data = yf.download(tickers, timeout=30)

# Use custom session for connection pooling
import requests
session = requests.Session()
data = yf.download(tickers, session=session)
```

#### Missing Data Handling

```python
# Check for missing tickers
tickers = ["AAPL", "INVALID_TICKER", "GOOGL"]
data = yf.download(tickers, period="1mo")

# Identify which tickers have data (ticker symbols are level 1
# of the column MultiIndex with the default group_by='column')
available_tickers = data.columns.get_level_values(1).unique()
missing_tickers = set(tickers) - set(available_tickers)

print(f"Available: {list(available_tickers)}")
print(f"Missing: {list(missing_tickers)}")
```
### Portfolio Analysis Patterns

Common patterns for portfolio and multi-asset analysis.

#### Returns Calculation

```python
import numpy as np

# Download portfolio data
portfolio = ["AAPL", "GOOGL", "MSFT", "AMZN"]
data = yf.download(portfolio, period="1y")

# Calculate daily returns
prices = data['Close']
returns = prices.pct_change().dropna()

# Calculate cumulative returns
cumulative_returns = (1 + returns).cumprod()

# Portfolio metrics
correlation_matrix = returns.corr()
volatility = returns.std() * np.sqrt(252)  # Annualized volatility
```

#### Comparative Analysis

```python
# Normalize prices for comparison
normalized_prices = prices / prices.iloc[0]

# Rolling correlations
rolling_corr = returns.rolling(window=30).corr()

# Relative performance
benchmark = yf.download("SPY", period="1y")['Close']
relative_performance = prices.div(benchmark, axis=0)
```

### Large Dataset Considerations

Best practices for handling many tickers or long time periods.

#### Batch Processing

```python
# Process large ticker lists in batches
def download_in_batches(tickers, batch_size=20, **kwargs):
    all_data = []
    for i in range(0, len(tickers), batch_size):
        batch = tickers[i:i + batch_size]
        batch_data = yf.download(batch, **kwargs)
        all_data.append(batch_data)
    return pd.concat(all_data, axis=1)

# Usage
large_ticker_list = ["AAPL", "GOOGL", ...]  # 100+ tickers
data = download_in_batches(large_ticker_list, batch_size=25, period="1y")
```

#### Memory Management

```python
# For very large datasets, consider processing in chunks
def process_large_dataset(tickers, start_date, end_date, chunk_months=6):
    date_ranges = pd.date_range(start_date, end_date, freq=f'{chunk_months}M')

    results = []
    for i in range(len(date_ranges) - 1):
        chunk_start = date_ranges[i]
        chunk_end = date_ranges[i + 1]

        chunk_data = yf.download(tickers, start=chunk_start, end=chunk_end)
        # Process chunk_data as needed
        results.append(chunk_data)

    return pd.concat(results)
```
## Common Use Cases

### Market Index Components

```python
# Download S&P 500 components (example subset)
sp500_sample = ["AAPL", "MSFT", "GOOGL", "AMZN", "TSLA", "META", "NVDA", "JPM", "JNJ", "V"]
index_data = yf.download(sp500_sample, period="1y", group_by='ticker')

# Calculate index-like performance (equal-weighted average of daily returns;
# with group_by='ticker', each ticker's data is accessed via index_data[ticker])
equal_weight_returns = sum(index_data[ticker]['Close'].pct_change()
                           for ticker in sp500_sample) / len(sp500_sample)
```

### Sector Analysis

```python
# Technology sector stocks
tech_stocks = ["AAPL", "MSFT", "GOOGL", "AMZN", "META", "NVDA", "ORCL", "CRM", "ADBE", "INTC"]
tech_data = yf.download(tech_stocks, period="1y")

# Sector performance metrics
sector_prices = tech_data['Close']
sector_returns = sector_prices.pct_change()
sector_volatility = sector_returns.std()
sector_correlation = sector_returns.corr()
```

### International Markets

```python
# Global indices
global_indices = ["^GSPC", "^IXIC", "^DJI", "^FTSE", "^N225", "^HSI", "000001.SS"]
global_data = yf.download(global_indices, period="1y")

# Currency pairs for international analysis
currencies = ["EURUSD=X", "GBPUSD=X", "JPYUSD=X", "AUDUSD=X"]
fx_data = yf.download(currencies, period="6mo")
```