# Bulk Data Operations

Efficient downloading and management of multiple financial instruments with threading support, various data formatting options, and bulk operations for portfolio analysis and multi-symbol research.

## Capabilities

### Multi-Symbol Download Function

Download historical data for multiple tickers simultaneously with threading support and flexible formatting options.

```python { .api }
def download(tickers, start: Union[str, datetime] = None, end: Union[str, datetime] = None,
             actions: bool = False, threads: Union[bool, int] = True,
             ignore_tz: bool = None, group_by: str = 'column',
             auto_adjust: bool = None, back_adjust: bool = False,
             repair: bool = False, keepna: bool = False, progress: bool = True,
             period: str = None, interval: str = "1d", prepost: bool = False,
             rounding: bool = False, timeout: int = 10, session = None,
             multi_level_index: bool = True) -> Union[pd.DataFrame, None]:
    """
    Download historical data for multiple tickers.

    Parameters:
    - tickers: str or list, ticker symbols to download (space/comma separated string or list)
    - start: str/datetime, start date in YYYY-MM-DD format
    - end: str/datetime, end date in YYYY-MM-DD format
    - period: str, period to download ("1d", "5d", "1mo", "3mo", "6mo", "1y", "2y", "5y", "10y", "ytd", "max")
    - interval: str, data interval ("1m", "2m", "5m", "15m", "30m", "60m", "90m", "1h", "1d", "5d", "1wk", "1mo", "3mo")
    - actions: bool, download dividend and stock split data
    - threads: bool/int, enable multithreading (True/False or number of threads)
    - ignore_tz: bool, ignore timezones when combining rows across exchanges
    - group_by: str, group data by 'ticker' or 'column'
    - auto_adjust: bool, adjust OHLC prices for splits and dividends
    - back_adjust: bool, back-adjust prices instead of forward-adjust
    - repair: bool, detect and repair bad data
    - keepna: bool, keep NaN values in output
    - progress: bool, show download progress bar
    - prepost: bool, include pre and post market data
    - rounding: bool, round values to 2 decimal places
    - timeout: int, timeout for requests in seconds
    - session: requests.Session, optional session for HTTP requests
    - multi_level_index: bool, use multi-level column index

    Returns:
    pd.DataFrame with historical data for all tickers
    """
```

#### Usage Examples

```python
import yfinance as yf

# Download multiple stocks
data = yf.download(["AAPL", "GOOGL", "MSFT"], period="1mo")

# Download with specific date range
data = yf.download("AAPL GOOGL MSFT", start="2023-01-01", end="2023-12-31")

# Download with custom formatting
data = yf.download(["AAPL", "GOOGL"], period="6mo",
                   group_by='ticker', threads=4, progress=True)

# Download intraday data
data = yf.download(["SPY", "QQQ"], period="5d", interval="5m")

# Download with actions (dividends/splits)
data = yf.download(["AAPL", "MSFT"], period="1y", actions=True)
```

#### Data Structure Examples

**Column-grouped data** (default: `group_by='column'`):

```
             Close          High           Low            Open           Volume
symbol        AAPL  GOOGL   AAPL  GOOGL   AAPL  GOOGL   AAPL  GOOGL       AAPL     GOOGL
date
2023-01-03  125.07  88.59 125.42  89.19 124.76  88.12 125.20  88.30  112117500  23097900
```

**Ticker-grouped data** (`group_by='ticker'`):

- Returns a DataFrame with ticker symbols as the top level of the column index
- Each ticker's sub-frame contains the OHLCV columns (access via `data['AAPL']`)

### Multi-Ticker Management Class

Manage multiple tickers with shared operations and bulk data access.

```python { .api }
class Tickers:
    def __init__(self, tickers, session=None):
        """
        Create a Tickers object for managing multiple ticker symbols.

        Parameters:
        - tickers: str or list, ticker symbols (space/comma separated or list)
        - session: requests.Session, optional session for HTTP requests
        """

    def history(self, period: str = "1mo", interval: str = "1d",
                start: Union[str, datetime] = None, end: Union[str, datetime] = None,
                prepost: bool = False, actions: bool = True, auto_adjust: bool = True,
                repair: bool = False, threads: Union[bool, int] = True,
                group_by: str = 'column', progress: bool = True,
                timeout: int = 10, **kwargs) -> pd.DataFrame:
        """
        Download historical data for all tickers.

        Returns:
        pd.DataFrame with historical data formatted according to group_by parameter
        """

    def download(self, **kwargs) -> pd.DataFrame:
        """
        Alias for history() method with same parameters.
        """

    def news(self) -> dict:
        """
        Get news for all tickers.

        Returns:
        dict with ticker symbols as keys and news lists as values
        """

    def live(self, message_handler: Callable = None, verbose: bool = True):
        """
        Start real-time data streaming for all tickers.

        Parameters:
        - message_handler: function to handle incoming messages
        - verbose: bool, enable verbose logging
        """

    # Properties
    symbols: list   # List of ticker symbols
    tickers: dict   # Dictionary mapping symbols to Ticker objects
```

#### Usage Examples

```python
# Create Tickers object
portfolio = yf.Tickers("AAPL GOOGL MSFT AMZN")

# Or with a list
portfolio = yf.Tickers(["AAPL", "GOOGL", "MSFT", "AMZN"])

# Download historical data
data = portfolio.history(period="1y")

# Access individual ticker objects
apple = portfolio.tickers['AAPL']
apple_info = apple.info

# Get news for all tickers
all_news = portfolio.news()
apple_news = all_news['AAPL']

# Start live streaming
def handle_updates(msg):
    print(f"Portfolio update: {msg}")

portfolio.live(message_handler=handle_updates)
```

### Threading and Performance

Control threading behavior for optimal performance based on your use case.

#### Threading Options

```python
# Disable threading (sequential downloads)
data = yf.download(["AAPL", "GOOGL", "MSFT"], threads=False)

# Enable threading with default thread count
data = yf.download(["AAPL", "GOOGL", "MSFT"], threads=True)

# Specify exact number of threads
data = yf.download(["AAPL", "GOOGL", "MSFT"], threads=8)

# Control progress display
data = yf.download(tickers, progress=True)   # Show progress bar
data = yf.download(tickers, progress=False)  # Silent download
```

### Data Formatting and Processing

Control how multi-ticker data is structured and processed.

#### Grouping Options

```python
# Column-based grouping (default)
data = yf.download(["AAPL", "GOOGL"], group_by='column')
# Access: data['Close']['AAPL'], data['Volume']['GOOGL']

# Ticker-based grouping
data = yf.download(["AAPL", "GOOGL"], group_by='ticker')
# Access: data['AAPL']['Close'], data['GOOGL']['Volume']
```

#### Data Processing Options

```python
# Handle missing data
data = yf.download(tickers, keepna=True)   # Keep NaN values
data = yf.download(tickers, keepna=False)  # Drop NaN values

# Data repair and adjustment
data = yf.download(tickers, repair=True, auto_adjust=True)

# Rounding for cleaner output
data = yf.download(tickers, rounding=True)  # Round to 2 decimal places
```

### Error Handling and Reliability

Handle common issues when downloading multiple tickers.

#### Timeout and Session Management

```python
# Custom timeout for slow connections
data = yf.download(tickers, timeout=30)

# Use custom session for connection pooling
import requests
session = requests.Session()
data = yf.download(tickers, session=session)
```

#### Missing Data Handling

```python
# Check for missing tickers
tickers = ["AAPL", "INVALID_TICKER", "GOOGL"]
data = yf.download(tickers, period="1mo")

# Identify which tickers have data
available_tickers = data.columns.get_level_values(1).unique()
missing_tickers = set(tickers) - set(available_tickers)

print(f"Available: {list(available_tickers)}")
print(f"Missing: {list(missing_tickers)}")
```

### Portfolio Analysis Patterns

Common patterns for portfolio and multi-asset analysis.

#### Returns Calculation

```python
import numpy as np

# Download portfolio data
portfolio = ["AAPL", "GOOGL", "MSFT", "AMZN"]
data = yf.download(portfolio, period="1y")

# Calculate daily returns
prices = data['Close']
returns = prices.pct_change().dropna()

# Calculate cumulative returns
cumulative_returns = (1 + returns).cumprod()

# Portfolio metrics
correlation_matrix = returns.corr()
volatility = returns.std() * np.sqrt(252)  # Annualized volatility
```

#### Comparative Analysis

```python
# Normalize prices for comparison
normalized_prices = prices / prices.iloc[0]

# Rolling correlations
rolling_corr = returns.rolling(window=30).corr()

# Relative performance
benchmark = yf.download("SPY", period="1y")['Close']
relative_performance = prices.div(benchmark, axis=0)
```

### Large Dataset Considerations

Best practices for handling many tickers or long time periods.

#### Batch Processing

```python
# Process large ticker lists in batches
def download_in_batches(tickers, batch_size=20, **kwargs):
    all_data = []
    for i in range(0, len(tickers), batch_size):
        batch = tickers[i:i + batch_size]
        batch_data = yf.download(batch, **kwargs)
        all_data.append(batch_data)
    return pd.concat(all_data, axis=1)

# Usage
large_ticker_list = ["AAPL", "GOOGL", ...]  # 100+ tickers
data = download_in_batches(large_ticker_list, batch_size=25, period="1y")
```

#### Memory Management

```python
# For very large datasets, consider processing in chunks
def process_large_dataset(tickers, start_date, end_date, chunk_months=6):
    date_ranges = pd.date_range(start_date, end_date, freq=f'{chunk_months}M')

    results = []
    for i in range(len(date_ranges) - 1):
        chunk_start = date_ranges[i]
        chunk_end = date_ranges[i + 1]

        chunk_data = yf.download(tickers, start=chunk_start, end=chunk_end)
        # Process chunk_data as needed
        results.append(chunk_data)

    return pd.concat(results)
```

## Common Use Cases

### Market Index Components

```python
# Download S&P 500 components (example subset)
sp500_sample = ["AAPL", "MSFT", "GOOGL", "AMZN", "TSLA", "META", "NVDA", "JPM", "JNJ", "V"]
index_data = yf.download(sp500_sample, period="1y", group_by='ticker')

# Calculate index-like performance (equal-weighted average of daily returns)
equal_weight_returns = sum(index_data[ticker]['Close'].pct_change()
                           for ticker in sp500_sample) / len(sp500_sample)
```

### Sector Analysis

```python
# Technology sector stocks
tech_stocks = ["AAPL", "MSFT", "GOOGL", "AMZN", "META", "NVDA", "ORCL", "CRM", "ADBE", "INTC"]
tech_data = yf.download(tech_stocks, period="1y")

# Sector performance metrics
sector_prices = tech_data['Close']
sector_returns = sector_prices.pct_change()
sector_volatility = sector_returns.std()
sector_correlation = sector_returns.corr()
```

### International Markets

```python
# Global indices
global_indices = ["^GSPC", "^IXIC", "^DJI", "^FTSE", "^N225", "^HSI", "000001.SS"]
global_data = yf.download(global_indices, period="1y")

# Currency pairs for international analysis
currencies = ["EURUSD=X", "GBPUSD=X", "JPYUSD=X", "AUDUSD=X"]
fx_data = yf.download(currencies, period="6mo")
```