or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

data-retrieval.md data-utilities.md index.md pandas-extensions.md performance-analysis.md portfolio-optimization.md return-calculations.md risk-metrics.md statistical-analysis.md

docs/data-retrieval.md

0

# Data Retrieval

1

2

Financial data downloading with Yahoo Finance integration, CSV support, and a flexible data provider architecture. Provides seamless access to financial time series data for analysis and backtesting.

3

4

## Capabilities

5

6

### Main Data Function

7

8

Primary interface for downloading financial data with flexible provider support and data processing options.

9

10

```python { .api }

11

def get(tickers, provider=None, common_dates=True, forward_fill=False, clean_tickers=True, column_names=None, ticker_field_sep=":", mrefresh=False, existing=None, **kwargs):

12

"""

13

Download financial data and return as DataFrame.

14

15

Parameters:

16

- tickers (str, list): Ticker symbols to download (can be CSV string, list, or individual ticker)

17

- provider (function): Data provider function (default: None, which selects DEFAULT_PROVIDER, i.e. yf - Yahoo Finance)

18

- common_dates (bool): Keep only dates common across all tickers (default: True)

19

- forward_fill (bool): Forward fill missing values (default: False)

20

- clean_tickers (bool): Clean ticker names using utils.clean_ticker (default: True)

21

- column_names (list): Custom column names for DataFrame (default: None)

22

- ticker_field_sep (str): Separator for ticker:field specification (default: ":")

23

- mrefresh (bool): Ignore memoization cache and refresh data (default: False)

24

- existing (pd.DataFrame): Existing DataFrame to append new data to (default: None)

25

- **kwargs: Additional arguments passed to the provider function

26

27

Returns:

28

pd.DataFrame: Financial data with tickers as columns and dates as index

29

"""

30

```

31

32

### Yahoo Finance Provider

33

34

Default data provider using the yfinance library for stock, ETF, and index data.

35

36

```python { .api }

37

def yf(ticker, field, start=None, end=None, mrefresh=False):

38

"""

39

Yahoo Finance data provider using yfinance library.

40

41

Parameters:

42

- ticker (str): Stock ticker symbol (e.g., 'AAPL', 'SPY')

43

- field (str): Data field to retrieve (required argument with no signature default; 'Adj Close' is used when None is passed)

44

Available fields: 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume'

45

- start (str, datetime): Start date for data (default: None for all available)

46

- end (str, datetime): End date for data (default: None for most recent)

47

- mrefresh (bool): Ignore memoization cache (default: False)

48

49

Returns:

50

pd.Series: Time series data for the specified ticker and field

51

"""

52

```

53

54

### CSV Data Provider

55

56

Local CSV file data provider with pandas integration.

57

58

```python { .api }

59

def csv(ticker, path="data.csv", field="", mrefresh=False, **kwargs):

60

"""

61

CSV file data provider with pandas read_csv wrapper.

62

63

Parameters:

64

- ticker (str): Column name in CSV file to extract

65

- path (str): Path to CSV file (default: "data.csv")

66

- field (str): Additional field specification (default: "")

67

- mrefresh (bool): Ignore memoization cache (default: False)

68

- **kwargs: Additional arguments passed to pandas.read_csv()

69

70

Returns:

71

pd.Series: Time series data from CSV file

72

"""

73

```

74

75

### Legacy Web Provider

76

77

```python { .api }

78

def web(ticker, field=None, start=None, end=None, mrefresh=False, source="yahoo"):

79

"""

80

Legacy web data provider (DEPRECATED - use yf() instead).

81

82

Parameters:

83

- ticker (str): Ticker symbol

84

- field (str): Data field (default: None)

85

- start: Start date (default: None)

86

- end: End date (default: None)

87

- mrefresh (bool): Ignore cache (default: False)

88

- source (str): Data source (default: "yahoo")

89

90

Returns:

91

pd.Series: Time series data

92

93

Note: This function is deprecated. Use yf() for Yahoo Finance data.

94

"""

95

```

96

97

### Constants

98

99

```python { .api }

100

DEFAULT_PROVIDER = yf # Default data provider set to Yahoo Finance

101

```

102

103

## Usage Examples

104

105

### Basic Data Download

106

107

```python

108

import ffn

109

110

# Download single stock

111

aapl = ffn.get('AAPL', start='2020-01-01')

112

print(f"AAPL data shape: {aapl.shape}")

113

print(f"Date range: {aapl.index[0]} to {aapl.index[-1]}")

114

115

# Download multiple stocks

116

stocks = ffn.get('AAPL,MSFT,GOOGL', start='2020-01-01', end='2023-01-01')

117

print(f"Multi-stock data shape: {stocks.shape}")

118

print(f"Columns: {stocks.columns.tolist()}")

119

120

# Download with list format

121

tech_stocks = ffn.get(['AAPL', 'MSFT', 'GOOGL', 'AMZN'], start='2020-01-01')

122

print(f"Tech stocks: {tech_stocks.columns.tolist()}")

123

```

124

125

### Advanced Data Options

126

127

```python

128

import ffn

129

130

# Download with forward fill for missing data

131

data_ff = ffn.get('AAPL,BND', start='2020-01-01', forward_fill=True)

132

print(f"Forward filled data gaps: {data_ff.isnull().sum().sum()}")

133

134

# Download without common dates (keep all available data)

135

data_all = ffn.get('AAPL,VTI', start='2020-01-01', common_dates=False)

136

print(f"All dates shape: {data_all.shape}")

137

138

# Custom column names

139

custom_data = ffn.get('AAPL,MSFT', start='2020-01-01',

140

column_names=['Apple', 'Microsoft'])

141

print(f"Custom columns: {custom_data.columns.tolist()}")

142

143

# Refresh cached data

144

fresh_data = ffn.get('AAPL', start='2023-01-01', mrefresh=True)

145

print("Data refreshed from source")

146

```

147

148

### Different Data Fields

149

150

```python

151

import ffn

152

153

# Download different price fields using ticker:field syntax

154

price_data = ffn.get('AAPL:Open,AAPL:High,AAPL:Low,AAPL:Close', start='2023-01-01')

155

print(f"OHLC data columns: {price_data.columns.tolist()}")

156

157

# Volume data

158

volume_data = ffn.get('AAPL:Volume,MSFT:Volume', start='2023-01-01')

159

print(f"Volume data shape: {volume_data.shape}")

160

161

# Direct provider usage for specific fields

162

ohlcv_data = {}

163

for field in ['Open', 'High', 'Low', 'Close', 'Volume']:

164

ohlcv_data[field] = ffn.yf('AAPL', field=field, start='2023-01-01')

165

166

import pandas as pd

167

ohlcv_df = pd.DataFrame(ohlcv_data)

168

print(f"OHLCV DataFrame shape: {ohlcv_df.shape}")

169

```

170

171

### CSV Data Integration

172

173

```python

174

import ffn

175

import numpy as np

import pandas as pd

176

177

# Create sample CSV data

178

sample_data = pd.DataFrame({

179

'Date': pd.date_range('2020-01-01', periods=100, freq='D'),

180

'Custom_Asset_1': (1 + 0.001 * np.random.randn(100)).cumprod() * 100,

181

'Custom_Asset_2': (1 + 0.0015 * np.random.randn(100)).cumprod() * 100

182

})

183

sample_data.set_index('Date').to_csv('sample_data.csv')

184

185

# Load CSV data using ffn

186

csv_data = ffn.get('Custom_Asset_1,Custom_Asset_2', provider=ffn.csv,

187

path='sample_data.csv')

188

print(f"CSV data shape: {csv_data.shape}")

189

190

# Combine CSV and web data

191

combined = ffn.get('AAPL', start='2020-01-01', end='2020-04-09') # 100 days

192

combined = ffn.get('Custom_Asset_1,Custom_Asset_2', provider=ffn.csv,

193

path='sample_data.csv', existing=combined)

194

print(f"Combined data columns: {combined.columns.tolist()}")

195

```

196

197

### Data Pipeline Examples

198

199

```python

200

import ffn

import pandas as pd

201

202

# Multi-asset data pipeline

203

def build_portfolio_data(tickers, start_date, benchmark='SPY'):

204

"""Build complete dataset for portfolio analysis."""

205

206

# Download core assets

207

assets = ffn.get(tickers, start=start_date, forward_fill=True)

208

209

# Add benchmark

210

benchmark_data = ffn.get(benchmark, start=start_date)

211

benchmark_data.columns = ['Benchmark']

212

213

# Combine datasets

214

full_data = pd.concat([assets, benchmark_data], axis=1)

215

216

# Remove any remaining NaN values

217

full_data = full_data.dropna()

218

219

return full_data

220

221

# Usage

222

portfolio_data = build_portfolio_data(

223

['AAPL', 'MSFT', 'GOOGL', 'AMZN'],

224

start_date='2020-01-01',

225

benchmark='QQQ'

226

)

227

228

print(f"Portfolio dataset shape: {portfolio_data.shape}")

229

print(f"Date range: {portfolio_data.index[0]} to {portfolio_data.index[-1]}")

230

231

# Quick analysis

232

returns = ffn.to_returns(portfolio_data).dropna()

233

correlations = returns.corr()

234

print(f"\nCorrelation with benchmark:")

235

print(correlations['Benchmark'].drop('Benchmark').round(3))

236

```

237

238

### Error Handling and Data Quality

239

240

```python

241

import ffn

import pandas as pd

242

243

# Handle invalid tickers gracefully

244

tickers = ['AAPL', 'INVALID_TICKER', 'MSFT']

245

try:

246

data = ffn.get(tickers, start='2020-01-01')

247

print(f"Successfully downloaded: {data.columns.tolist()}")

248

except Exception as e:

249

print(f"Error downloading some tickers: {e}")

250

251

# Download valid tickers individually

252

valid_data = {}

253

for ticker in ['AAPL', 'MSFT']: # Skip invalid

254

try:

255

valid_data[ticker] = ffn.get(ticker, start='2020-01-01')[ticker]

256

except:

257

continue

258

259

data = pd.DataFrame(valid_data)

260

print(f"Recovered data: {data.columns.tolist()}")

261

262

# Data quality checks

263

print(f"Missing values: {data.isnull().sum().sum()}")

264

print(f"Data range: {data.index[0]} to {data.index[-1]}")

265

print(f"Business days in range: {len(data)}")

266

267

# Clean and prepare for analysis

268

clean_data = (data

269

.dropna() # Remove missing values

270

.asfreq('D') # Ensure daily frequency

271

.ffill() # Fill weekend gaps

272

)

273

274

print(f"Cleaned data shape: {clean_data.shape}")

275

```