0
# Data Retrieval
1
2
Financial data downloading with Yahoo Finance integration, CSV support, and flexible data provider architecture. Provides seamless access to financial time series data for analysis and backtesting.
3
4
## Capabilities
5
6
### Main Data Function
7
8
Primary interface for downloading financial data with flexible provider support and data processing options.
9
10
```python { .api }
11
def get(tickers, provider=None, common_dates=True, forward_fill=False, clean_tickers=True, column_names=None, ticker_field_sep=":", mrefresh=False, existing=None, **kwargs):
12
"""
13
Download financial data and return as DataFrame.
14
15
Parameters:
16
- tickers (str, list): Ticker symbols to download (a single ticker, a comma-separated string such as 'AAPL,MSFT', or a list of tickers)
17
- provider (function): Data provider function (default: None, which falls back to DEFAULT_PROVIDER, i.e. yf - Yahoo Finance)
18
- common_dates (bool): Keep only dates common across all tickers (default: True)
19
- forward_fill (bool): Forward fill missing values (default: False)
20
- clean_tickers (bool): Clean ticker names using utils.clean_ticker (default: True)
21
- column_names (list): Custom column names for DataFrame (default: None)
22
- ticker_field_sep (str): Separator for ticker:field specification (default: ":")
23
- mrefresh (bool): Ignore memoization cache and refresh data (default: False)
24
- existing (pd.DataFrame): Existing DataFrame to append new data to (default: None)
25
- **kwargs: Additional arguments passed to the provider function
26
27
Returns:
28
pd.DataFrame: Financial data with tickers as columns and dates as index
29
"""
30
```
31
32
### Yahoo Finance Provider
33
34
Default data provider using yfinance library for stock, ETF, and index data.
35
36
```python { .api }
37
def yf(ticker, field, start=None, end=None, mrefresh=False):
38
"""
39
Yahoo Finance data provider using yfinance library.
40
41
Parameters:
42
- ticker (str): Stock ticker symbol (e.g., 'AAPL', 'SPY')
43
- field (str): Data field to retrieve; required argument with no signature default (passing None selects 'Adj Close')
44
Available fields: 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume'
45
- start (str, datetime): Start date for data (default: None for all available)
46
- end (str, datetime): End date for data (default: None for most recent)
47
- mrefresh (bool): Ignore memoization cache (default: False)
48
49
Returns:
50
pd.Series: Time series data for the specified ticker and field
51
"""
52
```
53
54
### CSV Data Provider
55
56
Local CSV file data provider with pandas integration.
57
58
```python { .api }
59
def csv(ticker, path="data.csv", field="", mrefresh=False, **kwargs):
60
"""
61
CSV file data provider with pandas read_csv wrapper.
62
63
Parameters:
64
- ticker (str): Column name in CSV file to extract
65
- path (str): Path to CSV file (default: "data.csv")
66
- field (str): Additional field specification (default: "")
67
- mrefresh (bool): Ignore memoization cache (default: False)
68
- **kwargs: Additional arguments passed to pandas.read_csv()
69
70
Returns:
71
pd.Series: Time series data from CSV file
72
"""
73
```
74
75
### Legacy Web Provider
76
77
```python { .api }
78
def web(ticker, field=None, start=None, end=None, mrefresh=False, source="yahoo"):
79
"""
80
Legacy web data provider (DEPRECATED - use yf() instead).
81
82
Parameters:
83
- ticker (str): Ticker symbol
84
- field (str): Data field (default: None)
85
- start: Start date (default: None)
86
- end: End date (default: None)
87
- mrefresh (bool): Ignore cache (default: False)
88
- source (str): Data source (default: "yahoo")
89
90
Returns:
91
pd.Series: Time series data
92
93
Note: This function is deprecated. Use yf() for Yahoo Finance data.
94
"""
95
```
96
97
### Constants
98
99
```python { .api }
100
DEFAULT_PROVIDER = yf # Default data provider set to Yahoo Finance
101
```
102
103
## Usage Examples
104
105
### Basic Data Download
106
107
```python
108
import ffn
109
110
# Download single stock
111
aapl = ffn.get('AAPL', start='2020-01-01')
112
print(f"AAPL data shape: {aapl.shape}")
113
print(f"Date range: {aapl.index[0]} to {aapl.index[-1]}")
114
115
# Download multiple stocks
116
stocks = ffn.get('AAPL,MSFT,GOOGL', start='2020-01-01', end='2023-01-01')
117
print(f"Multi-stock data shape: {stocks.shape}")
118
print(f"Columns: {stocks.columns.tolist()}")
119
120
# Download with list format
121
tech_stocks = ffn.get(['AAPL', 'MSFT', 'GOOGL', 'AMZN'], start='2020-01-01')
122
print(f"Tech stocks: {tech_stocks.columns.tolist()}")
123
```
124
125
### Advanced Data Options
126
127
```python
128
import ffn
129
130
# Download with forward fill for missing data
131
data_ff = ffn.get('AAPL,BND', start='2020-01-01', forward_fill=True)
132
print(f"Forward filled data gaps: {data_ff.isnull().sum().sum()}")
133
134
# Download without common dates (keep all available data)
135
data_all = ffn.get('AAPL,VTI', start='2020-01-01', common_dates=False)
136
print(f"All dates shape: {data_all.shape}")
137
138
# Custom column names
139
custom_data = ffn.get('AAPL,MSFT', start='2020-01-01',
140
column_names=['Apple', 'Microsoft'])
141
print(f"Custom columns: {custom_data.columns.tolist()}")
142
143
# Refresh cached data
144
fresh_data = ffn.get('AAPL', start='2023-01-01', mrefresh=True)
145
print("Data refreshed from source")
146
```
147
148
### Different Data Fields
149
150
```python
151
import ffn
152
153
# Download different price fields using ticker:field syntax
154
price_data = ffn.get('AAPL:Open,AAPL:High,AAPL:Low,AAPL:Close', start='2023-01-01')
155
print(f"OHLC data columns: {price_data.columns.tolist()}")
156
157
# Volume data
158
volume_data = ffn.get('AAPL:Volume,MSFT:Volume', start='2023-01-01')
159
print(f"Volume data shape: {volume_data.shape}")
160
161
# Direct provider usage for specific fields
162
ohlcv_data = {}
163
for field in ['Open', 'High', 'Low', 'Close', 'Volume']:
164
ohlcv_data[field] = ffn.yf('AAPL', field=field, start='2023-01-01')
165
166
import pandas as pd
167
ohlcv_df = pd.DataFrame(ohlcv_data)
168
print(f"OHLCV DataFrame shape: {ohlcv_df.shape}")
169
```
170
171
### CSV Data Integration
172
173
```python
174
import ffn
175
import numpy as np
import pandas as pd
176
177
# Create sample CSV data
178
sample_data = pd.DataFrame({
179
'Date': pd.date_range('2020-01-01', periods=100, freq='D'),
180
'Custom_Asset_1': (1 + 0.001 * np.random.randn(100)).cumprod() * 100,
181
'Custom_Asset_2': (1 + 0.0015 * np.random.randn(100)).cumprod() * 100
182
})
183
sample_data.set_index('Date').to_csv('sample_data.csv')
184
185
# Load CSV data using ffn
186
csv_data = ffn.get('Custom_Asset_1,Custom_Asset_2', provider=ffn.csv,
187
path='sample_data.csv')
188
print(f"CSV data shape: {csv_data.shape}")
189
190
# Combine CSV and web data
191
combined = ffn.get('AAPL', start='2020-01-01', end='2020-04-09') # 100 days
192
combined = ffn.get('Custom_Asset_1,Custom_Asset_2', provider=ffn.csv,
193
path='sample_data.csv', existing=combined)
194
print(f"Combined data columns: {combined.columns.tolist()}")
195
```
196
197
### Data Pipeline Examples
198
199
```python
200
import ffn
import pandas as pd
201
202
# Multi-asset data pipeline
203
def build_portfolio_data(tickers, start_date, benchmark='SPY'):
204
"""Build complete dataset for portfolio analysis."""
205
206
# Download core assets
207
assets = ffn.get(tickers, start=start_date, forward_fill=True)
208
209
# Add benchmark
210
benchmark_data = ffn.get(benchmark, start=start_date)
211
benchmark_data.columns = ['Benchmark']
212
213
# Combine datasets
214
full_data = pd.concat([assets, benchmark_data], axis=1)
215
216
# Remove any remaining NaN values
217
full_data = full_data.dropna()
218
219
return full_data
220
221
# Usage
222
portfolio_data = build_portfolio_data(
223
['AAPL', 'MSFT', 'GOOGL', 'AMZN'],
224
start_date='2020-01-01',
225
benchmark='QQQ'
226
)
227
228
print(f"Portfolio dataset shape: {portfolio_data.shape}")
229
print(f"Date range: {portfolio_data.index[0]} to {portfolio_data.index[-1]}")
230
231
# Quick analysis
232
returns = ffn.to_returns(portfolio_data).dropna()
233
correlations = returns.corr()
234
print(f"\nCorrelation with benchmark:")
235
print(correlations['Benchmark'].drop('Benchmark').round(3))
236
```
237
238
### Error Handling and Data Quality
239
240
```python
241
import ffn
import pandas as pd
242
243
# Handle invalid tickers gracefully
244
tickers = ['AAPL', 'INVALID_TICKER', 'MSFT']
245
try:
246
data = ffn.get(tickers, start='2020-01-01')
247
print(f"Successfully downloaded: {data.columns.tolist()}")
248
except Exception as e:
249
print(f"Error downloading some tickers: {e}")
250
251
# Download valid tickers individually
252
valid_data = {}
253
for ticker in ['AAPL', 'MSFT']: # Skip invalid
254
try:
255
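valid_data[ticker] = ffn.get(ticker, start='2020-01-01').iloc[:, 0]  # select by position: the column is named after the cleaned ticker (e.g. 'aapl'), not the raw input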
256
except Exception:
257
continue
258
259
data = pd.DataFrame(valid_data)
260
print(f"Recovered data: {data.columns.tolist()}")
261
262
# Data quality checks
263
print(f"Missing values: {data.isnull().sum().sum()}")
264
print(f"Data range: {data.index[0]} to {data.index[-1]}")
265
print(f"Business days in range: {len(data)}")
266
267
# Clean and prepare for analysis
268
clean_data = (data
269
.dropna() # Remove missing values
270
.asfreq('D') # Ensure daily frequency
271
.ffill() # Fill weekend gaps
272
)
273
274
print(f"Cleaned data shape: {clean_data.shape}")
275
```