Access and analyze historical weather and climate data with Python.
---
Meteostat provides comprehensive data processing capabilities for time series analysis, including normalization, interpolation, aggregation, unit conversion, and data quality assessment. These methods are available on all time series classes (Hourly, Daily, Monthly).
Core methods for accessing and examining time series data.
def fetch(self) -> pd.DataFrame:
"""
Fetch the processed time series data as a pandas DataFrame.
Returns:
pandas.DataFrame with meteorological time series data
"""
def count(self) -> int:
"""
Count the number of non-null observations in the time series.

Useful as a quick data-quality check before fetching the data.

Returns:
int, total count of non-null data points across all parameters
"""
def stations(self) -> pd.Index:
"""
Get the station IDs associated with the time series.

Returns:
pandas.Index of station identifiers used in the time series
"""Evaluate data completeness and coverage across the time series.
def coverage(self, parameter: str | None = None) -> float:
"""
Calculate data coverage as a ratio of available to expected observations.

Parameters:
- parameter: str, optional - specific parameter to calculate coverage for
If None, returns overall coverage across all parameters

Returns:
float, coverage ratio between 0.0 and 1.0 (or slightly above 1.0 if model data included)

NOTE(review): the quality-control example further below indexes the
result like a pandas Series (coverage[coverage > 0.8]) — confirm
whether the no-argument form returns a Series rather than a float.
"""Ensure complete time series with regular intervals and filled gaps.
def normalize(self):
"""
Normalize the time series to ensure regular time intervals.

Fills missing time steps with NaN values so that every expected
timestamp is present in the series.

Returns:
Time series object with normalized temporal coverage
"""Fill gaps in time series data using various interpolation methods.
def interpolate(self, limit: int = 3):
"""
Interpolate missing values in the time series.

Use a small limit (e.g. 1) for conservative gap-filling in critical
applications, a larger one for visualization (see the quality-control
guidance below).

Parameters:
- limit: int, maximum number of consecutive NaN values to interpolate
(default: 3)

Returns:
Time series object with interpolated missing values
"""Aggregate time series data to different temporal frequencies.
def aggregate(self, freq: str, spatial: bool = False):
"""
Aggregate time series data to a different temporal frequency.

Each parameter is combined with an appropriate function (e.g. mean for
temperatures, sum for precipitation) — see the default aggregation
table below.

Parameters:
- freq: str, target frequency using pandas frequency strings
('D' for daily, 'W' for weekly, 'MS' for monthly, 'AS' for annual)
- spatial: bool, whether to perform spatial averaging across stations
(default: False)

Returns:
Time series object with aggregated data at the target frequency
"""Convert meteorological parameters to different unit systems.
def convert(self, units: dict):
"""
Convert meteorological parameters to different units.

Parameters:
- units: dict, mapping of parameter names to conversion functions
e.g., {'temp': units.fahrenheit, 'prcp': units.inches}
Plain callables also work (see the custom-conversion example below).

Returns:
Time series object with converted units
"""Manage local data cache for improved performance.
def clear_cache(self):
"""
Clear cached data files associated with the time series.

Useful for forcing fresh data downloads or freeing disk space.
"""from datetime import datetime
# Example: one year of daily weather for a geographic point, with
# basic quality checks (count, coverage) before fetching the data.
from meteostat import Point, Daily
# Create daily time series
location = Point(52.5200, 13.4050) # Berlin
start = datetime(2020, 1, 1)
end = datetime(2020, 12, 31)
data = Daily(location, start, end)
# Check data quality
print(f"Total observations: {data.count()}")
coverage_stats = data.coverage()
print("Data coverage by parameter:")
print(coverage_stats)
# Fetch the data
daily_data = data.fetch()
print(f"Retrieved {len(daily_data)} daily records")from datetime import datetime
# Example: compare missing-value counts before and after interpolation
# on an hourly series that may contain gaps.
from meteostat import Point, Hourly
# Get hourly data that may have gaps
location = Point(41.8781, -87.6298) # Chicago
start = datetime(2020, 1, 15)
end = datetime(2020, 1, 20)
data = Hourly(location, start, end)
# Check for missing values before processing
raw_data = data.fetch()
missing_before = raw_data.isnull().sum()
print("Missing values before interpolation:")
print(missing_before)
# Interpolate missing values (max 3 consecutive hours)
# interpolate() returns a new time series object — rebind before fetching.
data = data.interpolate(limit=3)
interpolated_data = data.fetch()
missing_after = interpolated_data.isnull().sum()
print("Missing values after interpolation:")
print(missing_after)

from datetime import datetime
# Example: resample one hourly series to daily, weekly and monthly
# frequencies using pandas frequency strings.
from meteostat import Point, Hourly
# Start with hourly data
location = Point(40.7128, -74.0060) # New York
start = datetime(2020, 6, 1)
end = datetime(2020, 8, 31)
hourly_data = Hourly(location, start, end)
# Aggregate to daily values
daily_agg = hourly_data.aggregate('D')
daily_data = daily_agg.fetch()
print(f"Aggregated to {len(daily_data)} daily records")
# Aggregate to weekly values
weekly_agg = hourly_data.aggregate('W')
weekly_data = weekly_agg.fetch()
print(f"Aggregated to {len(weekly_data)} weekly records")
# Aggregate to monthly values
monthly_agg = hourly_data.aggregate('MS') # Month start
monthly_data = monthly_agg.fetch()
print(f"Aggregated to {len(monthly_data)} monthly records")from datetime import datetime
# Example: multi-station time series — compare regular aggregation
# (one series per station) with spatial aggregation (station average).
from meteostat import Stations, Daily
# Get data from multiple stations in a region
stations = Stations().region('DE').nearby(52.5200, 13.4050, 100000).fetch(5)
# Create time series for multiple stations
start = datetime(2020, 1, 1)
end = datetime(2020, 12, 31)
data = Daily(stations, start, end)
# Regular aggregation (keeps station dimension)
monthly_data = data.aggregate('MS')
station_monthly = monthly_data.fetch()
print(f"Monthly data with stations: {station_monthly.shape}")
# Spatial aggregation (averages across stations)
regional_monthly = data.aggregate('MS', spatial=True)
regional_data = regional_monthly.fetch()
print(f"Regional monthly averages: {regional_data.shape}")from datetime import datetime
# Example: convert the same daily series to Imperial and to scientific
# units using the converters shipped in meteostat.units.
from meteostat import Point, Daily, units
# Get daily data
location = Point(39.7392, -104.9903) # Denver
start = datetime(2020, 1, 1)
end = datetime(2020, 12, 31)
data = Daily(location, start, end)
# Convert to Imperial units
imperial_data = data.convert({
'tavg': units.fahrenheit,
'tmin': units.fahrenheit,
'tmax': units.fahrenheit,
'prcp': units.inches
})
imperial_df = imperial_data.fetch()
print("Temperature in Fahrenheit, precipitation in inches:")
print(imperial_df[['tavg', 'tmin', 'tmax', 'prcp']].head())
# Convert to scientific units
# NOTE: convert() is called on the original `data` again — conversions
# are independent, not cumulative.
scientific_data = data.convert({
'tavg': units.kelvin,
'tmin': units.kelvin,
'tmax': units.kelvin,
'wspd': units.ms # m/s instead of km/h
})
scientific_df = scientific_data.fetch()
print("Temperature in Kelvin, wind speed in m/s:")
print(scientific_df[['tavg', 'wspd']].head())

from meteostat import Point, Daily
# Define custom conversion functions
def celsius_to_rankine(temp_c):
    """Return *temp_c* (degrees Celsius) expressed in degrees Rankine."""
    # Shift onto the absolute (Kelvin) scale, then rescale to Rankine.
    absolute = temp_c + 273.15
    return absolute * 9/5
def mm_to_feet(mm):
    """Return *mm* (millimeters) expressed in feet (1 ft = 304.8 mm)."""
    MM_PER_FOOT = 304.8
    return mm / MM_PER_FOOT
# Apply custom conversions
# Fix: this snippet calls datetime(...) but only imported Point and Daily
# from meteostat — without this import it raises NameError when run alone.
from datetime import datetime

location = Point(25.7617, -80.1918) # Miami
data = Daily(location, datetime(2020, 1, 1), datetime(2020, 3, 31))
# convert() accepts plain callables, not only meteostat.units helpers
converted_data = data.convert({
    'tavg': celsius_to_rankine,
    'prcp': mm_to_feet
})
custom_df = converted_data.fetch()
print("Custom unit conversions:")
print(custom_df[['tavg', 'prcp']].head())

Time series classes use appropriate aggregation functions when aggregating to coarser temporal resolutions:
# Default per-parameter aggregation functions applied when resampling a
# time series to a coarser temporal resolution.
aggregation_methods = dict(
    # Temperatures are averaged, except the daily extremes
    temp='mean',
    tavg='mean',
    tmin='min',           # minimum of the period
    tmax='max',           # maximum of the period
    dwpt='mean',
    # Precipitation totals are summed; snow depth keeps its maximum
    prcp='sum',
    snow='max',
    # Wind: circular mean for direction, plain mean for speed, max gust
    wdir='degree_mean',
    wspd='mean',
    wpgt='max',
    # Pressure and other continuous variables
    pres='mean',
    rhum='mean',
    # Sunshine duration is summed; condition code keeps the worst value
    tsun='sum',
    coco='max',
)

# Assess data completeness
# Keep only well-covered parameters (`data` is a previously created
# Hourly/Daily/Monthly instance, as in the examples above).
coverage = data.coverage()
high_quality = coverage[coverage > 0.8] # >80% coverage
print(f"Parameters with good coverage: {list(high_quality.index)}")# Conservative interpolation for critical applications
# limit=1 bridges only isolated missing observations
conservative_data = data.interpolate(limit=1) # Only fill single gaps
# More aggressive gap-filling for visualization
visualization_data = data.interpolate(limit=6) # Fill up to 6-hour gaps

# Check for unrealistic temporal jumps
# Flag suspiciously large hour-to-hour temperature changes as potential
# outliers for manual review.
df = data.fetch()
# Absolute change between consecutive observations
temp_diff = df['temp'].diff().abs()
outliers = temp_diff[temp_diff > 10] # >10°C hourly change
print(f"Potential temperature outliers: {len(outliers)}")Install with Tessl CLI
npx tessl i tessl/pypi-meteostat