0
# Weather Data I/O
1
2
Access and read weather data from multiple sources, including TMY files, NSRDB, PVGIS, SURFRAD, and other meteorological databases. This module provides tools for retrieving and parsing weather data for photovoltaic modeling.
3
4
## Capabilities
5
6
### TMY Data Sources
7
8
Read and retrieve Typical Meteorological Year data from various formats.
9
10
```python { .api }
11
def read_tmy2(filename):
12
"""
13
Read TMY2 weather data files.
14
15
Parameters:
16
- filename: str, path to TMY2 file
17
18
Returns:
19
tuple: (data, metadata) where data is DataFrame with weather data
20
and metadata is dict with station information
21
"""
22
23
def read_tmy3(filename, coerce_year=None, map_variables=True, encoding=None):
24
"""
25
Read TMY3 weather data files.
26
27
Parameters:
28
- filename: str, path to TMY3 file
29
- coerce_year: int, force all data to specific year
30
- map_variables: bool, map to standard pvlib names
31
- encoding: str, file encoding
32
33
Returns:
34
tuple: (data, metadata) where data is DataFrame with weather data
35
"""
36
37
def read_epw(filename, coerce_year=None):
38
"""
39
Read EnergyPlus Weather (EPW) files.
40
41
Parameters:
42
- filename: str, path to EPW file
43
- coerce_year: int, force all data to specific year
44
45
Returns:
46
tuple: (data, metadata) where data is DataFrame with hourly weather data
47
"""
48
49
def parse_epw(filename, coerce_year=None):
50
"""
51
Parse EnergyPlus Weather files with detailed error handling.
52
53
Parameters:
54
- filename: file-like object, an already-open text buffer containing EPW data (use read_epw to load from a path)
55
- coerce_year: int, force all data to specific year
56
57
Returns:
58
tuple: (data, metadata) with parsed weather data
59
"""
60
```
61
62
### NSRDB (National Solar Radiation Database)
63
64
Access high-quality satellite-derived solar irradiance data.
65
66
```python { .api }
67
def get_psm3(latitude, longitude, api_key, email, names='tmy', interval=60,
68
leap_day=False, full_name='pvlib python', affiliation='pvlib python',
69
reason='pvlib python', mailing_list=False, utc=True,
70
map_variables=True, attributes=(), timeout=30):
71
"""
72
Get NSRDB PSM3 data via API.
73
74
Parameters:
75
- latitude: numeric, latitude in degrees
76
- longitude: numeric, longitude in degrees
77
- api_key: str, NREL API key
78
- email: str, email address for API access
79
- names: str or list, data years ('tmy', year, or list of years)
80
- interval: int, time interval in minutes (30 or 60)
81
- leap_day: bool, include leap day in data
82
- utc: bool, return timestamps in UTC
83
- map_variables: bool, map to standard pvlib names
84
- attributes: list, additional data attributes to retrieve
85
- timeout: int, request timeout in seconds
86
87
Returns:
88
tuple: (data, metadata) with solar irradiance and meteorological data
89
"""
90
91
def read_psm3(filename, map_variables=True):
92
"""
93
Read PSM3 files downloaded from NSRDB.
94
95
Parameters:
96
- filename: str, path to PSM3 CSV file
97
- map_variables: bool, map to standard pvlib names
98
99
Returns:
100
tuple: (data, metadata) with parsed PSM3 data
101
"""
102
103
def get_nsrdb_psm4_tmy(latitude, longitude, api_key, email, year='tmy',
104
attributes=['ghi', 'dni', 'dhi', 'temp_air', 'wind_speed'],
105
names=None, map_variables=True, leap_day=False,
106
interval=60, full_name='pvlib python',
107
affiliation='pvlib python', reason='pvlib python',
108
mailing_list=False, timeout=30):
109
"""
110
Get NSRDB PSM4 TMY data.
111
112
Parameters:
113
- latitude: numeric, latitude in degrees
114
- longitude: numeric, longitude in degrees
115
- api_key: str, NREL API key
116
- email: str, email address
117
- year: str or int, TMY year or specific year
118
- attributes: list, weather variables to retrieve
119
- map_variables: bool, map to standard pvlib names
120
- leap_day: bool, include leap day
121
- interval: int, time interval in minutes
122
- timeout: int, request timeout in seconds
123
124
Returns:
125
tuple: (data, metadata) with PSM4 TMY data
126
"""
127
```
128
129
### PVGIS Data Access
130
131
Retrieve data from the European Commission's Photovoltaic Geographical Information System.
132
133
```python { .api }
134
def get_pvgis_tmy(latitude, longitude, outputformat='json', usehorizon=True,
135
userhorizon=None, startyear=None, endyear=None,
136
url='https://re.jrc.ec.europa.eu/api/v5_2/',
137
map_variables=True, timeout=30):
138
"""
139
Get PVGIS TMY data via API.
140
141
Parameters:
142
- latitude: numeric, latitude in degrees (-90 to 90)
143
- longitude: numeric, longitude in degrees (-180 to 180)
144
- outputformat: str, output format ('json', 'csv', 'basic')
145
- usehorizon: bool, consider horizon shading
146
- userhorizon: list, user-defined horizon profile
147
- startyear: int, start year for TMY calculation
148
- endyear: int, end year for TMY calculation
149
- url: str, PVGIS API base URL
150
- map_variables: bool, map to standard pvlib names
151
- timeout: int, request timeout in seconds
152
153
Returns:
154
tuple: (data, metadata, inputs) with TMY data and metadata
155
"""
156
157
def get_pvgis_hourly(latitude, longitude, start=None, end=None,
158
raddatabase=None, components=True, surface_tilt=0,
159
surface_azimuth=180, outputformat='json',
160
usehorizon=True, userhorizon=None,
161
pvcalculation=False, peakpower=None,
162
pvtechchoice='crystSi', mountingplace='free',
163
loss=0, trackingtype=0, tilt=None, azim=None,
164
url='https://re.jrc.ec.europa.eu/api/v5_2/',
165
map_variables=True, timeout=30):
166
"""
167
Get PVGIS hourly data via API.
168
169
Parameters:
170
- latitude: numeric, latitude in degrees
171
- longitude: numeric, longitude in degrees
172
- start: datetime-like, start date (YYYY or YYYY-MM-DD)
173
- end: datetime-like, end date (YYYY or YYYY-MM-DD)
174
- raddatabase: str, radiation database ('PVGIS-SARAH2', 'PVGIS-NSRDB', etc.)
175
- components: bool, include irradiance components
176
- surface_tilt: numeric, surface tilt angle in degrees
177
- surface_azimuth: numeric, surface azimuth in degrees
178
- outputformat: str, output format ('json', 'csv')
179
- usehorizon: bool, consider horizon shading
180
- pvcalculation: bool, include PV power calculation
181
- peakpower: numeric, PV system peak power in kW
182
- pvtechchoice: str, PV technology choice
183
- mountingplace: str, mounting type ('free', 'building')
184
- loss: numeric, system loss percentage
185
- timeout: int, request timeout in seconds
186
187
Returns:
188
tuple: (data, metadata, inputs) with hourly data
189
"""
190
191
def read_pvgis_tmy(filename, pvgis_format=None, map_variables=True):
192
"""
193
Read PVGIS TMY files.
194
195
Parameters:
196
- filename: str, path to PVGIS file
197
- pvgis_format: str, file format ('json', 'csv', 'basic')
198
- map_variables: bool, map to standard pvlib names
199
200
Returns:
201
tuple: (data, metadata, inputs) with parsed PVGIS data
202
"""
203
204
def get_pvgis_horizon(latitude, longitude, url='https://re.jrc.ec.europa.eu/api/v5_2/', **kwargs):
205
"""
206
Get horizon profile from PVGIS.
207
208
Parameters:
209
- latitude: numeric, latitude in degrees
210
- longitude: numeric, longitude in degrees
211
- url: str, PVGIS API base URL
212
213
Returns:
214
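tuple: (data, metadata) where data is a pandas.Series of horizon elevation angles indexed by horizon azimuth and metadata is a dict returned by PVGIS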
215
"""
216
```
217
218
### SURFRAD and SRML Networks
219
220
Access ground-based radiation measurement networks.
221
222
```python { .api }
223
def read_surfrad(filename, map_variables=True):
224
"""
225
Read SURFRAD (Surface Radiation) data files.
226
227
Parameters:
228
- filename: str, path to SURFRAD file
229
- map_variables: bool, map to standard pvlib names
230
231
Returns:
232
tuple: (data, metadata) where data is a pandas.DataFrame with SURFRAD measurements and metadata is a dict with station information
233
"""
234
235
def read_srml(filename, map_variables=True):
236
"""
237
Read SRML (Solar Radiation Monitoring Laboratory) files.
238
239
Parameters:
240
- filename: str, path to SRML file
241
- map_variables: bool, map to standard pvlib names
242
243
Returns:
244
pandas.DataFrame with SRML measurements
245
"""
246
247
def get_srml(station, start, end, filetype='PO', map_variables=True,
248
url='http://solardat.uoregon.edu/SolarRadiationBasics.php'):
249
"""
250
Get SRML data via web scraping.
251
252
Parameters:
253
- station: str, SRML station identifier
254
- start: datetime-like, start date
255
- end: datetime-like, end date
256
- filetype: str, file type ('PO' for processed)
257
- map_variables: bool, map to standard pvlib names
258
- url: str, SRML base URL
259
260
Returns:
261
pandas.DataFrame with SRML data
262
"""
263
```
264
265
### MIDC and BSRN Networks
266
267
Access additional radiation measurement networks.
268
269
```python { .api }
270
def read_midc(filename, variable_map={}, raw_data=False, **kwargs):
271
"""
272
Read MIDC (Measurement and Instrumentation Data Center) files.
273
274
Parameters:
275
- filename: str, path to MIDC file
276
- variable_map: dict, custom variable name mapping
277
- raw_data: bool, return raw data without processing
278
279
Returns:
280
pandas.DataFrame with MIDC measurements
281
"""
282
283
def read_midc_raw_data_from_nrel(site, start, end, variable_map={}, **kwargs):
284
"""
285
Get MIDC data directly from NREL servers.
286
287
Parameters:
288
- site: str, MIDC site identifier
289
- start: datetime-like, start date
290
- end: datetime-like, end date
291
- variable_map: dict, custom variable mapping
292
293
Returns:
294
pandas.DataFrame with MIDC data
295
"""
296
297
def get_bsrn(station, start, end, username, password, url=None, **kwargs):
298
"""
299
Get BSRN (Baseline Surface Radiation Network) data.
300
301
Parameters:
302
- station: str, BSRN station identifier
303
- start: datetime-like, start date
304
- end: datetime-like, end date
305
- username: str, BSRN account username
306
- password: str, BSRN account password
307
- url: str, BSRN data server URL
308
309
Returns:
310
tuple: (data, metadata) where data is a pandas.DataFrame with BSRN measurements and metadata is a dict describing the station and records
311
"""
312
313
def read_bsrn(filename, logical_records=('0100',)):
314
"""
315
Read BSRN data files.
316
317
Parameters:
318
- filename: str, path to BSRN file
319
- logical_records: tuple, logical record types to read
320
321
Returns:
322
tuple: (data, metadata) where data is a pandas.DataFrame with BSRN data and metadata is a dict describing the station and records
323
"""
324
```
325
326
### CAMS and SolarAnywhere
327
328
Access satellite-based and commercial data sources.
329
330
```python { .api }
331
def get_cams(latitude, longitude, start, end, email, identifier='mcclear',
332
time_step='PT01H', time_reference='UT', verbose=False,
333
map_variables=True, timeout=30):
334
"""
335
Get CAMS (Copernicus Atmosphere Monitoring Service) data.
336
337
Parameters:
338
- latitude: numeric, latitude in degrees
339
- longitude: numeric, longitude in degrees
340
- start: datetime-like, start date (YYYY-MM-DD)
341
- end: datetime-like, end date (YYYY-MM-DD)
342
- email: str, email for data request
343
- identifier: str, data service identifier
344
- time_step: str, time resolution ('PT01H', 'PT15M')
345
- time_reference: str, time reference ('UT', 'TST')
346
- verbose: bool, print request details
347
- map_variables: bool, map to standard pvlib names
348
- timeout: int, request timeout in seconds
349
350
Returns:
351
tuple: (data, metadata) where data is a pandas.DataFrame with CAMS irradiance data and metadata is a dict describing the request
352
"""
353
354
def read_cams(filename, integrated=False, label=None, map_variables=True):
355
"""
356
Read CAMS data files.
357
358
Parameters:
359
- filename: str, path to CAMS file
360
- integrated: bool, data is time-integrated
361
- label: str, data label for multi-file datasets
362
- map_variables: bool, map to standard pvlib names
363
364
Returns:
365
tuple: (data, metadata) where data is a pandas.DataFrame with parsed CAMS data and metadata is a dict parsed from the file header
366
"""
367
368
def get_solaranywhere(latitude, longitude, api_key, start=None, end=None,
369
time_zone='UTC', spatial_resolution='1km',
370
temporal_resolution='15min', irradiance_type='beam+diffuse',
371
weather_data_format='csv', url=None, **kwargs):
372
"""
373
Get SolarAnywhere satellite irradiance data.
374
375
Parameters:
376
- latitude: numeric, latitude in degrees
377
- longitude: numeric, longitude in degrees
378
- api_key: str, SolarAnywhere API key
379
- start: datetime-like, start date
380
- end: datetime-like, end date
381
- time_zone: str, time zone identifier
382
- spatial_resolution: str, spatial resolution ('1km', '10km')
383
- temporal_resolution: str, temporal resolution ('15min', 'hour')
384
- irradiance_type: str, irradiance components to retrieve
385
- weather_data_format: str, output format ('csv', 'json')
386
387
Returns:
388
pandas.DataFrame with SolarAnywhere data
389
"""
390
391
def read_solaranywhere(filename, map_variables=True, encoding='iso-8859-1'):
392
"""
393
Read SolarAnywhere data files.
394
395
Parameters:
396
- filename: str, path to SolarAnywhere file
397
- map_variables: bool, map to standard pvlib names
398
- encoding: str, file encoding
399
400
Returns:
401
pandas.DataFrame with SolarAnywhere data
402
"""
403
```
404
405
### Climate Data Sources
406
407
Access precipitation and climate data for soiling and snow modeling.
408
409
```python { .api }
410
def read_crn(filename, map_variables=True):
411
"""
412
Read Climate Reference Network (CRN) files.
413
414
Parameters:
415
- filename: str, path to CRN file
416
- map_variables: bool, map to standard pvlib names
417
418
Returns:
419
pandas.DataFrame with CRN climate data
420
"""
421
422
def get_acis_prism(latitude, longitude, start, end, map_variables=True, **kwargs):
423
"""
424
Get ACIS PRISM precipitation data.
425
426
Parameters:
427
- latitude: numeric, latitude in degrees
428
- longitude: numeric, longitude in degrees
429
- start: datetime-like, start date
430
- end: datetime-like, end date
431
- map_variables: bool, map to standard pvlib names
432
433
Returns:
434
pandas.DataFrame with precipitation data
435
"""
436
437
def get_acis_station_data(station, start, end, trace_val=0.001,
438
map_variables=True, **kwargs):
439
"""
440
Get ACIS station-specific data.
441
442
Parameters:
443
- station: str, station identifier
444
- start: datetime-like, start date
445
- end: datetime-like, end date
446
- trace_val: numeric, value for trace precipitation
447
- map_variables: bool, map to standard pvlib names
448
449
Returns:
450
pandas.DataFrame with station weather data
451
"""
452
453
def get_acis_available_stations(latitude_range, longitude_range,
454
start_date=None, end_date=None, **kwargs):
455
"""
456
Get list of available ACIS weather stations.
457
458
Parameters:
459
- latitude_range: tuple, (min_lat, max_lat)
460
- longitude_range: tuple, (min_lon, max_lon)
461
- start_date: datetime-like, earliest data date
462
- end_date: datetime-like, latest data date
463
464
Returns:
465
list of dict with station information
466
"""
467
```
468
469
## Usage Examples
470
471
### Loading TMY Data
472
473
```python
474
import pvlib
475
from pvlib import iotools
476
import pandas as pd
477
478
# Read TMY3 file
data, metadata = iotools.read_tmy3('path/to/tmy3_file.csv')

# read_tmy3 reports the site coordinates under lowercase keys
# ('latitude', 'longitude', 'altitude'); the capitalised names raise KeyError.
print(f"Location: {metadata['Name']}, {metadata['State']}")
print(f"Latitude: {metadata['latitude']:.2f}, Longitude: {metadata['longitude']:.2f}")
print(f"Elevation: {metadata['altitude']:.1f} m")

# Display first few rows
print(data.head())

# Read EnergyPlus Weather file
epw_data, epw_meta = iotools.read_epw('weather_file.epw')
print(f"Data shape: {epw_data.shape}")
print(f"Available columns: {epw_data.columns.tolist()}")
491
```
492
493
### Accessing NSRDB Data
494
495
```python
496
import pvlib
497
from pvlib import iotools
498
import matplotlib.pyplot as plt
499
500
# Get PSM3 TMY data from NSRDB
api_key = 'your_nrel_api_key'
email = 'your_email@domain.com'
lat, lon = 39.7555, -105.2211  # Golden, CO

# Retrieve TMY data
data, metadata = iotools.get_psm3(
    latitude=lat,
    longitude=lon,
    api_key=api_key,
    email=email,
    names='tmy',
    attributes=['ghi', 'dni', 'dhi', 'temp_air', 'wind_speed'],
    map_variables=True
)

print(f"TMY data retrieved for {metadata['Location ID']}")
print(f"Available variables: {data.columns.tolist()}")

# Plot annual irradiance profile
fig, axes = plt.subplots(2, 1, figsize=(12, 8))

# Daily totals: summing W/m² samples gives Wh/m² only for the default
# 60-minute interval requested above.
# NOTE(review): TMY indices may carry a different source year per month --
# confirm the index is contiguous before relying on a daily resample.
daily_ghi = data['ghi'].resample('D').sum() / 1000  # kWh/m²/day
daily_ghi.plot(ax=axes[0], title='Daily Global Horizontal Irradiance')
axes[0].set_ylabel('GHI (kWh/m²/day)')

# Monthly averages
# Group on the calendar month instead of resample('M'): the 'M' alias is
# deprecated in recent pandas releases, and grouping yields the 1..12 month
# numbers needed for the x-axis directly.
monthly_means = data[['ghi', 'dni', 'dhi']].groupby(data.index.month).mean()

for column, label, marker in (
    ('ghi', 'GHI', 'o'),
    ('dni', 'DNI', 's'),
    ('dhi', 'DHI', '^'),
):
    axes[1].plot(monthly_means.index, monthly_means[column],
                 label=label, marker=marker)

axes[1].set_xlabel('Month')
axes[1].set_ylabel('Irradiance (W/m²)')
axes[1].legend()
axes[1].set_title('Monthly Average Irradiance Components')

plt.tight_layout()
plt.show()
542
```
543
544
### Retrieving PVGIS Data
545
546
```python
547
import pvlib
548
from pvlib import iotools
549
import numpy as np
550
import pandas as pd
551
552
# Location in Europe (Munich, Germany)
lat, lon = 48.1351, 11.5820

# Get PVGIS TMY data
# NOTE(review): the number of values returned by the PVGIS getters has
# differed between pvlib releases -- confirm the unpacking below against the
# installed version.
pvgis_data, pvgis_meta, pvgis_inputs = iotools.get_pvgis_tmy(
    latitude=lat,
    longitude=lon,
    usehorizon=True,
    outputformat='json'
)

print(f"PVGIS TMY data for coordinates: {lat:.2f}, {lon:.2f}")
print(f"Data period: {pvgis_inputs['meteo_data']['year_min']} - {pvgis_inputs['meteo_data']['year_max']}")
print(f"Available variables: {pvgis_data.columns.tolist()}")

# Get hourly data for specific period
start_date = 2020
end_date = 2020

# The hourly endpoint reports plane-of-array irradiance and has no 'ghi'
# column.  Requesting the horizontal plane (tilt 0) without component
# splitting makes 'poa_global' the quantity comparable to the TMY 'ghi'.
hourly_data, hourly_meta, hourly_inputs = iotools.get_pvgis_hourly(
    latitude=lat,
    longitude=lon,
    start=start_date,
    end=end_date,
    raddatabase='PVGIS-SARAH2',
    components=False,
    surface_tilt=0,
    surface_azimuth=180
)

# Compare TMY vs actual year
comparison_months = []
for month in range(1, 13):
    tmy_month = pvgis_data[pvgis_data.index.month == month]['ghi'].mean()
    actual_month = hourly_data[hourly_data.index.month == month]['poa_global'].mean()

    comparison_months.append({
        'month': month,
        'tmy_ghi': tmy_month,
        'actual_ghi': actual_month,
        'difference': actual_month - tmy_month
    })

comparison_df = pd.DataFrame(comparison_months)
print("\nTMY vs 2020 Comparison (Monthly Average GHI):")
print(comparison_df.round(2))

# Get horizon data
# get_pvgis_horizon returns the elevation angles as a Series (indexed by
# azimuth) together with a metadata dict, so unpack both.
horizon, horizon_meta = iotools.get_pvgis_horizon(lat, lon)
print(f"\nHorizon profile with {len(horizon)} data points")
print(f"Max horizon elevation: {horizon.max():.1f}°")
603
```
604
605
### Working with Multiple Data Sources
606
607
```python
608
import pvlib
609
from pvlib import iotools
610
import pandas as pd
611
import matplotlib.pyplot as plt
612
613
# Compare data from multiple sources for same location
lat, lon = 36.0544, -112.1401  # Grand Canyon, AZ
year = 2020

# Source 1: NSRDB PSM3
nsrdb_data, nsrdb_meta = iotools.get_psm3(
    lat, lon, api_key='your_key', email='your_email',
    names=year, map_variables=True
)

# Source 2: PVGIS (if available for location)
# components=False on the default horizontal plane yields a single
# 'poa_global' column, which is the GHI-equivalent used below; the hourly
# endpoint has no 'ghi' column.
# NOTE(review): the number of values returned by get_pvgis_hourly has
# differed between pvlib releases -- confirm the unpacking.
try:
    pvgis_data, pvgis_meta, pvgis_inputs = iotools.get_pvgis_hourly(
        lat, lon, start=year, end=year, components=False
    )
    pvgis_available = True
except Exception as exc:
    # Catch Exception rather than everything so Ctrl-C still works, and
    # report the cause instead of hiding it.
    pvgis_available = False
    print(f"PVGIS data not available for this location: {exc}")

# Source 3: SolarAnywhere (requires API key)
try:
    sa_data = iotools.get_solaranywhere(
        lat, lon,
        api_key='your_solaranywhere_key',
        start=f'{year}-01-01',
        end=f'{year}-12-31'
    )
    sa_available = True
except Exception as exc:
    sa_available = False
    print(f"SolarAnywhere data not available: {exc}")

# Compare monthly statistics
monthly_stats = []

for month in range(1, 13):
    stats = {'month': month}

    # NSRDB statistics
    nsrdb_month = nsrdb_data[nsrdb_data.index.month == month]
    stats['nsrdb_ghi_avg'] = nsrdb_month['ghi'].mean()
    stats['nsrdb_dni_avg'] = nsrdb_month['dni'].mean()
    stats['nsrdb_temp_avg'] = nsrdb_month['temp_air'].mean()

    # PVGIS statistics (if available)
    if pvgis_available:
        pvgis_month = pvgis_data[pvgis_data.index.month == month]
        stats['pvgis_ghi_avg'] = pvgis_month['poa_global'].mean()
        stats['pvgis_temp_avg'] = pvgis_month['temp_air'].mean()

    monthly_stats.append(stats)

stats_df = pd.DataFrame(monthly_stats)
print("\nMonthly Comparison Between Data Sources:")
print(stats_df.round(2))

# Plot comparison
if pvgis_available:
    fig, axes = plt.subplots(2, 1, figsize=(12, 8))

    axes[0].plot(stats_df['month'], stats_df['nsrdb_ghi_avg'],
                 'o-', label='NSRDB PSM3', linewidth=2)
    axes[0].plot(stats_df['month'], stats_df['pvgis_ghi_avg'],
                 's-', label='PVGIS', linewidth=2)
    axes[0].set_ylabel('GHI (W/m²)')
    axes[0].set_title('Monthly Average Global Horizontal Irradiance')
    axes[0].legend()
    axes[0].grid(True)

    axes[1].plot(stats_df['month'], stats_df['nsrdb_temp_avg'],
                 'o-', label='NSRDB', linewidth=2)
    axes[1].plot(stats_df['month'], stats_df['pvgis_temp_avg'],
                 's-', label='PVGIS', linewidth=2)
    axes[1].set_xlabel('Month')
    axes[1].set_ylabel('Temperature (°C)')
    axes[1].set_title('Monthly Average Air Temperature')
    axes[1].legend()
    axes[1].grid(True)

    plt.tight_layout()
    plt.show()
695
```
696
697
### Processing Downloaded Files
698
699
```python
700
import pvlib
701
from pvlib import iotools
702
import glob
703
import pandas as pd
704
705
# Process multiple TMY files in a directory
tmy_files = glob.glob('weather_data/*.csv')
weather_summary = []

for file in tmy_files:
    try:
        # Try reading as TMY3 first
        data, metadata = iotools.read_tmy3(file)

        # Calculate annual statistics.  read_tmy3 stores the coordinates
        # under lowercase keys; the capitalised names would always fall back
        # to None.
        annual_stats = {
            'file': file,
            'location': metadata.get('Name', 'Unknown'),
            'latitude': metadata.get('latitude', None),
            'longitude': metadata.get('longitude', None),
            'annual_ghi': data['ghi'].sum() / 1000,  # kWh/m²/year
            'annual_dni': data['dni'].sum() / 1000,
            'avg_temp': data['temp_air'].mean(),
            'max_temp': data['temp_air'].max(),
            'min_temp': data['temp_air'].min(),
            'avg_wind': data['wind_speed'].mean(),
            'max_wind': data['wind_speed'].max()
        }
        weather_summary.append(annual_stats)

    except Exception as e:
        # Best-effort batch: report the file and keep going.
        print(f"Error reading {file}: {e}")

# Create summary dataframe
summary_df = pd.DataFrame(weather_summary)

if summary_df.empty:
    # Without any rows the frame has no 'annual_ghi' column, so the ranking
    # below would raise KeyError.
    print("No weather files could be read; nothing to summarize.")
else:
    summary_df = summary_df.round(2)

    print("Weather Data Summary:")
    print(summary_df.to_string(index=False))

    # Find locations with highest solar resource
    top_solar = summary_df.nlargest(3, 'annual_ghi')
    print("\nTop 3 locations by annual GHI:")
    print(top_solar[['location', 'latitude', 'longitude', 'annual_ghi']])
744
```
745
746
### Error Handling and Data Validation
747
748
```python
749
import pvlib
750
from pvlib import iotools
751
import pandas as pd
752
import numpy as np
753
754
def validate_weather_data(data, location_name="Unknown"):
    """
    Validate weather data for common issues.

    Parameters:
    - data: pandas.DataFrame of weather observations; the 'ghi', 'temp_air'
      and 'wind_speed' columns are range-checked when present, and a
      DatetimeIndex is checked for gaps
    - location_name: str, label used in the printed report

    Returns:
    bool: True when no issue was found, False otherwise

    A short report is printed to stdout as a side effect.
    """
    print(f"\nValidating weather data for: {location_name}")

    issues = []

    # An empty frame passes every check below vacuously, so flag it up front.
    if data.empty:
        issues.append("Dataset is empty")

    # Check for missing data
    missing_data = data.isnull().sum()
    missing_data = missing_data[missing_data > 0]
    if not missing_data.empty:
        issues.append(f"Missing data found: {missing_data.to_dict()}")

    # Per-column range checks: (column, predicate selecting bad rows, label)
    range_checks = (
        ('ghi', lambda s: s < 0, "Negative GHI values"),
        ('temp_air', lambda s: (s < -50) | (s > 60), "Extreme temperature values"),
        ('wind_speed', lambda s: s > 50, "Unusually high wind speeds"),  # > 50 m/s is very rare
    )
    for column, is_bad, label in range_checks:
        if column not in data.columns:
            continue
        bad_count = is_bad(data[column]).sum()
        if bad_count > 0:
            issues.append(f"{label}: {bad_count}")

    # Check time series continuity
    if isinstance(data.index, pd.DatetimeIndex):
        steps = pd.Series(data.index).diff().dropna()
        # Only forward steps describe the sampling interval.  Including the
        # zero-length steps produced by duplicate timestamps could make the
        # modal step zero, which would report every normal step as a gap.
        forward_steps = steps[steps > pd.Timedelta(0)]
        if not forward_steps.empty:
            expected_freq = forward_steps.mode()[0]
            large_gaps = (forward_steps > expected_freq * 1.5).sum()
            if large_gaps > 0:
                issues.append(f"Time series gaps found: {large_gaps}")

    if issues:
        print("Issues found:")
        for issue in issues:
            print(f"  - {issue}")
    else:
        print("✓ No major issues detected")

    return len(issues) == 0
801
802
# Example usage with error handling
803
def robust_data_loading(source, **kwargs):
    """
    Load weather data from one named source and validate it.

    Any failure (including an unknown source name) is reported on stdout
    and turned into a (None, None, False) result instead of an exception, so
    a caller can simply move on to the next source.

    Parameters:
    - source: str, one of 'nsrdb', 'pvgis' or 'tmy3'
    - **kwargs: forwarded unchanged to the matching iotools function

    Returns:
    tuple: (data, metadata, is_valid); data and metadata are None when
    loading failed
    """
    try:
        if source == 'nsrdb':
            data, metadata = iotools.get_psm3(**kwargs)
            location_name = metadata.get('Location ID', 'NSRDB Location')

        elif source == 'pvgis':
            # NOTE(review): the length of the tuple returned by
            # get_pvgis_tmy has differed between pvlib releases; taking the
            # first element as data and the last as metadata avoids a fixed
            # unpacking that fails on a different arity -- confirm against
            # the installed version.
            result = iotools.get_pvgis_tmy(**kwargs)
            data, metadata = result[0], result[-1]
            location_name = f"PVGIS ({kwargs.get('latitude')}, {kwargs.get('longitude')})"

        elif source == 'tmy3':
            data, metadata = iotools.read_tmy3(**kwargs)
            location_name = metadata.get('Name', 'TMY3 Location')

        else:
            raise ValueError(f"Unknown source: {source}")

        # Validate the loaded data
        is_valid = validate_weather_data(data, location_name)

        if not is_valid:
            print(f"Warning: Data quality issues detected for {location_name}")

        return data, metadata, is_valid

    except Exception as e:
        # Deliberate best-effort boundary: report and signal failure.
        print(f"Error loading data from {source}: {e}")
        return None, None, False
834
835
# Test robust loading
lat, lon = 40.0150, -105.2705  # Boulder, CO

# Arguments for each source, listed in the order the sources are attempted
source_kwargs = {
    'nsrdb': {
        'latitude': lat, 'longitude': lon,
        'api_key': 'DEMO_KEY', 'email': 'test@example.com',
        'names': 'tmy'
    },
    'pvgis': {'latitude': lat, 'longitude': lon},
    'tmy3': {'filename': 'local_tmy_file.csv'},
}

# Try multiple sources with fallback: stop at the first one that both loads
# and passes validation
for source, kwargs in source_kwargs.items():
    data, metadata, valid = robust_data_loading(source, **kwargs)

    if data is None or not valid:
        print(f"✗ Failed to load valid data from {source}")
        continue

    print(f"✓ Successfully loaded valid data from {source}")
    break
858
```