or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

atmosphere.md, bifacial.md, clearsky.md, iam.md, index.md, inverter.md, iotools.md, irradiance.md, losses.md, pvsystem.md, solar-position.md, spectrum.md, temperature.md

docs/iotools.md

0

# Weather Data I/O

1

2

Access and read weather data from multiple sources including TMY files, NSRDB, PVGIS, SURFRAD, and other meteorological databases. Comprehensive tools for retrieving and parsing weather data for photovoltaic modeling.

3

4

## Capabilities

5

6

### TMY Data Sources

7

8

Read and retrieve Typical Meteorological Year data from various formats.

9

10

```python { .api }

11

def read_tmy2(filename):

12

"""

13

Read TMY2 weather data files.

14

15

Parameters:

16

- filename: str, path to TMY2 file

17

18

Returns:

19

tuple: (data, metadata) where data is DataFrame with weather data

20

and metadata is dict with station information

21

"""

22

23

def read_tmy3(filename, coerce_year=None, map_variables=True, encoding=None):

24

"""

25

Read TMY3 weather data files.

26

27

Parameters:

28

- filename: str, path to TMY3 file

29

- coerce_year: int, force all data to specific year

30

- map_variables: bool, map to standard pvlib names

31

- encoding: str, file encoding

32

33

Returns:

34

tuple: (data, metadata) where data is DataFrame with weather data

35

"""

36

37

def read_epw(filename, coerce_year=None):

38

"""

39

Read EnergyPlus Weather (EPW) files.

40

41

Parameters:

42

- filename: str, path to EPW file

43

- coerce_year: int, force all data to specific year

44

45

Returns:

46

tuple: (data, metadata) where data is DataFrame with hourly weather data

47

"""

48

49

def parse_epw(filename, coerce_year=None):

50

"""

51

Parse EnergyPlus Weather files with detailed error handling.

52

53

Parameters:

54

- filename: str, path to EPW file

55

- coerce_year: int, force all data to specific year

56

57

Returns:

58

tuple: (data, metadata) with parsed weather data

59

"""

60

```

61

62

### NSRDB (National Solar Radiation Database)

63

64

Access high-quality satellite-derived solar irradiance data.

65

66

```python { .api }

67

def get_psm3(latitude, longitude, api_key, email, names='tmy', interval=60,

68

leap_day=False, full_name='pvlib python', affiliation='pvlib python',

69

reason='pvlib python', mailing_list=False, utc=True,

70

map_variables=True, attributes=(), timeout=30):

71

"""

72

Get NSRDB PSM3 data via API.

73

74

Parameters:

75

- latitude: numeric, latitude in degrees

76

- longitude: numeric, longitude in degrees

77

- api_key: str, NREL API key

78

- email: str, email address for API access

79

- names: str or list, data years ('tmy', year, or list of years)

80

- interval: int, time interval in minutes (30 or 60)

81

- leap_day: bool, include leap day in data

82

- utc: bool, return timestamps in UTC

83

- map_variables: bool, map to standard pvlib names

84

- attributes: list, additional data attributes to retrieve

85

- timeout: int, request timeout in seconds

86

87

Returns:

88

tuple: (data, metadata) with solar irradiance and meteorological data

89

"""

90

91

def read_psm3(filename, map_variables=True):

92

"""

93

Read PSM3 files downloaded from NSRDB.

94

95

Parameters:

96

- filename: str, path to PSM3 CSV file

97

- map_variables: bool, map to standard pvlib names

98

99

Returns:

100

tuple: (data, metadata) with parsed PSM3 data

101

"""

102

103

def get_nsrdb_psm4_tmy(latitude, longitude, api_key, email, year='tmy',

104

attributes=['ghi', 'dni', 'dhi', 'temp_air', 'wind_speed'],

105

names=None, map_variables=True, leap_day=False,

106

interval=60, full_name='pvlib python',

107

affiliation='pvlib python', reason='pvlib python',

108

mailing_list=False, timeout=30):

109

"""

110

Get NSRDB PSM4 TMY data.

111

112

Parameters:

113

- latitude: numeric, latitude in degrees

114

- longitude: numeric, longitude in degrees

115

- api_key: str, NREL API key

116

- email: str, email address

117

- year: str or int, TMY year or specific year

118

- attributes: list, weather variables to retrieve

119

- map_variables: bool, map to standard pvlib names

120

- leap_day: bool, include leap day

121

- interval: int, time interval in minutes

122

- timeout: int, request timeout in seconds

123

124

Returns:

125

tuple: (data, metadata) with PSM4 TMY data

126

"""

127

```

128

129

### PVGIS Data Access

130

131

Retrieve data from the European Commission's Photovoltaic Geographical Information System.

132

133

```python { .api }

134

def get_pvgis_tmy(latitude, longitude, outputformat='json', usehorizon=True,

135

userhorizon=None, startyear=None, endyear=None,

136

url='https://re.jrc.ec.europa.eu/api/v5_2/',

137

map_variables=True, timeout=30):

138

"""

139

Get PVGIS TMY data via API.

140

141

Parameters:

142

- latitude: numeric, latitude in degrees (-90 to 90)

143

- longitude: numeric, longitude in degrees (-180 to 180)

144

- outputformat: str, output format ('json', 'csv', 'basic')

145

- usehorizon: bool, consider horizon shading

146

- userhorizon: list, user-defined horizon profile

147

- startyear: int, start year for TMY calculation

148

- endyear: int, end year for TMY calculation

149

- url: str, PVGIS API base URL

150

- map_variables: bool, map to standard pvlib names

151

- timeout: int, request timeout in seconds

152

153

Returns:

154

tuple: (data, metadata, inputs) with TMY data and metadata

155

"""

156

157

def get_pvgis_hourly(latitude, longitude, start=None, end=None,

158

raddatabase=None, components=True, surface_tilt=0,

159

surface_azimuth=180, outputformat='json',

160

usehorizon=True, userhorizon=None,

161

pvcalculation=False, peakpower=None,

162

pvtechchoice='crystSi', mountingplace='free',

163

loss=0, trackingtype=0, tilt=None, azim=None,

164

url='https://re.jrc.ec.europa.eu/api/v5_2/',

165

map_variables=True, timeout=30):

166

"""

167

Get PVGIS hourly data via API.

168

169

Parameters:

170

- latitude: numeric, latitude in degrees

171

- longitude: numeric, longitude in degrees

172

- start: datetime-like, start date (YYYY or YYYY-MM-DD)

173

- end: datetime-like, end date (YYYY or YYYY-MM-DD)

174

- raddatabase: str, radiation database ('PVGIS-SARAH2', 'PVGIS-NSRDB', etc.)

175

- components: bool, include irradiance components

176

- surface_tilt: numeric, surface tilt angle in degrees

177

- surface_azimuth: numeric, surface azimuth in degrees

178

- outputformat: str, output format ('json', 'csv')

179

- usehorizon: bool, consider horizon shading

180

- pvcalculation: bool, include PV power calculation

181

- peakpower: numeric, PV system peak power in kW

182

- pvtechchoice: str, PV technology choice

183

- mountingplace: str, mounting type ('free', 'building')

184

- loss: numeric, system loss percentage

185

- timeout: int, request timeout in seconds

186

187

Returns:

188

tuple: (data, metadata, inputs) with hourly data

189

"""

190

191

def read_pvgis_tmy(filename, pvgis_format=None, map_variables=True):

192

"""

193

Read PVGIS TMY files.

194

195

Parameters:

196

- filename: str, path to PVGIS file

197

- pvgis_format: str, file format ('json', 'csv', 'basic')

198

- map_variables: bool, map to standard pvlib names

199

200

Returns:

201

tuple: (data, metadata, inputs) with parsed PVGIS data

202

"""

203

204

def get_pvgis_horizon(latitude, longitude, url='https://re.jrc.ec.europa.eu/api/v5_2/', **kwargs):

205

"""

206

Get horizon profile from PVGIS.

207

208

Parameters:

209

- latitude: numeric, latitude in degrees

210

- longitude: numeric, longitude in degrees

211

- url: str, PVGIS API base URL

212

213

Returns:

214

pandas.DataFrame with horizon elevation angles

215

"""

216

```

217

218

### SURFRAD and SRML Networks

219

220

Access ground-based radiation measurement networks.

221

222

```python { .api }

223

def read_surfrad(filename, map_variables=True):

224

"""

225

Read SURFRAD (Surface Radiation) data files.

226

227

Parameters:

228

- filename: str, path to SURFRAD file

229

- map_variables: bool, map to standard pvlib names

230

231

Returns:

232

pandas.DataFrame with SURFRAD measurements

233

"""

234

235

def read_srml(filename, map_variables=True):

236

"""

237

Read SRML (Solar Radiation Monitoring Laboratory) files.

238

239

Parameters:

240

- filename: str, path to SRML file

241

- map_variables: bool, map to standard pvlib names

242

243

Returns:

244

pandas.DataFrame with SRML measurements

245

"""

246

247

def get_srml(station, start, end, filetype='PO', map_variables=True,

248

url='http://solardat.uoregon.edu/SolarRadiationBasics.php'):

249

"""

250

Get SRML data via web scraping.

251

252

Parameters:

253

- station: str, SRML station identifier

254

- start: datetime-like, start date

255

- end: datetime-like, end date

256

- filetype: str, file type ('PO' for processed)

257

- map_variables: bool, map to standard pvlib names

258

- url: str, SRML base URL

259

260

Returns:

261

pandas.DataFrame with SRML data

262

"""

263

```

264

265

### MIDC and BSRN Networks

266

267

Access additional radiation measurement networks.

268

269

```python { .api }

270

def read_midc(filename, variable_map={}, raw_data=False, **kwargs):

271

"""

272

Read MIDC (Measurement and Instrumentation Data Center) files.

273

274

Parameters:

275

- filename: str, path to MIDC file

276

- variable_map: dict, custom variable name mapping

277

- raw_data: bool, return raw data without processing

278

279

Returns:

280

pandas.DataFrame with MIDC measurements

281

"""

282

283

def read_midc_raw_data_from_nrel(site, start, end, variable_map={}, **kwargs):

284

"""

285

Get MIDC data directly from NREL servers.

286

287

Parameters:

288

- site: str, MIDC site identifier

289

- start: datetime-like, start date

290

- end: datetime-like, end date

291

- variable_map: dict, custom variable mapping

292

293

Returns:

294

pandas.DataFrame with MIDC data

295

"""

296

297

def get_bsrn(station, start, end, username, password, url=None, **kwargs):

298

"""

299

Get BSRN (Baseline Surface Radiation Network) data.

300

301

Parameters:

302

- station: str, BSRN station identifier

303

- start: datetime-like, start date

304

- end: datetime-like, end date

305

- username: str, BSRN account username

306

- password: str, BSRN account password

307

- url: str, BSRN data server URL

308

309

Returns:

310

pandas.DataFrame with BSRN measurements

311

"""

312

313

def read_bsrn(filename, logical_records=('0100',)):

314

"""

315

Read BSRN data files.

316

317

Parameters:

318

- filename: str, path to BSRN file

319

- logical_records: tuple, logical record types to read

320

321

Returns:

322

pandas.DataFrame with BSRN data

323

"""

324

```

325

326

### CAMS and SolarAnywhere

327

328

Access satellite-based and commercial data sources.

329

330

```python { .api }

331

def get_cams(latitude, longitude, start, end, email, identifier='mcclear',

332

time_step='PT01H', time_reference='UT', verbose=False,

333

map_variables=True, timeout=30):

334

"""

335

Get CAMS (Copernicus Atmosphere Monitoring Service) data.

336

337

Parameters:

338

- latitude: numeric, latitude in degrees

339

- longitude: numeric, longitude in degrees

340

- start: datetime-like, start date (YYYY-MM-DD)

341

- end: datetime-like, end date (YYYY-MM-DD)

342

- email: str, email for data request

343

- identifier: str, data service identifier

344

- time_step: str, time resolution ('PT01H', 'PT15M')

345

- time_reference: str, time reference ('UT', 'TST')

346

- verbose: bool, print request details

347

- map_variables: bool, map to standard pvlib names

348

- timeout: int, request timeout in seconds

349

350

Returns:

351

pandas.DataFrame with CAMS irradiance data

352

"""

353

354

def read_cams(filename, integrated=False, label=None, map_variables=True):

355

"""

356

Read CAMS data files.

357

358

Parameters:

359

- filename: str, path to CAMS file

360

- integrated: bool, data is time-integrated

361

- label: str, data label for multi-file datasets

362

- map_variables: bool, map to standard pvlib names

363

364

Returns:

365

pandas.DataFrame with parsed CAMS data

366

"""

367

368

def get_solaranywhere(latitude, longitude, api_key, start=None, end=None,

369

time_zone='UTC', spatial_resolution='1km',

370

temporal_resolution='15min', irradiance_type='beam+diffuse',

371

weather_data_format='csv', url=None, **kwargs):

372

"""

373

Get SolarAnywhere satellite irradiance data.

374

375

Parameters:

376

- latitude: numeric, latitude in degrees

377

- longitude: numeric, longitude in degrees

378

- api_key: str, SolarAnywhere API key

379

- start: datetime-like, start date

380

- end: datetime-like, end date

381

- time_zone: str, time zone identifier

382

- spatial_resolution: str, spatial resolution ('1km', '10km')

383

- temporal_resolution: str, temporal resolution ('15min', 'hour')

384

- irradiance_type: str, irradiance components to retrieve

385

- weather_data_format: str, output format ('csv', 'json')

386

387

Returns:

388

pandas.DataFrame with SolarAnywhere data

389

"""

390

391

def read_solaranywhere(filename, map_variables=True, encoding='iso-8859-1'):

392

"""

393

Read SolarAnywhere data files.

394

395

Parameters:

396

- filename: str, path to SolarAnywhere file

397

- map_variables: bool, map to standard pvlib names

398

- encoding: str, file encoding

399

400

Returns:

401

pandas.DataFrame with SolarAnywhere data

402

"""

403

```

404

405

### Climate Data Sources

406

407

Access precipitation and climate data for soiling and snow modeling.

408

409

```python { .api }

410

def read_crn(filename, map_variables=True):

411

"""

412

Read Climate Reference Network (CRN) files.

413

414

Parameters:

415

- filename: str, path to CRN file

416

- map_variables: bool, map to standard pvlib names

417

418

Returns:

419

pandas.DataFrame with CRN climate data

420

"""

421

422

def get_acis_prism(latitude, longitude, start, end, map_variables=True, **kwargs):

423

"""

424

Get ACIS PRISM precipitation data.

425

426

Parameters:

427

- latitude: numeric, latitude in degrees

428

- longitude: numeric, longitude in degrees

429

- start: datetime-like, start date

430

- end: datetime-like, end date

431

- map_variables: bool, map to standard pvlib names

432

433

Returns:

434

pandas.DataFrame with precipitation data

435

"""

436

437

def get_acis_station_data(station, start, end, trace_val=0.001,

438

map_variables=True, **kwargs):

439

"""

440

Get ACIS station-specific data.

441

442

Parameters:

443

- station: str, station identifier

444

- start: datetime-like, start date

445

- end: datetime-like, end date

446

- trace_val: numeric, value for trace precipitation

447

- map_variables: bool, map to standard pvlib names

448

449

Returns:

450

pandas.DataFrame with station weather data

451

"""

452

453

def get_acis_available_stations(latitude_range, longitude_range,

454

start_date=None, end_date=None, **kwargs):

455

"""

456

Get list of available ACIS weather stations.

457

458

Parameters:

459

- latitude_range: tuple, (min_lat, max_lat)

460

- longitude_range: tuple, (min_lon, max_lon)

461

- start_date: datetime-like, earliest data date

462

- end_date: datetime-like, latest data date

463

464

Returns:

465

list of dict with station information

466

"""

467

```

468

469

## Usage Examples

470

471

### Loading TMY Data

472

473

```python

474

import pvlib

475

from pvlib import iotools

476

import pandas as pd

477

478

# Read a TMY3 file: returns the weather DataFrame and a station metadata dict
data, metadata = iotools.read_tmy3('path/to/tmy3_file.csv')
print(f"Location: {metadata['Name']}, {metadata['State']}")
# read_tmy3 stores the site coordinates under lowercase keys
# ('latitude', 'longitude', 'altitude'); capitalised keys raise KeyError.
print(f"Latitude: {metadata['latitude']:.2f}, Longitude: {metadata['longitude']:.2f}")
print(f"Elevation: {metadata['altitude']:.1f} m")

# Display first few rows
print(data.head())

# Read EnergyPlus Weather file
epw_data, epw_meta = iotools.read_epw('weather_file.epw')
print(f"Data shape: {epw_data.shape}")
print(f"Available columns: {epw_data.columns.tolist()}")

491

```

492

493

### Accessing NSRDB Data

494

495

```python

496

import pvlib

497

from pvlib import iotools

498

import matplotlib.pyplot as plt

499

500

# Get PSM3 TMY data from NSRDB
api_key = 'your_nrel_api_key'
email = 'your_email@domain.com'
lat, lon = 39.7555, -105.2211  # Golden, CO

# Retrieve TMY data
data, metadata = iotools.get_psm3(
    latitude=lat,
    longitude=lon,
    api_key=api_key,
    email=email,
    names='tmy',
    attributes=['ghi', 'dni', 'dhi', 'temp_air', 'wind_speed'],
    map_variables=True,
)

print(f"TMY data retrieved for {metadata['Location ID']}")
print(f"Available variables: {data.columns.tolist()}")

# Plot annual irradiance profile
fig, axes = plt.subplots(2, 1, figsize=(12, 8))

# Daily totals
# NOTE(review): dividing the daily sum by 1000 yields kWh only for hourly
# samples in W/m² -- confirm the interval of the retrieved data.
daily_ghi = data['ghi'].resample('D').sum() / 1000  # kWh/m²/day
daily_ghi.plot(ax=axes[0], title='Daily Global Horizontal Irradiance')
axes[0].set_ylabel('GHI (kWh/m²/day)')

# Monthly averages
# Group by calendar month rather than resample('M'): the 'M' alias is
# deprecated in recent pandas, and grouping yields the month number (1-12)
# directly as the index used for the x-axis below.
monthly_mean = data[['ghi', 'dni', 'dhi']].groupby(data.index.month).mean()
monthly_ghi = monthly_mean['ghi']
monthly_dni = monthly_mean['dni']
monthly_dhi = monthly_mean['dhi']

axes[1].plot(monthly_ghi.index, monthly_ghi, label='GHI', marker='o')
axes[1].plot(monthly_dni.index, monthly_dni, label='DNI', marker='s')
axes[1].plot(monthly_dhi.index, monthly_dhi, label='DHI', marker='^')
axes[1].set_xlabel('Month')
axes[1].set_ylabel('Irradiance (W/m²)')
axes[1].legend()
axes[1].set_title('Monthly Average Irradiance Components')

plt.tight_layout()
plt.show()

542

```

543

544

### Retrieving PVGIS Data

545

546

```python

547

import pvlib

548

from pvlib import iotools

549

import numpy as np

550

import pandas as pd

551

552

# Location in Europe (Munich, Germany)
lat, lon = 48.1351, 11.5820

# Get PVGIS TMY data
# NOTE(review): the number and order of the values returned by
# get_pvgis_tmy / get_pvgis_hourly has differed between pvlib releases --
# confirm both unpackings below against the installed version.
pvgis_data, pvgis_meta, pvgis_inputs = iotools.get_pvgis_tmy(
    latitude=lat,
    longitude=lon,
    usehorizon=True,
    outputformat='json',
)

print(f"PVGIS TMY data for coordinates: {lat:.2f}, {lon:.2f}")
print(f"Data period: {pvgis_inputs['meteo_data']['year_min']} - {pvgis_inputs['meteo_data']['year_max']}")
print(f"Available variables: {pvgis_data.columns.tolist()}")

# Get hourly data for specific period
start_date = 2020
end_date = 2020

hourly_data, hourly_meta, hourly_inputs = iotools.get_pvgis_hourly(
    latitude=lat,
    longitude=lon,
    start=start_date,
    end=end_date,
    raddatabase='PVGIS-SARAH2',
    components=True,
    surface_tilt=30,
    surface_azimuth=180,
)

# Compare TMY vs actual year
# NOTE(review): the hourly request is for a tilted surface with
# components=True; the returned frame may expose plane-of-array columns
# rather than 'ghi' -- confirm the column name before relying on this.
comparison_months = []
for month in range(1, 13):
    tmy_month = pvgis_data[pvgis_data.index.month == month]['ghi'].mean()
    actual_month = hourly_data[hourly_data.index.month == month]['ghi'].mean()

    comparison_months.append({
        'month': month,
        'tmy_ghi': tmy_month,
        'actual_ghi': actual_month,
        'difference': actual_month - tmy_month,
    })

comparison_df = pd.DataFrame(comparison_months)
print("\nTMY vs 2020 Comparison (Monthly Average GHI):")
print(comparison_df.round(2))

# Get horizon data
# NOTE(review): assumes the call returns a single frame with a
# 'horizon_elevation' column -- confirm against the installed version.
horizon = iotools.get_pvgis_horizon(lat, lon)
print(f"\nHorizon profile with {len(horizon)} data points")
print(f"Max horizon elevation: {horizon['horizon_elevation'].max():.1f}°")

603

```

604

605

### Working with Multiple Data Sources

606

607

```python

608

import pvlib

609

from pvlib import iotools

610

import pandas as pd

611

import matplotlib.pyplot as plt

612

613

# Compare data from multiple sources for same location
lat, lon = 36.0544, -112.1401  # Grand Canyon, AZ
year = 2020

# Source 1: NSRDB PSM3
nsrdb_data, nsrdb_meta = iotools.get_psm3(
    lat, lon, api_key='your_key', email='your_email',
    names=year, map_variables=True,
)

# Source 2: PVGIS (if available for location)
# Catch Exception rather than using a bare except, so Ctrl-C / SystemExit
# still propagate, and report the cause instead of hiding it.
try:
    pvgis_data, pvgis_meta, pvgis_inputs = iotools.get_pvgis_hourly(
        lat, lon, start=year, end=year,
    )
    pvgis_available = True
except Exception as exc:
    pvgis_available = False
    print("PVGIS data not available for this location")
    print(f"  reason: {exc}")

# Source 3: SolarAnywhere (requires API key)
try:
    sa_data = iotools.get_solaranywhere(
        lat, lon,
        api_key='your_solaranywhere_key',
        start=f'{year}-01-01',
        end=f'{year}-12-31',
    )
    sa_available = True
except Exception as exc:
    sa_available = False
    print("SolarAnywhere data not available")
    print(f"  reason: {exc}")

# Compare monthly statistics
monthly_stats = []

for month in range(1, 13):
    stats = {'month': month}

    # NSRDB statistics
    nsrdb_month = nsrdb_data[nsrdb_data.index.month == month]
    stats['nsrdb_ghi_avg'] = nsrdb_month['ghi'].mean()
    stats['nsrdb_dni_avg'] = nsrdb_month['dni'].mean()
    stats['nsrdb_temp_avg'] = nsrdb_month['temp_air'].mean()

    # PVGIS statistics (if available)
    if pvgis_available:
        pvgis_month = pvgis_data[pvgis_data.index.month == month]
        stats['pvgis_ghi_avg'] = pvgis_month['ghi'].mean()
        stats['pvgis_temp_avg'] = pvgis_month['temp_air'].mean()

    monthly_stats.append(stats)

stats_df = pd.DataFrame(monthly_stats)
print("\nMonthly Comparison Between Data Sources:")
print(stats_df.round(2))

# Plot comparison (only meaningful when both sources were retrieved)
if pvgis_available:
    fig, axes = plt.subplots(2, 1, figsize=(12, 8))

    axes[0].plot(stats_df['month'], stats_df['nsrdb_ghi_avg'],
                 'o-', label='NSRDB PSM3', linewidth=2)
    axes[0].plot(stats_df['month'], stats_df['pvgis_ghi_avg'],
                 's-', label='PVGIS', linewidth=2)
    axes[0].set_ylabel('GHI (W/m²)')
    axes[0].set_title('Monthly Average Global Horizontal Irradiance')
    axes[0].legend()
    axes[0].grid(True)

    axes[1].plot(stats_df['month'], stats_df['nsrdb_temp_avg'],
                 'o-', label='NSRDB', linewidth=2)
    axes[1].plot(stats_df['month'], stats_df['pvgis_temp_avg'],
                 's-', label='PVGIS', linewidth=2)
    axes[1].set_xlabel('Month')
    axes[1].set_ylabel('Temperature (°C)')
    axes[1].set_title('Monthly Average Air Temperature')
    axes[1].legend()
    axes[1].grid(True)

    plt.tight_layout()
    plt.show()

695

```

696

697

### Processing Downloaded Files

698

699

```python

700

import pvlib

701

from pvlib import iotools

702

import glob

703

import pandas as pd

704

705

# Process multiple TMY files in a directory
# sorted() makes the processing (and report) order deterministic.
tmy_files = sorted(glob.glob('weather_data/*.csv'))
weather_summary = []

for file in tmy_files:
    try:
        # Try reading as TMY3 first
        data, metadata = iotools.read_tmy3(file)

        # Calculate annual statistics
        # read_tmy3 stores coordinates under lowercase keys; the capitalised
        # spellings would silently fall back to None here.
        annual_stats = {
            'file': file,
            'location': metadata.get('Name', 'Unknown'),
            'latitude': metadata.get('latitude', None),
            'longitude': metadata.get('longitude', None),
            'annual_ghi': data['ghi'].sum() / 1000,  # kWh/m²/year
            'annual_dni': data['dni'].sum() / 1000,
            'avg_temp': data['temp_air'].mean(),
            'max_temp': data['temp_air'].max(),
            'min_temp': data['temp_air'].min(),
            'avg_wind': data['wind_speed'].mean(),
            'max_wind': data['wind_speed'].max(),
        }
        weather_summary.append(annual_stats)

    except Exception as e:
        # Best effort: report the unreadable file and continue with the rest.
        print(f"Error reading {file}: {e}")

# Create summary dataframe
summary_df = pd.DataFrame(weather_summary)
summary_df = summary_df.round(2)

print("Weather Data Summary:")
print(summary_df.to_string(index=False))

# Find locations with highest solar resource
if summary_df.empty:
    # No file was read successfully, so there is no 'annual_ghi' column to
    # rank by; nlargest would raise KeyError.
    print("\nNo TMY files could be read; nothing to rank.")
else:
    top_solar = summary_df.nlargest(3, 'annual_ghi')
    print("\nTop 3 locations by annual GHI:")
    print(top_solar[['location', 'latitude', 'longitude', 'annual_ghi']])

744

```

745

746

### Error Handling and Data Validation

747

748

```python

749

import pvlib

750

from pvlib import iotools

751

import pandas as pd

752

import numpy as np

753

754

def validate_weather_data(data, location_name="Unknown"):
    """
    Validate weather data for common issues.

    Prints a short report and records an issue for each of the following:
    missing values in any column, negative 'ghi' values, 'temp_air' values
    below -50 or above 60, 'wind_speed' values above 50, and (only when the
    index is a DatetimeIndex) gaps between consecutive timestamps larger
    than 1.5 times the most common step. Checks for columns that are not
    present are skipped.

    Parameters:
    - data: pandas.DataFrame, weather data to inspect
    - location_name: str, label used in the printed report

    Returns:
    bool: True when no issue was recorded, False otherwise
    """
    print(f"\nValidating weather data for: {location_name}")

    issues = []

    # Check for missing data
    missing_data = data.isnull().sum()
    if missing_data.sum() > 0:
        issues.append(f"Missing data found: {missing_data[missing_data > 0].to_dict()}")

    # Check for negative irradiance
    if 'ghi' in data.columns:
        negative_ghi = (data['ghi'] < 0).sum()
        if negative_ghi > 0:
            issues.append(f"Negative GHI values: {negative_ghi}")

    # Check for unrealistic temperatures
    if 'temp_air' in data.columns:
        extreme_temps = ((data['temp_air'] < -50) | (data['temp_air'] > 60)).sum()
        if extreme_temps > 0:
            issues.append(f"Extreme temperature values: {extreme_temps}")

    # Check for unrealistic wind speeds
    if 'wind_speed' in data.columns:
        high_wind = (data['wind_speed'] > 50).sum()  # > 50 m/s is very rare
        if high_wind > 0:
            issues.append(f"Unusually high wind speeds: {high_wind}")

    # Check time series continuity
    if isinstance(data.index, pd.DatetimeIndex):
        time_gaps = pd.Series(data.index).diff().dropna()
        # The most common step is taken as the nominal sampling interval;
        # with fewer than two timestamps there is no step, so assume 1 hour.
        expected_freq = time_gaps.mode()[0] if len(time_gaps) > 0 else pd.Timedelta(hours=1)
        large_gaps = (time_gaps > expected_freq * 1.5).sum()
        if large_gaps > 0:
            issues.append(f"Time series gaps found: {large_gaps}")

    if issues:
        print("Issues found:")
        for issue in issues:
            print(f" - {issue}")
    else:
        print("✓ No major issues detected")

    return len(issues) == 0

801

802

# Example usage with error handling

803

def robust_data_loading(source, **kwargs):
    """
    Robustly load weather data with fallback options.

    Dispatches on `source` to one of the pvlib iotools loaders, validates
    the loaded frame with validate_weather_data, and never raises: any
    exception (including an unknown `source`) is printed and reported as a
    failed load.

    Parameters:
    - source: str, one of 'nsrdb', 'pvgis' or 'tmy3'
    - **kwargs: forwarded unchanged to the selected loader

    Returns:
    tuple: (data, metadata, is_valid) on success, where is_valid is the
    result of validate_weather_data; (None, None, False) on any error
    """
    try:
        if source == 'nsrdb':
            data, metadata = iotools.get_psm3(**kwargs)
            location_name = metadata.get('Location ID', 'NSRDB Location')

        elif source == 'pvgis':
            # NOTE(review): the arity of get_pvgis_tmy's return value has
            # differed between pvlib releases -- confirm this unpacking.
            data, metadata, inputs = iotools.get_pvgis_tmy(**kwargs)
            location_name = f"PVGIS ({kwargs.get('latitude')}, {kwargs.get('longitude')})"

        elif source == 'tmy3':
            data, metadata = iotools.read_tmy3(**kwargs)
            location_name = metadata.get('Name', 'TMY3 Location')

        else:
            raise ValueError(f"Unknown source: {source}")

        # Validate the loaded data
        is_valid = validate_weather_data(data, location_name)

        if not is_valid:
            print(f"Warning: Data quality issues detected for {location_name}")

        return data, metadata, is_valid

    except Exception as e:
        # Deliberate best-effort boundary: callers use the (None, None, False)
        # result to fall back to the next source.
        print(f"Error loading data from {source}: {e}")
        return None, None, False

834

835

# Test robust loading
lat, lon = 40.0150, -105.2705  # Boulder, CO

# Try multiple sources with fallback: stop at the first source that yields
# data passing validation.
for source in ['nsrdb', 'pvgis', 'tmy3']:
    if source == 'nsrdb':
        kwargs = {
            'latitude': lat, 'longitude': lon,
            'api_key': 'DEMO_KEY', 'email': 'test@example.com',
            'names': 'tmy',
        }
    elif source == 'pvgis':
        kwargs = {'latitude': lat, 'longitude': lon}
    else:
        kwargs = {'filename': 'local_tmy_file.csv'}

    data, metadata, valid = robust_data_loading(source, **kwargs)

    if data is not None and valid:
        print(f"✓ Successfully loaded valid data from {source}")
        break
    else:
        print(f"✗ Failed to load valid data from {source}")

858

```