Skip to content
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
b274e1e
function, docs, tests
kandersolar Oct 17, 2025
d3bd426
make api key secret accessible to tests
kandersolar Oct 17, 2025
0b5ba09
bit more docs
kandersolar Oct 17, 2025
396833b
handle another API error
kandersolar Oct 17, 2025
749fd5e
lint
kandersolar Oct 17, 2025
6828769
Merge branch 'main' into get_era5
kandersolar Oct 17, 2025
8c1ab6d
fix test
kandersolar Oct 17, 2025
fd7f06d
Merge branch 'get_era5' of https://github.com/kandersolar/pvlib-pytho…
kandersolar Oct 17, 2025
8f4da88
fix tests, again
kandersolar Oct 17, 2025
9906815
one more
kandersolar Oct 17, 2025
ee7474d
use Timeout instead of Exception
kandersolar Oct 20, 2025
f34309e
Apply suggestions from code review
kandersolar Oct 20, 2025
3c8f2f2
rename from ECMWF to ERA5
kandersolar Oct 20, 2025
a0aa2c8
Merge branch 'get_era5' of https://github.com/kandersolar/pvlib-pytho…
kandersolar Oct 20, 2025
ac6fe82
and fix tests
kandersolar Oct 20, 2025
6568cf6
make unit conversion funcs private
kandersolar Oct 21, 2025
2313c80
Apply suggestions from code review
kandersolar Oct 28, 2025
5d9735c
Merge branch 'get_era5' of https://github.com/kandersolar/pvlib-pytho…
kandersolar Oct 28, 2025
53b90ed
convert input times to UTC if not localized
kandersolar Oct 28, 2025
668b9e2
lint
kandersolar Oct 28, 2025
6065b77
fix test bug
kandersolar Oct 28, 2025
42af726
Merge remote-tracking branch 'upstream/main' into get_era5
kandersolar Oct 28, 2025
91a73c3
Merge remote-tracking branch 'upstream/main' into get_era5
kandersolar Nov 3, 2025
6c625e8
Merge branch 'main' into get_era5
kandersolar Nov 3, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions docs/sphinx/source/reference/iotools.rst
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,17 @@ lower quality.
iotools.read_crn


ECMWF ERA5
^^^^^^^^^^

A global reanalysis dataset providing weather and solar resource data.

.. autosummary::
:toctree: generated/

iotools.get_era5


Generic data file readers
-------------------------

Expand Down
3 changes: 2 additions & 1 deletion docs/sphinx/source/whatsnew/v0.13.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ Enhancements
:py:func:`~pvlib.singlediode.bishop88_mpp`,
:py:func:`~pvlib.singlediode.bishop88_v_from_i`, and
:py:func:`~pvlib.singlediode.bishop88_i_from_v`. (:issue:`2497`, :pull:`2498`)

* Add :py:func:`~pvlib.iotools.get_era5`, a function for accessing
ERA5 reanalysis data. (:pull:`2573`)


Documentation
Expand Down
1 change: 1 addition & 0 deletions pvlib/iotools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,4 @@
from pvlib.iotools.meteonorm import get_meteonorm_observation_training # noqa: F401, E501
from pvlib.iotools.meteonorm import get_meteonorm_tmy # noqa: F401
from pvlib.iotools.nasa_power import get_nasa_power # noqa: F401
from pvlib.iotools.ecmwf import get_era5 # noqa: F401
195 changes: 195 additions & 0 deletions pvlib/iotools/ecmwf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
import requests
import pandas as pd
from io import BytesIO, StringIO
import zipfile
import time


# ERA5 short names -> pvlib variable names
_SHORT_NAME_MAP = {
    'd2m': 'temp_dew',
    't2m': 'temp_air',
    'sp': 'pressure',
    'ssrd': 'ghi',
    'tp': 'precipitation',
}

# ERA5 long names -> pvlib variable names
_LONG_NAME_MAP = {
    '2m_dewpoint_temperature': 'temp_dew',
    '2m_temperature': 'temp_air',
    'surface_pressure': 'pressure',
    'surface_solar_radiation_downwards': 'ghi',
    'total_precipitation': 'precipitation',
}

# Combined lookup from either ERA5 spelling to the pvlib name.  The short
# names are inserted first and the long names second; code that inverts this
# mapping therefore resolves a pvlib name to the long ERA5 name.
VARIABLE_MAP = {**_SHORT_NAME_MAP, **_LONG_NAME_MAP}


def same(x):
    """Identity converter: hand back ``x`` untouched (no unit change)."""
    unchanged = x
    return unchanged


def k_to_c(temp_k):
    """Convert a temperature from kelvin to degrees Celsius."""
    kelvin_offset = 273.15  # 0 degC expressed in kelvin
    return temp_k - kelvin_offset


def j_to_w(j):
    """
    Convert an energy value in joules to power in watts.

    The input is divided by 3600, i.e. the number of seconds in one hour.
    NOTE(review): this presumes the energy was accumulated over exactly one
    hour -- confirm against the data source.
    """
    seconds_per_hour = 3600
    return j / seconds_per_hour


def m_to_cm(m):
    """
    Convert a length from meters to centimeters.

    One meter is 100 centimeters, so the value is multiplied by 100.
    """
    return m * 100


# Unit converter applied to each returned column, keyed by ERA5 short name.
# ``same`` leaves the values as delivered by the API.
UNITS = dict(
    u100=same,
    v100=same,
    u10=same,
    v10=same,
    d2m=k_to_c,
    t2m=k_to_c,
    msl=same,
    sst=k_to_c,
    skt=k_to_c,
    sp=same,
    ssrd=j_to_w,
    strd=j_to_w,
    tp=m_to_cm,
)


def get_era5(latitude, longitude, start, end, variables, api_key,
             map_variables=True, timeout=60,
             url='https://cds.climate.copernicus.eu/api/retrieve/v1/'):
    """
    Retrieve ERA5 reanalysis data from the ECMWF's Copernicus Data Store.

    A CDS API key is needed to access this API. Register for one at [1]_.

    This API [2]_ provides a subset of the full ERA5 dataset. See [3]_ for
    the available variables. Data are available on a 0.25° x 0.25° grid.

    Parameters
    ----------
    latitude : float
        In decimal degrees, north is positive (ISO 19115).
    longitude: float
        In decimal degrees, east is positive (ISO 19115).
    start : datetime like or str
        First day of the requested period. Only the date part is sent to
        the API.
    end : datetime like or str
        Last day of the requested period. Only the date part is sent to
        the API.
    variables : list of str
        List of variable names to retrieve. ERA5 names and pvlib names
        (see :const:`VARIABLE_MAP`) are both accepted. See [3]_ for options.
    api_key : str
        ECMWF CDS API key.
    map_variables : bool, default True
        When true, converts units and renames columns of the DataFrame to
        pvlib variable names where applicable. See variable
        :const:`VARIABLE_MAP`.
    timeout : int, default 60
        Number of seconds to wait for the requested data to become available
        before timeout. Also used as the timeout of each HTTP request.
    url : str, optional
        API endpoint URL.

    Raises
    ------
    requests.exceptions.Timeout
        If ``timeout`` is reached without the job finishing.
    requests.exceptions.HTTPError
        If fetching the result link or downloading the data fails.
    Exception
        If the request is rejected on submission (for example because the
        dataset license has not been accepted) or the job fails.

    Returns
    -------
    data : pd.DataFrame
        Time series data. The index is timezone-aware (UTC) and
        corresponds to the start of the interval.
    meta : dict
        Metadata.

    References
    ----------
    .. [1] https://cds.climate.copernicus.eu/
    .. [2] https://cds.climate.copernicus.eu/datasets/reanalysis-era5-single-levels-timeseries?tab=overview
    .. [3] https://confluence.ecmwf.int/pages/viewpage.action?pageId=505390919
    """  # noqa: E501
    start = pd.to_datetime(start).strftime("%Y-%m-%d")
    end = pd.to_datetime(end).strftime("%Y-%m-%d")

    headers = {'PRIVATE-TOKEN': api_key}

    # allow variables to be specified with pvlib names
    reverse_map = {v: k for k, v in VARIABLE_MAP.items()}
    variables = [reverse_map.get(k, k) for k in variables]

    # Step 1: submit data request (add it to the queue)
    params = {
        "inputs": {
            "variable": variables,
            "location": {"longitude": longitude, "latitude": latitude},
            "date": [f"{start}/{end}"],
            "data_format": "csv"
        }
    }
    slug = "processes/reanalysis-era5-single-levels-timeseries/execution"
    response = requests.post(url + slug, json=params, headers=headers,
                             timeout=timeout)
    if not response.ok:
        # likely need to accept license.  Error bodies are not guaranteed to
        # be JSON, so fall back to the raw text rather than masking the real
        # problem with a decoding error.
        try:
            detail = response.json()
        except ValueError:
            detail = response.text
        raise Exception(detail)
    submission_response = response.json()

    job_id = submission_response['jobID']

    # Step 2: poll until the data request is ready
    slug = "jobs/" + job_id
    poll_interval = 1  # seconds between status checks
    num_polls = 0
    while True:
        response = requests.get(url + slug, headers=headers, timeout=timeout)
        poll_response = response.json()
        job_status = poll_response['status']

        if job_status == 'successful':
            break  # ready to proceed to next step
        elif job_status == 'failed':
            msg = (
                'Request failed. Please check the ECMWF website for details: '
                'https://cds.climate.copernicus.eu/requests?tab=all'
            )
            raise Exception(msg)

        num_polls += 1
        if num_polls * poll_interval > timeout:
            raise requests.exceptions.Timeout(
                'Request timed out. Try increasing the timeout parameter or '
                'reducing the request size.'
            )

        # keep the wait in sync with the interval used in the timeout check
        time.sleep(poll_interval)

    # Step 3: get the download link for our requested dataset
    slug = "jobs/" + job_id + "/results"
    response = requests.get(url + slug, headers=headers, timeout=timeout)
    response.raise_for_status()
    results_response = response.json()
    download_url = results_response['asset']['value']['href']

    # Step 4: finally, download our dataset. it's a zipfile of one CSV
    response = requests.get(download_url, timeout=timeout)
    response.raise_for_status()  # fail clearly instead of on a bad zip
    with zipfile.ZipFile(BytesIO(response.content)) as archive:
        filename = archive.filelist[0].filename
        csvbuffer = StringIO(archive.read(filename).decode('utf-8'))
    df = pd.read_csv(csvbuffer)

    # and parse into the usual formats
    # include messages from ECMWF; copy so the API response is not mutated
    metadata = dict(submission_response.get('metadata', {}))
    metadata['jobID'] = job_id
    if not df.empty:
        metadata['latitude'] = df['latitude'].values[0]
        metadata['longitude'] = df['longitude'].values[0]

    df.index = pd.to_datetime(df['valid_time']).dt.tz_localize('UTC')
    df = df.drop(columns=['valid_time', 'latitude', 'longitude'])

    if map_variables:
        # convert units and rename
        for shortname in df.columns:
            converter = UNITS.get(shortname)
            if converter is not None:
                # columns without a registered converter are left as-is
                df[shortname] = converter(df[shortname])
        df = df.rename(columns=VARIABLE_MAP)

    return df, metadata
13 changes: 13 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,19 @@ def nrel_api_key():
reason='requires solaranywhere credentials')


# Look up the ECMWF API key used when testing pvlib.iotools.get_era5.
# Tests that need it are skipped when the environment variable is absent.
try:
    ecwmf_api_key = os.environ["ECMWF_API_KEY"]
except KeyError:
    has_ecmwf_credentials = False
else:
    has_ecmwf_credentials = True

requires_ecmwf_credentials = pytest.mark.skipif(
    not has_ecmwf_credentials, reason='requires ECMWF credentials')


try:
import statsmodels # noqa: F401
has_statsmodels = True
Expand Down
80 changes: 80 additions & 0 deletions tests/iotools/test_ecmwf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
"""
tests for pvlib/iotools/ecmwf.py
"""

import pandas as pd
import pytest
import pvlib
import os
from tests.conftest import RERUNS, RERUNS_DELAY, requires_ecmwf_credentials


@pytest.fixture
def params():
    """Keyword arguments for a single-day ``get_era5`` request."""
    return dict(
        latitude=40.01, longitude=-80.01,
        start='2020-06-01', end='2020-06-01',
        variables=['ghi', 'temp_air'],
        api_key=os.environ["ECMWF_API_KEY"],
    )


@pytest.fixture
def expected():
    """Reference hourly values (pvlib names and units) for 2020-06-01 UTC."""
    columns = {
        'temp_air': [16.6, 15.2, 13.5, 11.2, 10.8, 9.1, 7.3, 6.8, 7.6, 7.4,
                     8.5, 8.1, 9.8, 11.5, 14.1, 17.4, 18.3, 20., 20.7, 20.9,
                     21.5, 21.6, 21., 20.7],
        'ghi': [153., 18.4, 0., 0., 0., 0., 0., 0., 0., 0., 0., 60., 229.5,
                427.8, 620.1, 785.5, 910.1, 984.2, 1005.9, 962.4, 844.1,
                685.2, 526.9, 331.4],
    }
    # 24 hourly timestamps covering the whole day
    times = pd.date_range("2020-06-01 00:00", "2020-06-01 23:59", freq="h",
                          tz="UTC", name='valid_time')
    return pd.DataFrame(columns, index=times)


@requires_ecmwf_credentials
@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_era5(params, expected):
    """Default call returns mapped, unit-converted data plus metadata."""
    data, metadata = pvlib.iotools.get_era5(**params)
    pd.testing.assert_frame_equal(data, expected, check_freq=False,
                                  atol=0.1)
    # the requested point is (40.01, -80.01); the reported coordinates
    # differ -- presumably the nearest grid point
    assert (metadata['longitude'], metadata['latitude']) == (-80.0, 40.0)
    assert isinstance(metadata['jobID'], str)


@requires_ecmwf_credentials
@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_era5_map_variables(params, expected):
    """With ``map_variables=False`` the raw names and units are returned."""
    df, meta = pvlib.iotools.get_era5(**params, map_variables=False)
    expected = expected.rename(columns={'temp_air': 't2m', 'ghi': 'ssrd'})
    # The function skipped its unit conversions, so apply them manually to
    # the returned data (not to ``expected``, which is already converted).
    # Comparing in converted units keeps ``atol=0.1`` meaningful.
    df['t2m'] -= 273.15
    df['ssrd'] /= 3600
    pd.testing.assert_frame_equal(df, expected, check_freq=False, atol=0.1)
    assert meta['longitude'] == -80.0
    assert meta['latitude'] == 40.0
    assert isinstance(meta['jobID'], str)


@requires_ecmwf_credentials
@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_era5_error(params):
    """Requesting an unknown variable surfaces the job-failure message."""
    bad_request = params
    bad_request['variables'] = ['nonexistent']
    with pytest.raises(
            Exception,
            match='Request failed. Please check the ECMWF website'):
        pvlib.iotools.get_era5(**bad_request)


@requires_ecmwf_credentials
@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_era5_timeout(params):
    """A one-second timeout is expected to expire before the job is done."""
    with pytest.raises(Exception,
                       match='Request timed out. Try increasing'):
        pvlib.iotools.get_era5(timeout=1, **params)
Loading