diff --git a/.gitignore b/.gitignore
index a6338f3..01188d9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,4 +4,5 @@ logs/*
.DS_Store
sandbox
slurm*
-data
\ No newline at end of file
+data
+bld
\ No newline at end of file
diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 0000000..135524a
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,5 @@
+{
+ "python.terminal.activateEnvInCurrentTerminal": true,
+ "quarto.path": "",
+ "quarto.usePipQuarto": false
+}
\ No newline at end of file
diff --git a/LICENSE b/LICENSE
index 3b106e8..f987f3d 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,201 +1,395 @@
- Apache License
- Version 2.0, January 2004
- http://www.apache.org/licenses/
-
- TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
- 1. Definitions.
-
- "License" shall mean the terms and conditions for use, reproduction,
- and distribution as defined by Sections 1 through 9 of this document.
-
- "Licensor" shall mean the copyright owner or entity authorized by
- the copyright owner that is granting the License.
-
- "Legal Entity" shall mean the union of the acting entity and all
- other entities that control, are controlled by, or are under common
- control with that entity. For the purposes of this definition,
- "control" means (i) the power, direct or indirect, to cause the
- direction or management of such entity, whether by contract or
- otherwise, or (ii) ownership of fifty percent (50%) or more of the
- outstanding shares, or (iii) beneficial ownership of such entity.
-
- "You" (or "Your") shall mean an individual or Legal Entity
- exercising permissions granted by this License.
-
- "Source" form shall mean the preferred form for making modifications,
- including but not limited to software source code, documentation
- source, and configuration files.
-
- "Object" form shall mean any form resulting from mechanical
- transformation or translation of a Source form, including but
- not limited to compiled object code, generated documentation,
- and conversions to other media types.
-
- "Work" shall mean the work of authorship, whether in Source or
- Object form, made available under the License, as indicated by a
- copyright notice that is included in or attached to the work
- (an example is provided in the Appendix below).
-
- "Derivative Works" shall mean any work, whether in Source or Object
- form, that is based on (or derived from) the Work and for which the
- editorial revisions, annotations, elaborations, or other modifications
- represent, as a whole, an original work of authorship. For the purposes
- of this License, Derivative Works shall not include works that remain
- separable from, or merely link (or bind by name) to the interfaces of,
- the Work and Derivative Works thereof.
-
- "Contribution" shall mean any work of authorship, including
- the original version of the Work and any modifications or additions
- to that Work or Derivative Works thereof, that is intentionally
- submitted to Licensor for inclusion in the Work by the copyright owner
- or by an individual or Legal Entity authorized to submit on behalf of
- the copyright owner. For the purposes of this definition, "submitted"
- means any form of electronic, verbal, or written communication sent
- to the Licensor or its representatives, including but not limited to
- communication on electronic mailing lists, source code control systems,
- and issue tracking systems that are managed by, or on behalf of, the
- Licensor for the purpose of discussing and improving the Work, but
- excluding communication that is conspicuously marked or otherwise
- designated in writing by the copyright owner as "Not a Contribution."
-
- "Contributor" shall mean Licensor and any individual or Legal Entity
- on behalf of whom a Contribution has been received by Licensor and
- subsequently incorporated within the Work.
-
- 2. Grant of Copyright License. Subject to the terms and conditions of
- this License, each Contributor hereby grants to You a perpetual,
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
- copyright license to reproduce, prepare Derivative Works of,
- publicly display, publicly perform, sublicense, and distribute the
- Work and such Derivative Works in Source or Object form.
-
- 3. Grant of Patent License. Subject to the terms and conditions of
- this License, each Contributor hereby grants to You a perpetual,
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
- (except as stated in this section) patent license to make, have made,
- use, offer to sell, sell, import, and otherwise transfer the Work,
- where such license applies only to those patent claims licensable
- by such Contributor that are necessarily infringed by their
- Contribution(s) alone or by combination of their Contribution(s)
- with the Work to which such Contribution(s) was submitted. If You
- institute patent litigation against any entity (including a
- cross-claim or counterclaim in a lawsuit) alleging that the Work
- or a Contribution incorporated within the Work constitutes direct
- or contributory patent infringement, then any patent licenses
- granted to You under this License for that Work shall terminate
- as of the date such litigation is filed.
-
- 4. Redistribution. You may reproduce and distribute copies of the
- Work or Derivative Works thereof in any medium, with or without
- modifications, and in Source or Object form, provided that You
- meet the following conditions:
-
- (a) You must give any other recipients of the Work or
- Derivative Works a copy of this License; and
-
- (b) You must cause any modified files to carry prominent notices
- stating that You changed the files; and
-
- (c) You must retain, in the Source form of any Derivative Works
- that You distribute, all copyright, patent, trademark, and
- attribution notices from the Source form of the Work,
- excluding those notices that do not pertain to any part of
- the Derivative Works; and
-
- (d) If the Work includes a "NOTICE" text file as part of its
- distribution, then any Derivative Works that You distribute must
- include a readable copy of the attribution notices contained
- within such NOTICE file, excluding those notices that do not
- pertain to any part of the Derivative Works, in at least one
- of the following places: within a NOTICE text file distributed
- as part of the Derivative Works; within the Source form or
- documentation, if provided along with the Derivative Works; or,
- within a display generated by the Derivative Works, if and
- wherever such third-party notices normally appear. The contents
- of the NOTICE file are for informational purposes only and
- do not modify the License. You may add Your own attribution
- notices within Derivative Works that You distribute, alongside
- or as an addendum to the NOTICE text from the Work, provided
- that such additional attribution notices cannot be construed
- as modifying the License.
-
- You may add Your own copyright statement to Your modifications and
- may provide additional or different license terms and conditions
- for use, reproduction, or distribution of Your modifications, or
- for any such Derivative Works as a whole, provided Your use,
- reproduction, and distribution of the Work otherwise complies with
- the conditions stated in this License.
-
- 5. Submission of Contributions. Unless You explicitly state otherwise,
- any Contribution intentionally submitted for inclusion in the Work
- by You to the Licensor shall be under the terms and conditions of
- this License, without any additional terms or conditions.
- Notwithstanding the above, nothing herein shall supersede or modify
- the terms of any separate license agreement you may have executed
- with Licensor regarding such Contributions.
-
- 6. Trademarks. This License does not grant permission to use the trade
- names, trademarks, service marks, or product names of the Licensor,
- except as required for reasonable and customary use in describing the
- origin of the Work and reproducing the content of the NOTICE file.
-
- 7. Disclaimer of Warranty. Unless required by applicable law or
- agreed to in writing, Licensor provides the Work (and each
- Contributor provides its Contributions) on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
- implied, including, without limitation, any warranties or conditions
- of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
- PARTICULAR PURPOSE. You are solely responsible for determining the
- appropriateness of using or redistributing the Work and assume any
- risks associated with Your exercise of permissions under this License.
-
- 8. Limitation of Liability. In no event and under no legal theory,
- whether in tort (including negligence), contract, or otherwise,
- unless required by applicable law (such as deliberate and grossly
- negligent acts) or agreed to in writing, shall any Contributor be
- liable to You for damages, including any direct, indirect, special,
- incidental, or consequential damages of any character arising as a
- result of this License or out of the use or inability to use the
- Work (including but not limited to damages for loss of goodwill,
- work stoppage, computer failure or malfunction, or any and all
- other commercial damages or losses), even if such Contributor
- has been advised of the possibility of such damages.
-
- 9. Accepting Warranty or Additional Liability. While redistributing
- the Work or Derivative Works thereof, You may choose to offer,
- and charge a fee for, acceptance of support, warranty, indemnity,
- or other liability obligations and/or rights consistent with this
- License. However, in accepting such obligations, You may act only
- on Your own behalf and on Your sole responsibility, not on behalf
- of any other Contributor, and only if You agree to indemnify,
- defend, and hold each Contributor harmless for any liability
- incurred by, or claims asserted against, such Contributor by reason
- of your accepting any such warranty or additional liability.
-
- END OF TERMS AND CONDITIONS
-
- APPENDIX: How to apply the Apache License to your work.
-
- To apply the Apache License to your work, attach the following
- boilerplate notice, with the fields enclosed by brackets "[]"
- replaced with your own identifying information. (Don't include
- the brackets!) The text should be enclosed in the appropriate
- comment syntax for the file format. We also recommend that a
- file or class name and description of purpose be included on the
- same "printed page" as the copyright notice for easier
- identification within third-party archives.
-
- Copyright 2022, fastai
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
+Attribution 4.0 International
+
+=======================================================================
+
+Creative Commons Corporation ("Creative Commons") is not a law firm and
+does not provide legal services or legal advice. Distribution of
+Creative Commons public licenses does not create a lawyer-client or
+other relationship. Creative Commons makes its licenses and related
+information available on an "as-is" basis. Creative Commons gives no
+warranties regarding its licenses, any material licensed under their
+terms and conditions, or any related information. Creative Commons
+disclaims all liability for damages resulting from their use to the
+fullest extent possible.
+
+Using Creative Commons Public Licenses
+
+Creative Commons public licenses provide a standard set of terms and
+conditions that creators and other rights holders may use to share
+original works of authorship and other material subject to copyright
+and certain other rights specified in the public license below. The
+following considerations are for informational purposes only, are not
+exhaustive, and do not form part of our licenses.
+
+ Considerations for licensors: Our public licenses are
+ intended for use by those authorized to give the public
+ permission to use material in ways otherwise restricted by
+ copyright and certain other rights. Our licenses are
+ irrevocable. Licensors should read and understand the terms
+ and conditions of the license they choose before applying it.
+ Licensors should also secure all rights necessary before
+ applying our licenses so that the public can reuse the
+ material as expected. Licensors should clearly mark any
+ material not subject to the license. This includes other CC-
+ licensed material, or material used under an exception or
+ limitation to copyright. More considerations for licensors:
+ wiki.creativecommons.org/Considerations_for_licensors
+
+ Considerations for the public: By using one of our public
+ licenses, a licensor grants the public permission to use the
+ licensed material under specified terms and conditions. If
+ the licensor's permission is not necessary for any reason--for
+ example, because of any applicable exception or limitation to
+ copyright--then that use is not regulated by the license. Our
+ licenses grant only permissions under copyright and certain
+ other rights that a licensor has authority to grant. Use of
+ the licensed material may still be restricted for other
+ reasons, including because others have copyright or other
+ rights in the material. A licensor may make special requests,
+ such as asking that all changes be marked or described.
+ Although not required by our licenses, you are encouraged to
+ respect those requests where reasonable. More considerations
+ for the public:
+ wiki.creativecommons.org/Considerations_for_licensees
+
+=======================================================================
+
+Creative Commons Attribution 4.0 International Public License
+
+By exercising the Licensed Rights (defined below), You accept and agree
+to be bound by the terms and conditions of this Creative Commons
+Attribution 4.0 International Public License ("Public License"). To the
+extent this Public License may be interpreted as a contract, You are
+granted the Licensed Rights in consideration of Your acceptance of
+these terms and conditions, and the Licensor grants You such rights in
+consideration of benefits the Licensor receives from making the
+Licensed Material available under these terms and conditions.
+
+
+Section 1 -- Definitions.
+
+ a. Adapted Material means material subject to Copyright and Similar
+ Rights that is derived from or based upon the Licensed Material
+ and in which the Licensed Material is translated, altered,
+ arranged, transformed, or otherwise modified in a manner requiring
+ permission under the Copyright and Similar Rights held by the
+ Licensor. For purposes of this Public License, where the Licensed
+ Material is a musical work, performance, or sound recording,
+ Adapted Material is always produced where the Licensed Material is
+ synched in timed relation with a moving image.
+
+ b. Adapter's License means the license You apply to Your Copyright
+ and Similar Rights in Your contributions to Adapted Material in
+ accordance with the terms and conditions of this Public License.
+
+ c. Copyright and Similar Rights means copyright and/or similar rights
+ closely related to copyright including, without limitation,
+ performance, broadcast, sound recording, and Sui Generis Database
+ Rights, without regard to how the rights are labeled or
+ categorized. For purposes of this Public License, the rights
+ specified in Section 2(b)(1)-(2) are not Copyright and Similar
+ Rights.
+
+ d. Effective Technological Measures means those measures that, in the
+ absence of proper authority, may not be circumvented under laws
+ fulfilling obligations under Article 11 of the WIPO Copyright
+ Treaty adopted on December 20, 1996, and/or similar international
+ agreements.
+
+ e. Exceptions and Limitations means fair use, fair dealing, and/or
+ any other exception or limitation to Copyright and Similar Rights
+ that applies to Your use of the Licensed Material.
+
+ f. Licensed Material means the artistic or literary work, database,
+ or other material to which the Licensor applied this Public
+ License.
+
+ g. Licensed Rights means the rights granted to You subject to the
+ terms and conditions of this Public License, which are limited to
+ all Copyright and Similar Rights that apply to Your use of the
+ Licensed Material and that the Licensor has authority to license.
+
+ h. Licensor means the individual(s) or entity(ies) granting rights
+ under this Public License.
+
+ i. Share means to provide material to the public by any means or
+ process that requires permission under the Licensed Rights, such
+ as reproduction, public display, public performance, distribution,
+ dissemination, communication, or importation, and to make material
+ available to the public including in ways that members of the
+ public may access the material from a place and at a time
+ individually chosen by them.
+
+ j. Sui Generis Database Rights means rights other than copyright
+ resulting from Directive 96/9/EC of the European Parliament and of
+ the Council of 11 March 1996 on the legal protection of databases,
+ as amended and/or succeeded, as well as other essentially
+ equivalent rights anywhere in the world.
+
+ k. You means the individual or entity exercising the Licensed Rights
+ under this Public License. Your has a corresponding meaning.
+
+
+Section 2 -- Scope.
+
+ a. License grant.
+
+ 1. Subject to the terms and conditions of this Public License,
+ the Licensor hereby grants You a worldwide, royalty-free,
+ non-sublicensable, non-exclusive, irrevocable license to
+ exercise the Licensed Rights in the Licensed Material to:
+
+ a. reproduce and Share the Licensed Material, in whole or
+ in part; and
+
+ b. produce, reproduce, and Share Adapted Material.
+
+ 2. Exceptions and Limitations. For the avoidance of doubt, where
+ Exceptions and Limitations apply to Your use, this Public
+ License does not apply, and You do not need to comply with
+ its terms and conditions.
+
+ 3. Term. The term of this Public License is specified in Section
+ 6(a).
+
+ 4. Media and formats; technical modifications allowed. The
+ Licensor authorizes You to exercise the Licensed Rights in
+ all media and formats whether now known or hereafter created,
+ and to make technical modifications necessary to do so. The
+ Licensor waives and/or agrees not to assert any right or
+ authority to forbid You from making technical modifications
+ necessary to exercise the Licensed Rights, including
+ technical modifications necessary to circumvent Effective
+ Technological Measures. For purposes of this Public License,
+ simply making modifications authorized by this Section 2(a)
+ (4) never produces Adapted Material.
+
+ 5. Downstream recipients.
+
+ a. Offer from the Licensor -- Licensed Material. Every
+ recipient of the Licensed Material automatically
+ receives an offer from the Licensor to exercise the
+ Licensed Rights under the terms and conditions of this
+ Public License.
+
+ b. No downstream restrictions. You may not offer or impose
+ any additional or different terms or conditions on, or
+ apply any Effective Technological Measures to, the
+ Licensed Material if doing so restricts exercise of the
+ Licensed Rights by any recipient of the Licensed
+ Material.
+
+ 6. No endorsement. Nothing in this Public License constitutes or
+ may be construed as permission to assert or imply that You
+ are, or that Your use of the Licensed Material is, connected
+ with, or sponsored, endorsed, or granted official status by,
+ the Licensor or others designated to receive attribution as
+ provided in Section 3(a)(1)(A)(i).
+
+ b. Other rights.
+
+ 1. Moral rights, such as the right of integrity, are not
+ licensed under this Public License, nor are publicity,
+ privacy, and/or other similar personality rights; however, to
+ the extent possible, the Licensor waives and/or agrees not to
+ assert any such rights held by the Licensor to the limited
+ extent necessary to allow You to exercise the Licensed
+ Rights, but not otherwise.
+
+ 2. Patent and trademark rights are not licensed under this
+ Public License.
+
+ 3. To the extent possible, the Licensor waives any right to
+ collect royalties from You for the exercise of the Licensed
+ Rights, whether directly or through a collecting society
+ under any voluntary or waivable statutory or compulsory
+ licensing scheme. In all other cases the Licensor expressly
+ reserves any right to collect such royalties.
+
+
+Section 3 -- License Conditions.
+
+Your exercise of the Licensed Rights is expressly made subject to the
+following conditions.
+
+ a. Attribution.
+
+ 1. If You Share the Licensed Material (including in modified
+ form), You must:
+
+ a. retain the following if it is supplied by the Licensor
+ with the Licensed Material:
+
+ i. identification of the creator(s) of the Licensed
+ Material and any others designated to receive
+ attribution, in any reasonable manner requested by
+ the Licensor (including by pseudonym if
+ designated);
+
+ ii. a copyright notice;
+
+ iii. a notice that refers to this Public License;
+
+ iv. a notice that refers to the disclaimer of
+ warranties;
+
+ v. a URI or hyperlink to the Licensed Material to the
+ extent reasonably practicable;
+
+ b. indicate if You modified the Licensed Material and
+ retain an indication of any previous modifications; and
+
+ c. indicate the Licensed Material is licensed under this
+ Public License, and include the text of, or the URI or
+ hyperlink to, this Public License.
+
+ 2. You may satisfy the conditions in Section 3(a)(1) in any
+ reasonable manner based on the medium, means, and context in
+ which You Share the Licensed Material. For example, it may be
+ reasonable to satisfy the conditions by providing a URI or
+ hyperlink to a resource that includes the required
+ information.
+
+ 3. If requested by the Licensor, You must remove any of the
+ information required by Section 3(a)(1)(A) to the extent
+ reasonably practicable.
+
+ 4. If You Share Adapted Material You produce, the Adapter's
+ License You apply must not prevent recipients of the Adapted
+ Material from complying with this Public License.
+
+
+Section 4 -- Sui Generis Database Rights.
+
+Where the Licensed Rights include Sui Generis Database Rights that
+apply to Your use of the Licensed Material:
+
+ a. for the avoidance of doubt, Section 2(a)(1) grants You the right
+ to extract, reuse, reproduce, and Share all or a substantial
+ portion of the contents of the database;
+
+ b. if You include all or a substantial portion of the database
+ contents in a database in which You have Sui Generis Database
+ Rights, then the database in which You have Sui Generis Database
+ Rights (but not its individual contents) is Adapted Material; and
+
+ c. You must comply with the conditions in Section 3(a) if You Share
+ all or a substantial portion of the contents of the database.
+
+For the avoidance of doubt, this Section 4 supplements and does not
+replace Your obligations under this Public License where the Licensed
+Rights include other Copyright and Similar Rights.
+
+
+Section 5 -- Disclaimer of Warranties and Limitation of Liability.
+
+ a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE
+ EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS
+ AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF
+ ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS,
+ IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION,
+ WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR
+ PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS,
+ ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT
+ KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT
+ ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU.
+
+ b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE
+ TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION,
+ NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT,
+ INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES,
+ COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR
+ USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN
+ ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR
+ DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR
+ IN PART, THIS LIMITATION MAY NOT APPLY TO YOU.
+
+ c. The disclaimer of warranties and limitation of liability provided
+ above shall be interpreted in a manner that, to the extent
+ possible, most closely approximates an absolute disclaimer and
+ waiver of all liability.
+
+
+Section 6 -- Term and Termination.
+
+ a. This Public License applies for the term of the Copyright and
+ Similar Rights licensed here. However, if You fail to comply with
+ this Public License, then Your rights under this Public License
+ terminate automatically.
+
+ b. Where Your right to use the Licensed Material has terminated under
+ Section 6(a), it reinstates:
+
+ 1. automatically as of the date the violation is cured, provided
+ it is cured within 30 days of Your discovery of the
+ violation; or
+
+ 2. upon express reinstatement by the Licensor.
+
+ For the avoidance of doubt, this Section 6(b) does not affect any
+ right the Licensor may have to seek remedies for Your violations
+ of this Public License.
+
+ c. For the avoidance of doubt, the Licensor may also offer the
+ Licensed Material under separate terms or conditions or stop
+ distributing the Licensed Material at any time; however, doing so
+ will not terminate this Public License.
+
+ d. Sections 1, 5, 6, 7, and 8 survive termination of this Public
+ License.
+
+
+Section 7 -- Other Terms and Conditions.
+
+ a. The Licensor shall not be bound by any additional or different
+ terms or conditions communicated by You unless expressly agreed.
+
+ b. Any arrangements, understandings, or agreements regarding the
+ Licensed Material not stated herein are separate from and
+ independent of the terms and conditions of this Public License.
+
+
+Section 8 -- Interpretation.
+
+ a. For the avoidance of doubt, this Public License does not, and
+ shall not be interpreted to, reduce, limit, restrict, or impose
+ conditions on any use of the Licensed Material that could lawfully
+ be made without permission under this Public License.
+
+ b. To the extent possible, if any provision of this Public License is
+ deemed unenforceable, it shall be automatically reformed to the
+ minimum extent necessary to make it enforceable. If the provision
+ cannot be reformed, it shall be severed from this Public License
+ without affecting the enforceability of the remaining terms and
+ conditions.
+
+ c. No term or condition of this Public License will be waived and no
+ failure to comply consented to unless expressly agreed to by the
+ Licensor.
+
+ d. Nothing in this Public License constitutes or may be interpreted
+ as a limitation upon, or waiver of, any privileges and immunities
+ that apply to the Licensor or You, including from the legal
+ processes of any jurisdiction or authority.
+
+
+=======================================================================
+
+Creative Commons is not a party to its public licenses.
+Notwithstanding, Creative Commons may elect to apply one of its public
+licenses to material it publishes and in those instances will be
+considered the “Licensor.” The text of the Creative Commons public
+licenses is dedicated to the public domain under the CC0 Public Domain
+Dedication. Except for the limited purpose of indicating that material
+is shared under a Creative Commons public license or as otherwise
+permitted by the Creative Commons policies published at
+creativecommons.org/policies, Creative Commons does not authorize the
+use of the trademark "Creative Commons" or any other trademark or logo
+of Creative Commons without its prior written consent including,
+without limitation, in connection with any unauthorized modifications
+to any of its public licenses or any other arrangements,
+understandings, or agreements concerning use of licensed material. For
+the avoidance of doubt, this paragraph does not form part of the public
+licenses.
+
+Creative Commons may be contacted at creativecommons.org.
\ No newline at end of file
diff --git a/README.md b/README.md
index 04b9caa..f1a8874 100644
--- a/README.md
+++ b/README.md
@@ -1,28 +1,613 @@
-# ERA5 Exposure Aggregation Pipeline
+# The ERA5 Spatial Aggregation Pipeline
-This repository contains a pipeline for aggregating ERA5 environmental exposures data to a 0.1 degree grid. The pipeline is designed to be run on FASRC. We developed
-this pipeline using `nbdev`, which means that we can create modules and scripts from notebooks.
-Hence, all of the documentation for how the pipeline was developed and validated is
-available in `notes/index.ipynb` and the associated notebooks.
-## How to Review a PR
+
-To review a PR on this repository, follow these steps:
+``` python
+from era5_sandbox.core import *
+```
-0. Obtain an API key for the ERA5 datastore from [here](https://cds.climate.copernicus.eu/how-to-api), and ask Tinashe for access to the Golden Lab `googledriver` API key
+## era5_sandbox
-1. Clone this repository to your workspace on FASRC
+> Sandbox environment for era5 development
-2. Create a conda environment with `conda create -n era5_sandbox python=3.10` and install all of the necessary dependencies for the package with `pip install -e .`
+This package documents the development and implementation of functions
+and code for the Madagascar ERA5 dataset project. The goal is for
+exposure data to be made available at the daily resolution when
+possible. Finer resolutions shouldn’t ever be needed for our purposes,
+and it should then be relatively easy to aggregate at coarser
+resolutions, such as weekly or monthly. Additionally, we’ve extended
+this work to Nepal as well.
-3. Run the `core` module to test your API key and setup the data
-directory structure
+Variables should generally be made available from 2010 onward, as that’s
+where our clinic data starts.
-`python src/era5_sandbox/core.py`
+All data are ideally made available at the “healthshed” geographical
+level. Healthsheds are defined as geographical areas whose residents
+all go to the same clinic. There are a total of ~2700 public
+clinics in Madagascar, hence ~2700 healthsheds, with each healthshed
+containing ~10000 people on average.
-4. Symlink your local data directory to the original work
-`ln -s [YOUR WORKING DIRECTORY]/data /n/dominici_lab/lab/data_processing/csph-era5_sandbox/data`
+Preliminary list of environmental variables
-5. Dry run by removing a file from data `snakemake --dry-run`
+- [x] 2-m air temperature from ERA5: daily min, max, mean
-6. Run the pipeline `sbatch snakemake.sbatch`
+- [x] 2-m air dew point temperature from ERA5: daily min, max, mean
+
+- [x] Precipitation: daily total (ERA5)
+
+- [x] Soil moisture: daily average (ERA5)
+
+Variables from other sources:
+
+- [ ] Sea surface temperature: daily average and maximum in the nearest
+ neighbor for each healthshed.
+
+- [ ] Precipitation: daily total (CHIRPS)
+
+- [ ] Chlorophyll-A (Giacomo)
+
+- [ ] Wealth index: Available from Giacomo
+
+- [ ] NDVI
+
+- [ ] Tropical storm
+
+- [ ] Flooding
+
+- [ ] Deforestation
+
+- [ ] Linking/segmenting healthsheds into climate zones and other
+
+- [ ] Relative humidity: daily average (lower priority)
+
+Those from the ERA5 dataset will be housed here, but we will likely
+develop a separate repository for the other datasets.
+
+## Developer Guide
+
+This package is built and maintained with `nbdev`. If you are new to
+using `nbdev` here are some useful pointers to get you started.
+
+### Install era5_sandbox in Development mode
+
+``` sh
+# make sure era5_sandbox package is installed in development mode
+$ pip install -e .
+```
+
+To make changes, go to the “notes” directory and edit the notebooks as
+necessary. Each notebook refers to a module in the era5_sandbox package.
+Cells are exported to the module when the notebook is saved and you run
+the following command:
+
+``` sh
+$ nbdev_export
+```
+
+For example, to change the functionality of the
+[`testAPI()`](https://TinasheMTapera.github.io/era5_sandbox/core.html#testapi)
+function in the testAPI Hydra rule, you would edit the
+[`testAPI`](https://TinasheMTapera.github.io/era5_sandbox/core.html#testapi)
+notebook in the `notes` directory `notes/testAPI.ipynb`, and then save
+that notebook and run `nbdev_export` to update the `core` module in the
+package.
+
+### How to Run the Pipeline
+
+The pipeline downloads ERA5 variables for a given date range and
+geographical bounding box. You can learn how each of these steps was
+developed by following the notebooks in `notes` in numerical order.
+
+> [!IMPORTANT]
+>
+> The pipeline has two implementations: one using `snakemake` and
+> `hydra`, and another using `pytask`. The `pytask` implementation is
+> the more recent one, and is recommended for future use. The
+> `snakemake` implementation is left here for reference to legacy code.
+
+#### Using `pytask`
+
+To run the pipeline, the `pytask` config at `notes/20_pytask_config.qmd`
+should be reviewed and updated if necessary. The pipeline can then be
+run with the following command:
+
+``` sh
+$ sbatch pytask.sbatch
+```
+
+#### Using `snakemake` and `hydra`
+
+To run the pipeline, the config at `config/config.yaml` should be
+updated with the desired date range and geographical bounding box. The
+pipeline can then be run with the following command:
+
+``` sh
+sbatch snakemake.sbatch
+```
+
+### What Does the Pipeline Produce?
+
+Using `pytask`’s data catalog, you can investigate the downloaded raw
+data with Python, e.g.:
+
+``` python
+import xarray as xr
+from era5_sandbox.config import data_catalog
+from era5_sandbox.core import ClimateDataFileHandler
+
+ex_nc = list(data_catalog['download']['outputs']._entries).pop()
+ex_nc_path = data_catalog['download']['outputs'][ex_nc].load()
+
+with ClimateDataFileHandler(ex_nc_path) as handler:
+ ds = xr.open_dataset(handler.get_dataset("instant"))
+
+ds
+```
+
+
Describe the configuration file used by Hydra for the pipeline
+
+
+
+
+
Type
+
Default
+
Details
+
+
+
+
+
cfg
+
DictConfig
+
None
+
Configuration file
+
+
+
Returns
+
None
+
+
+
+
+
+
+
+Exported source
+
def describe(
+ cfg: DictConfig=None, # Configuration file
+ )->None:
+"Describe the configuration file used by Hydra for the pipeline"
+
+if cfg isNone:
+print("No configuration file provided. Generating default configuration file.")
+ cfg = OmegaConf.create()
+
+print("This package fetches ERA5 data. The following is the config file used by Hydra for the pipeline:\n")
+print(OmegaConf.to_yaml(cfg))
We’re going to use a class to authenticate and interact with google drive. The goal is to have a simple interface to fetch the healthshed files dynamically from google drive in the pipeline.
+
+
+
+
+
+
+Important
+
+
+
+
This class was implemented when all of our data was stored on a private Google Drive. Since we have moved all of our data to FASRC, this will likely be deprecated in the near future.
*A class to handle Google Drive authentication and file management. This class uses the PyDrive2 library to authenticate with Google Drive using a service account.
+
It provides three methods: authenticating the account, getting the drive object, and downloading the healthshed files for madagascar.*
+
Here’s how we use it. The credentials for the data-pipeline service account are available in the sandbox folder, and the path to said folder is set in the config:
+
+
from hydra import initialize, compose
+from omegaconf import OmegaConf
+
+
+
# unfortunately, we have to use the initialize function to load the config file
+# this is because the @hydra decorator does not work with Notebooks very well
+# this is a known issue with Hydra: https://gist.github.com/bdsaglam/586704a98336a0cf0a65a6e7c247d248
+#
+# just use the relative path from the notebook to the config dir
+try:
+with initialize(version_base=None, config_path="../conf"):
+ cfg = compose(config_name='config.yaml')
+exceptExceptionas e:
+print(f"Error initializing Hydra: {e}")
+with initialize(version_base=None, config_path="conf"):
+ cfg = compose(config_name='config.yaml')
+
+
+
+
+
+
+
+Important
+
+
+
+
If we continue with pytask, we will not need to use hydra at all, and so the above strategy may get deprecated.
Here’s how we might check that the healthsheds are accessible in the drive:
+
+
# we're using the madagascar healthshed folder as an example
+folder_id = cfg.geographies.madagascar.healthsheds
+folder_name ="healthsheds2022.zip"
+file_list = drive.ListFile({'q': f" title='{folder_name}' and trashed = false "}).GetList()
+
+forfilein file_list:
+print(f"{file['title']} - {file['mimeType']}")
+
+
That being said, we can read in the healthsheds into geopandas by downloading them to a temp directory. The healthsheds must be a zipped shapefiles package with the files at the root of the zip directory.
+
+
with tempfile.TemporaryDirectory() as temp_dir:
+# Create a temporary directory to store the downloaded file
+ zip_path = os.path.join(temp_dir, folder_name)
+
+# Download file from Google Drive
+ file_obj = drive.CreateFile({'id': file_list[0]['id']})
+ file_obj.GetContentFile(zip_path)
+
+# Read shapefile directly from ZIP
+ gdf = gpd.read_file(f"zip://{zip_path}")
+
+
That works! So now we can patch the class to include this workflow:
Usually, when you download data, it comes out as a simple .nc file that can be opened with xarray. However, the CDS API has a few different file types that are not .nc files. For example, the ERA5 data is stored in a .grib file format. This is a common format for meteorological data, and it is used by the ECMWF. When a query has multiple variables, sometimes they are downloaded as a .zip file to separate the grib from the netcdf.
+
So, below, we define a class that can handle the file no matter what the type is. It will check the file type and then use the appropriate method to open it. The class will also have a method to check if the file is a .zip file, and if so, it will unzip it and return the path to the unzipped file.
A class to handle file operations for the Climate Data Store (CDS). This class unpacks files downloaded from the CDS API. It must be able to handle the unpacking of files downloaded from the CDS API. This means that if the file is a basic netcdf, it should be passed to the netcdf handler. If the file is a zip, it should be handled by the zip handler in temp and the data returned as required.
+
+
import xarray as xr
+from fastcore.test import test_fail
+
+
+
eg_file = here() /"bld/2019_5_madagascar.nc"
+
+# this fails because the nc file downloaded has grib and netcdf in it, so
+# xr cannot handle it.
+def wont_work(multilayer_file):
+
+ ds = xr.open_dataset(multilayer_file)
+
+test_fail(
+ wont_work,
+ args=(eg_file)
+)
+
+# equivalent to saying try: wont_work(eg_file) Except: some error handling
+
+
The above fails because the download contains temperature and precipitation data, which get encoded silently as different formats. Even though it is one file, it contains both grib and netcdf data and is encoded as a .zip file. So we use the class to read it instead:
The above line for ds2 is commented out because the example file does not separate accumulation data.
+
+
+
+
ds1
+
+
+
#ds2
+
+
+
handler.cleanup()
+
+
Great! Let’s add a context handler and this can be added to the pipeline, so that with the entry and exit methods, we can now use the class in a with statement:
+
+
with ClimateDataFileHandler(eg_file) as handler:
+ ds1 = xr.open_dataset(handler.get_dataset("instant"))
+#ds2 = xr.open_dataset(handler.get_dataset("accum"))
+
+print(ds1)
+#print(ds2)
+
+
+
+
+
Tests and Main
+
In nbdev, our tests are embedded in the notebook. Whenever you export the notebook, all the cells that are specified to run are run, and hence, the tests are executed. The tests are also exported. This is a great way to ensure that your documentation is always up-to-date. For this module, we’re using the testAPI() function as our main test.
def testAPI(
+ cfg: DictConfig=None,
+ dataset:str="reanalysis-era5-pressure-levels"
+ )->bool:
+
+# parse config
+ testing=cfg.development_mode
+ output_path=here("data") /"testing"
+
+print(OmegaConf.to_yaml(cfg))
+
+try:
+ client = cdsapi.Client()
+
+# build request
+ request = {
+'product_type': ['reanalysis'],
+'variable': ['geopotential'],
+'year': ['2024'],
+'month': ['03'],
+'day': ['01'],
+'time': ['13:00'],
+'pressure_level': ['1000'],
+'data_format': 'grib',
+ }
+
+ target = output_path /'test_download.grib'
+
+print("Testing API connection by downloading a dummy dataset to {}...".format(output_path))
+
+ client.retrieve(dataset, request, target)
+
+ifnot testing:
+ os.remove(target)
+
+print("API connection test successful.")
+returnTrue
+
+exceptExceptionas e:
+print("API connection test failed.")
+print("Did you set up your API key with CDS? If not, please visit https://cds.climate.copernicus.eu/how-to-api#install-the-cds-api-client")
+print("Error: {}".format(e))
+returnFalse
+
+
+
We can see that this API tester tool works with Hydra configuration:
+
+
from hydra import initialize, compose
+from omegaconf import OmegaConf
+
+
+
# unfortunately, we have to use the initialize function to load the config file
+# this is because the @hydra decorator does not work with Notebooks very well
+# this is a known issue with Hydra: https://gist.github.com/bdsaglam/586704a98336a0cf0a65a6e7c247d248
+#
+# just use the relative path from the notebook to the config dir
+try:
+with initialize(version_base=None, config_path="../conf"):
+ cfg = compose(config_name='config.yaml')
+exceptExceptionas e:
+print(f"Error initializing Hydra: {e}")
+with initialize(version_base=None, config_path="conf"):
+ cfg = compose(config_name='config.yaml')
+
+describe(cfg)
+
+
+
+
Importing the Main Function
+
+
+
+
+
+
+Important
+
+
+
+
As mentioned, if we continue with pytask, we will not need to use hydra at all, and so the main function may get deprecated as pytask will handle the pipeline execution without __main__ scripts.
+
+
+
Important: using __main__ in nbdev and Hydra is a little bit tricky. We need to define the main function in the module ONLY ONCE and then when we export the notebook to script, we need to add the nbdev.imports.IN_NOTEBOOK variable. This way, the main function will only be executed when the exported module is run as a script, and not when we run the notebook or import the module.
+
+
+
+
+
\ No newline at end of file
diff --git a/_docs/00_core.md b/_docs/00_core.md
new file mode 100644
index 0000000..43d541c
--- /dev/null
+++ b/_docs/00_core.md
@@ -0,0 +1,529 @@
+# Core Module: Internal functions and testing
+
+
+## core
+
+> This is a core library for the ERA5 dataset pipeline. It defines a few
+> helpful functions such as an API tester to test your API key and
+> connection.
+
+
+
+
+Exported source
+
+``` python
+import os
+import cdsapi
+import hydra
+import json
+import tempfile
+import argparse
+import zipfile
+import shutil
+import geopandas as gpd
+from pathlib import Path
+from pydrive2.auth import GoogleAuth
+from pydrive2.drive import GoogleDrive
+from omegaconf import DictConfig, OmegaConf
+from pyprojroot import here
+from importlib import import_module
+```
+
+
+
+## Utilities
+
+Some utilities are provided to help you with the ERA5 dataset.
+
+------------------------------------------------------------------------
+
+source
+
+### describe
+
+> describe (cfg:omegaconf.dictconfig.DictConfig=None)
+
+*Describe the configuration file used by Hydra for the pipeline*
+
+
+
+
+
+
Type
+
Default
+
Details
+
+
+
+
+
cfg
+
DictConfig
+
None
+
Configuration file
+
+
+
Returns
+
None
+
+
+
+
+
+
+
+Exported source
+
+``` python
+def describe(
+ cfg: DictConfig=None, # Configuration file
+ )-> None:
+ "Describe the configuration file used by Hydra for the pipeline"
+
+ if cfg is None:
+ print("No configuration file provided. Generating default configuration file.")
+ cfg = OmegaConf.create()
+
+ print("This package fetches ERA5 data. The following is the config file used by Hydra for the pipeline:\n")
+ print(OmegaConf.to_yaml(cfg))
+```
+
+
+
+In addition, we’ve defined 3 private functions to help with path
+expansion
+[`_expand_path`](https://TinasheMTapera.github.io/era5_sandbox/core.html#_expand_path),
+dynamic function importing
+[`_get_callable`](https://TinasheMTapera.github.io/era5_sandbox/core.html#_get_callable),
+and directory structure creation
+[`_create_directory_structure`](https://TinasheMTapera.github.io/era5_sandbox/core.html#_create_directory_structure).
+
+### A Simple Temperature Conversion Function
+
+------------------------------------------------------------------------
+
+source
+
+### kelvin_to_celsius
+
+> kelvin_to_celsius (kelvin:float)
+
+*Convert temperature from Kelvin to Celsius.*
+
+
+
+
+
+
Type
+
Details
+
+
+
+
+
kelvin
+
float
+
Temperature in Kelvin
+
+
+
Returns
+
float
+
Temperature in Celsius
+
+
+
+
+### A Class for Authenticating Google Drive
+
+We’re going to use a class to authenticate and interact with google
+drive. The goal is to have a simple interface to fetch the healthshed
+files dynamically from google drive in the pipeline.
+
+
+
+> **Important**
+>
+> This class was implemented when all of our data was stored on a
+> private Google Drive. Since we have moved all of our data to FASRC,
+> this will likely be deprecated in the near future.
+
+
+
+------------------------------------------------------------------------
+
+source
+
+### GoogleDriver
+
+> GoogleDriver (json_key_path=None)
+
+\*A class to handle Google Drive authentication and file management.
+This class uses the PyDrive2 library to authenticate with Google Drive
+using a service account.
+
+It provides three methods: authenticating the account, getting the drive
+object, and downloading the healthshed files for madagascar.\*
+
+Here’s how we use it. The credentials for the data-pipeline service
+account are available in the sandbox folder, and the path to said folder
+is set in the config:
+
+``` python
+from hydra import initialize, compose
+from omegaconf import OmegaConf
+```
+
+``` python
+# unfortunately, we have to use the initialize function to load the config file
+# this is because the @hydra decorator does not work with Notebooks very well
+# this is a known issue with Hydra: https://gist.github.com/bdsaglam/586704a98336a0cf0a65a6e7c247d248
+#
+# just use the relative path from the notebook to the config dir
+try:
+ with initialize(version_base=None, config_path="../conf"):
+ cfg = compose(config_name='config.yaml')
+except Exception as e:
+ print(f"Error initializing Hydra: {e}")
+ with initialize(version_base=None, config_path="conf"):
+ cfg = compose(config_name='config.yaml')
+```
+
+
+
+> **Important**
+>
+> If we continue with `pytask`, we will not need to use hydra at all,
+> and so the above strategy may get deprecated.
+
+
+
+``` python
+auth = GoogleDriver(json_key_path=here() / cfg.GOOGLE_DRIVE_AUTH_JSON.path)
+drive = auth.get_drive()
+```
+
+Here’s how we might check that the healthsheds are accessible in the
+drive:
+
+``` python
+# we're using the madagascar healthshed folder as an example
+folder_id = cfg.geographies.madagascar.healthsheds
+folder_name = "healthsheds2022.zip"
+file_list = drive.ListFile({'q': f" title='{folder_name}' and trashed = false "}).GetList()
+
+for file in file_list:
+ print(f"{file['title']} - {file['mimeType']}")
+```
+
+That being said, we can read in the healthsheds into geopandas by
+downloading them to a temp directory. The healthsheds must be a zipped
+shapefiles package with the files at the root of the zip directory.
+
+``` python
+with tempfile.TemporaryDirectory() as temp_dir:
+ # Create a temporary directory to store the downloaded file
+ zip_path = os.path.join(temp_dir, folder_name)
+
+ # Download file from Google Drive
+ file_obj = drive.CreateFile({'id': file_list[0]['id']})
+ file_obj.GetContentFile(zip_path)
+
+ # Read shapefile directly from ZIP
+ gdf = gpd.read_file(f"zip://{zip_path}")
+```
+
+That works! So now we can patch the class to include this workflow:
+
+------------------------------------------------------------------------
+
+source
+
+### GoogleDriver.read_healthsheds
+
+> GoogleDriver.read_healthsheds (healthshed_zip_name)
+
+And to check that it works:
+
+``` python
+driver = GoogleDriver(json_key_path=here() / cfg.GOOGLE_DRIVE_AUTH_JSON.path)
+drive = driver.get_drive()
+healthsheds = driver.read_healthsheds("healthsheds2022.zip")
+
+healthsheds.describe()
+```
+
+## CDS File Handler Type
+
+
+
+> **Important**
+>
+> This section may also be deprecated. Since adding `swvl1` to the
+> pipeline, we have not needed to use this class. We leave it here for
+> now for reference.
+
+
+
+We’re going to make a file handler type to help deal with CDS files.
+This is to fix
+[NSAPH-Data-Processing/era5_sandbox#13](https://github.com/NSAPH-Data-Processing/era5_sandbox/issues/13).
+
+Usually, when you download data, it comes out as a simple .nc file that
+can be opened with xarray. However, the CDS API has a few different file
+types that are not .nc files. For example, the ERA5 data is stored in a
+.grib file format. This is a common format for meteorological data, and
+it is used by the ECMWF. When a query has multiple variables, sometimes
+they are downloaded as a .zip file to separate the grib from the netcdf.
+
+So, below, we define a class that can handle the file no matter what the
+type is. It will check the file type and then use the appropriate method
+to open it. The class will also have a method to check if the file is a
+.zip file, and if so, it will unzip it and return the path to the
+unzipped file.
+
+------------------------------------------------------------------------
+
+source
+
+### ClimateDataFileHandler
+
+> ClimateDataFileHandler (input_path:str)
+
+*A class to handle file operations for the Climate Data Store (CDS).
+This class unpacks files downloaded from the CDS API. It must be
+able to handle the unpacking of files downloaded from the CDS API. This
+means that if the file is a basic netcdf, it should be passed to the
+netcdf handler. If the file is a zip, it should be handled by the zip
+handler in temp and the data returned as required.*
+
+``` python
+import xarray as xr
+from fastcore.test import test_fail
+```
+
+``` python
+eg_file = here() / "bld/2019_5_madagascar.nc"
+
+# this fails because the nc file downloaded has grib and netcdf in it, so
+# xr cannot handle it.
+def wont_work(multilayer_file):
+
+ ds = xr.open_dataset(multilayer_file)
+
+test_fail(
+ wont_work,
+ args=(eg_file)
+)
+
+# equivalent to saying try: wont_work(eg_file) Except: some error handling
+```
+
+The above fails because the download contains temperature and
+precipitation data, which get encoded silently as different formats.
+Even though it is one file, it contains both grib and netcdf data and is
+encoded as a .zip file. So we use the class to read it instead:
+
+``` python
+handler = ClimateDataFileHandler(eg_file)
+handler.prepare()
+ds1 = xr.open_dataset(handler.get_dataset("instant"))
+#ds2 = xr.open_dataset(handler.get_dataset("accum"))
+```
+
+
+
+> **Important**
+>
+> The above line for `ds2` is commented out because the example file
+> does not separate accumulation data.
+
+
+
+``` python
+ds1
+```
+
+``` python
+#ds2
+```
+
+``` python
+handler.cleanup()
+```
+
+Great! Let’s add a context handler and this can be added to the
+pipeline, so that with the entry and exit methods, we can now use the
+class in a `with` statement:
+
+``` python
+with ClimateDataFileHandler(eg_file) as handler:
+ ds1 = xr.open_dataset(handler.get_dataset("instant"))
+ #ds2 = xr.open_dataset(handler.get_dataset("accum"))
+
+ print(ds1)
+ #print(ds2)
+```
+
+## Tests and Main
+
+In `nbdev`, our tests are embedded in the notebook. Whenever you export
+the notebook, all the cells that are specified to run are run, and
+hence, the tests are executed. The tests are also exported. This is a
+great way to ensure that your documentation is always up-to-date. For
+this module, we’re using the
+[`testAPI()`](https://TinasheMTapera.github.io/era5_sandbox/core.html#testapi)
+function as our main test.
+
+------------------------------------------------------------------------
+
+source
+
+### testAPI
+
+> testAPI (cfg:omegaconf.dictconfig.DictConfig=None,
+> dataset:str='reanalysis-era5-pressure-levels')
+
+
+Exported source
+
+``` python
+def testAPI(
+ cfg: DictConfig=None,
+ dataset:str="reanalysis-era5-pressure-levels"
+ )-> bool:
+
+ # parse config
+ testing=cfg.development_mode
+ output_path=here("data") / "testing"
+
+ print(OmegaConf.to_yaml(cfg))
+
+ try:
+ client = cdsapi.Client()
+
+ # build request
+ request = {
+ 'product_type': ['reanalysis'],
+ 'variable': ['geopotential'],
+ 'year': ['2024'],
+ 'month': ['03'],
+ 'day': ['01'],
+ 'time': ['13:00'],
+ 'pressure_level': ['1000'],
+ 'data_format': 'grib',
+ }
+
+ target = output_path / 'test_download.grib'
+
+ print("Testing API connection by downloading a dummy dataset to {}...".format(output_path))
+
+ client.retrieve(dataset, request, target)
+
+ if not testing:
+ os.remove(target)
+
+ print("API connection test successful.")
+ return True
+
+ except Exception as e:
+ print("API connection test failed.")
+ print("Did you set up your API key with CDS? If not, please visit https://cds.climate.copernicus.eu/how-to-api#install-the-cds-api-client")
+ print("Error: {}".format(e))
+ return False
+```
+
+
+
+We can see that this API tester tool works with Hydra configuration:
+
+``` python
+from hydra import initialize, compose
+from omegaconf import OmegaConf
+```
+
+``` python
+# unfortunately, we have to use the initialize function to load the config file
+# this is because the @hydra decorator does not work with Notebooks very well
+# this is a known issue with Hydra: https://gist.github.com/bdsaglam/586704a98336a0cf0a65a6e7c247d248
+#
+# just use the relative path from the notebook to the config dir
+try:
+ with initialize(version_base=None, config_path="../conf"):
+ cfg = compose(config_name='config.yaml')
+except Exception as e:
+ print(f"Error initializing Hydra: {e}")
+ with initialize(version_base=None, config_path="conf"):
+ cfg = compose(config_name='config.yaml')
+
+describe(cfg)
+```
+
+### Importing the Main Function
+
+
+
+> **Important**
+>
+> As mentioned, if we continue with `pytask`, we will not need to use
+> hydra at all, and so the main function may get deprecated as `pytask`
+> will handle the pipeline execution without `__main__` scripts.
+
+
+
+Important: using `__main__` in nbdev and Hydra is a little bit tricky.
+We need to define the main function in the module ONLY ONCE and then
+when we export the notebook to script, we need to add the
+`nbdev.imports.IN_NOTEBOOK` variable. This way, the main function will
+only be executed when the exported module is run as a script, and not
+when we run the notebook or import the module.
+
+``` python
+from nbdev.imports import IN_NOTEBOOK
+```
+
+You’ll see this listed throughout the notebooks.
+
+------------------------------------------------------------------------
+
+source
+
+### main
+
+> main (cfg:omegaconf.dictconfig.DictConfig)
+
+
+Exported source
+
+``` python
+@hydra.main(version_base=None, config_path="../../conf", config_name="config")
+def main(cfg: DictConfig) -> None:
+
+ # Create the directory structure
+ _create_directory_structure(here() / "data", cfg.datapaths)
+
+ # test the api
+ testAPI(cfg=cfg)
+```
+
+
+
+``` python
+try: from nbdev.imports import IN_NOTEBOOK
+except: IN_NOTEBOOK=False
+
+if __name__ == "__main__" and not IN_NOTEBOOK:
+ main()
+```
diff --git a/_docs/01_download_raw_data.html b/_docs/01_download_raw_data.html
new file mode 100644
index 0000000..37dfd00
--- /dev/null
+++ b/_docs/01_download_raw_data.html
@@ -0,0 +1,969 @@
+
+
+
+
+
+
+
+
+
+Download Module: Downloading Raw Data from CDSAPI – era5_sandbox
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
This module downloads the raw data from CDS and saves it in the local directory
+
+
+
We use a similar approach to the one in the tutorial to download the data to local storage.
+
The background functionality in this module involves downloading the bounding box of a region of interest, and sending that to the CDS API query. As such, we define two helper functions: one to fetch the OCHA/HDX shapefiles for a geographic region, and another to create the bounding box from the files.
def download_raw_era5(
+ cfg: DictConfig # hydra configuration file
+ )->None:
+'''
+ Send the query to the API and download the data
+ '''
+
+# parse the cfg
+ testing = cfg.development_mode # for testing
+ output_dir = here("data/input") # output directory
+
+ geography = cfg.query.geography
+
+ target = os.path.join(_expand_path(output_dir), "{}_{}_{}.nc".format(geography, cfg.query['year'], cfg.query['month']))
+
+ client = cdsapi.Client()
+
+ query = _validate_query(cfg.query)
+
+ dataset = cfg.dataset
+# to make sure the query is valid at the end
+del query['geography']
+
+# Send the query to the client
+ifnot testing:
+ bounds = create_bounding_box(cfg.geographies[geography]['shapefile'])
+ query['area'] = bounds
+ client.retrieve(dataset, query).download(target)
+
+print("Downloaded file to: {}".format(target))
+else:
+print(f"Testing mode. Not downloading data. Query is {query}")
+
+print("Done")
+
+
+
+
+
+
Tests and Main
+
Here we define some tests and the main function that will be used to download the data.
+
+
from hydra import initialize, compose
+from omegaconf import OmegaConf
+
+# unfortunately, we have to use the initialize function to load the config file
+# this is because the @hydra decorator does not work with Notebooks very well
+# this is a known issue with Hydra: https://gist.github.com/bdsaglam/586704a98336a0cf0a65a6e7c247d248
+#
+# just use the relative path from the notebook to the config dir
+try:
+with initialize(version_base=None, config_path="../conf"):
+ cfg = compose(config_name='config.yaml')
+exceptExceptionas e:
+print(f"Error initializing Hydra: {e}")
+with initialize(version_base=None, config_path="conf"):
+ cfg = compose(config_name='config.yaml')
+
+cfg.development_mode =False
+cfg.query['year'] =2017
+cfg.query['month'] =11
+#cfg.query['day'] = 1
+#cfg.query['time'] = "00:00"
+cfg.query['geography'] ="nepal"
+download_raw_era5(cfg)
@hydra.main(config_path="../../conf", config_name="config", version_base=None)
+def main(cfg: DictConfig) ->None:
+ download_raw_era5(cfg=cfg)
+# better approach would be to have the function only use the specific arguments of the config
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/_docs/01_download_raw_data.md b/_docs/01_download_raw_data.md
new file mode 100644
index 0000000..cb8d8b0
--- /dev/null
+++ b/_docs/01_download_raw_data.md
@@ -0,0 +1,315 @@
+# Download Module: Downloading Raw Data from CDSAPI
+
+
+## download
+
+> This module downloads the raw data from CDS and saves it in the local
+> directory
+
+
+
+We use a similar approach to the one in the tutorial to download the
+data to local storage.
+
+Behind the scenes, this module works out the bounding box of a region
+of interest and sends it along with the CDS API query. To do so, we
+define two helper functions: one to fetch the OCHA/HDX shapefiles for a
+geographic region, and another to create the bounding box from those
+files.
+
+------------------------------------------------------------------------
+
+source
+
+### fetch_GADM
+
+> fetch_GADM
+> (url:str='https://geodata.ucdavis.edu/gadm/gadm4.1/gpkg/gadm4
+> 1_MDG.gpkg', output_file:str='gadm41_MDG.gpkg')
+
+*Fetch the GADM bounding box for geographic region*
+
+
+
+------------------------------------------------------------------------
+
+source
+
+### create_bounding_box
+
+> create_bounding_box (zip_url_or_path:str, buffer_km:float=50,
+> round_to:int=1)
+
+*Create a bounding box from OCHA/HDX shapefile data with a buffer.*
+
+
+
+
+
+
+
+
+
+
+
+
Type
+
Default
+
Details
+
+
+
+
+
zip_url_or_path
+
str
+
+
URL or local path to the zipped shapefile.
+
+
+
buffer_km
+
float
+
50
+
Buffer distance in kilometers to expand the bounding box.
+
+
+
round_to
+
int
+
1
+
Number of decimal places to round the bounding box coordinates.
+
+
+
Returns
+
list
+
+
Bounding box in the CDS API area format [North, West, South,
+East]
+
+
+
+
+
+Exported source
+
+``` python
+def create_bounding_box(
+ zip_url_or_path: str, # URL or local path to the zipped shapefile.
+ buffer_km: float = 50, # Buffer distance in kilometers to expand the bounding box.
+ round_to: int = 1 # Number of decimal places to round the bounding box coordinates.
+) -> list: # Bounding box in the CDS API area format [North, West, South, East]
+ '''
+ Create a bounding box from OCHA/HDX shapefile data with a buffer.
+ '''
+ with tempfile.TemporaryDirectory() as tmpdir:
+ # Download if it's a URL
+ if zip_url_or_path.startswith("http"):
+ response = requests.get(zip_url_or_path)
+ zip_path = os.path.join(tmpdir, "ocha_data.zip")
+ with open(zip_path, "wb") as f:
+ f.write(response.content)
+ else:
+ zip_path = zip_url_or_path
+
+ # Unzip
+ with zipfile.ZipFile(zip_path, 'r') as zip_ref:
+ zip_ref.extractall(tmpdir)
+
+ # Find the .shp file
+ shp_files = list(Path(tmpdir).rglob("*.shp"))
+ if not shp_files:
+ raise FileNotFoundError("No shapefile (.shp) found in the extracted archive.")
+ shp_path = str(shp_files[0]) # Use first found .shp
+
+ # Read shapefile
+ shape = gpd.read_file(shp_path)
+
+ # Reproject to projected CRS (you may want to detect the correct UTM zone)
+ shape_proj = shape.to_crs(epsg=32738)
+
+ # Apply buffer
+ buffered = shape_proj.geometry.buffer(buffer_km * 1000)
+
+ # Convert back to geographic coordinates
+ buffered_geo = gpd.GeoSeries(buffered, crs=shape_proj.crs).to_crs(epsg=4326)
+
+ # Get bounding box
+ bounds = buffered_geo.total_bounds # [min_x, min_y, max_x, max_y]
+ bbox = [
+ round(bounds[3], round_to), # North
+ round(bounds[0], round_to), # West
+ round(bounds[1], round_to), # South
+ round(bounds[2], round_to) # East
+ ]
+
+ return bbox
+```
+
+
+
+The primary function to download the data from CDSAPI is defined below.
+
+------------------------------------------------------------------------
+
+source
+
+### download_raw_era5
+
+> download_raw_era5 (cfg:omegaconf.dictconfig.DictConfig)
+
+*Send the query to the API and download the data*
+
+
+
+
+
+
Type
+
Details
+
+
+
+
+
cfg
+
DictConfig
+
hydra configuration file
+
+
+
Returns
+
None
+
+
+
+
+
+
+Exported source
+
+``` python
+def download_raw_era5(
+ cfg: DictConfig # hydra configuration file
+ )->None:
+ '''
+ Send the query to the API and download the data
+ '''
+
+ # parse the cfg
+ testing = cfg.development_mode # for testing
+ output_dir = here("data/input") # output directory
+
+ geography = cfg.query.geography
+
+ target = os.path.join(_expand_path(output_dir), "{}_{}_{}.nc".format(geography, cfg.query['year'], cfg.query['month']))
+
+ client = cdsapi.Client()
+
+ query = _validate_query(cfg.query)
+
+ dataset = cfg.dataset
+ # to make sure the query is valid at the end
+ del query['geography']
+
+ # Send the query to the client
+ if not testing:
+ bounds = create_bounding_box(cfg.geographies[geography]['shapefile'])
+ query['area'] = bounds
+ client.retrieve(dataset, query).download(target)
+
+ print("Downloaded file to: {}".format(target))
+ else:
+ print(f"Testing mode. Not downloading data. Query is {query}")
+
+ print("Done")
+```
+
+
+
+## Tests and Main
+
+Here we define some tests and the main function that will be used to
+download the data.
+
+``` python
+from hydra import initialize, compose
+from omegaconf import OmegaConf
+
+# unfortunately, we have to use the initialize function to load the config file
+# this is because the @hydra decorator does not work with Notebooks very well
+# this is a known issue with Hydra: https://gist.github.com/bdsaglam/586704a98336a0cf0a65a6e7c247d248
+#
+# just use the relative path from the notebook to the config dir
+try:
+    with initialize(version_base=None, config_path="../conf"):
+        cfg = compose(config_name='config.yaml')
+except Exception as e:
+    # "../conf" could not be used; report why and retry with "conf"
+    print(f"Error initializing Hydra: {e}")
+    with initialize(version_base=None, config_path="conf"):
+        cfg = compose(config_name='config.yaml')
+
+# override the composed config for this example run; with development_mode
+# set to False, download_raw_era5 performs the actual download
+cfg.development_mode = False
+cfg.query['year'] = 2017
+cfg.query['month'] = 11
+#cfg.query['day'] = 1
+#cfg.query['time'] = "00:00"
+cfg.query['geography'] = "nepal"
+download_raw_era5(cfg)
+```
+
+------------------------------------------------------------------------
+
+source
+
+### main
+
+> main (cfg:omegaconf.dictconfig.DictConfig)
+
+
+Exported source
+
+``` python
+@hydra.main(config_path="../../conf", config_name="config", version_base=None)
+def main(cfg: DictConfig) -> None:
+    """Command-line entry point: hand the Hydra-composed config to download_raw_era5."""
+    download_raw_era5(cfg=cfg)
+    # better approach would be to have the function only use the specific arguments of the config
+```
+
+
diff --git a/_docs/02_aggregate.html b/_docs/02_aggregate.html
new file mode 100644
index 0000000..762b66f
--- /dev/null
+++ b/_docs/02_aggregate.html
@@ -0,0 +1,1556 @@
+
+
+
+
+
+
+
+
+
+Aggregate Module: Spatial Aggregation to Healthsheds – era5_sandbox
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Aggregate Module: Spatial Aggregation to Healthsheds
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
aggregate
+
+
This module aggregates the downloaded data into the respective output dataframes.
+
+
+
We prototyped the code in this module using a Jupyter notebook. The notebook is available in notes/prototypes/learning_aggregations_w_michelle_20250328.ipynb. The code in this module is a cleaned-up version of the code in that notebook. The notebook contains additional comments and explanations of the code, which may be helpful for understanding the code in this module.
+
The basic process is as follows:
+
+
Load the netCDF data in memory
+
Statistically aggregate the hourly data to daily data per exposure using resample()
+
Write out the data to tiff
+
Read the tiff data back in
+
Read in the shapefile that defines the healthsheds
+
Spatially aggregate the exposure data to the healthsheds
+
Quality check the aggregations
+
Write out final aggregations to tiff
+
+
+
+Exported source
+
import tempfile
+import rasterio
+import hydra
+import argparse
+import os
+
+import pandas as pd
+import geopandas as gpd
+import numpy as np
+import xarray as xr
+import matplotlib.pyplot as plt
+
+from dataclasses import dataclass, field
+from typing import Optional, Tuple
+from pyprojroot import here
+from hydra import initialize, compose
+from omegaconf import OmegaConf, DictConfig
+from tqdm import tqdm
+from math import ceil, floor
+from rasterstats.io import Raster
+from rasterstats.utils import boxify_points, rasterize_geom
+
+try: from era5_sandbox.core import GoogleDriver, _get_callable, describe, ClimateDataFileHandler, kelvin_to_celsius
+except: from core import GoogleDriver, _get_callable, describe, ClimateDataFileHandler, kelvin_to_celsius
We’re going to write a function that aggregates the data for a single exposure from a file. This file should be the single month data we got from the previous step in the pipeline.
+ resample_netcdf (fpath:str, resample:str='1D',
+ agg_func:<built-in function callable>=<function mean at 0x145cb6c3b930>,
+ time_dim:str='valid_time', **xr_open_kwargs)
+
+
*Resample a netCDF file to a specified frequency and aggregation method.
+
Args: fpath (str): Path to the netCDF file. resample (str): Resampling frequency (e.g., ‘1H’, ‘1D’). agg_func (callable): Aggregation function (e.g., np.mean, np.sum).
+
Returns: xarray.Dataset: Resampled dataset.*
+
+
+
+
+
+
+
+
+
+
+
Type
+
Default
+
Details
+
+
+
+
+
fpath
+
str
+
+
Path to the netCDF file.
+
+
+
resample
+
str
+
1D
+
Resampling frequency (e.g., ‘1H’, ‘1D’)
+
+
+
agg_func
+
callable
+
mean
+
Aggregation function (e.g., np.mean, np.sum).
+
+
+
time_dim
+
str
+
valid_time
+
Name of the time dimension in the dataset.
+
+
+
xr_open_kwargs
+
VAR_KEYWORD
+
+
+
+
+
Returns
+
Dataset
+
+
keywords for python’s xarray module
+
+
+
+
We pull the aggregation function from the config file:
+
+
var ='swvl1'
+agg_func = _get_callable(cfg['aggregation']['aggregation'][var]['hourly_to_daily'][0]['function'])
+
+
+
with ClimateDataFileHandler(eg_file) as handler:
+
+ ds_path = handler.get_dataset("instant")
+ resampled_data = resample_netcdf(ds_path, agg_func=agg_func)
+
+
I’m going to use a dataclass to represent the tiff data. This will allow us to easily pass around the data and metadata associated with the tiff file. Why? I’ve never used dataclasses and I’m curious about them — ChatGPT thinks this will make the code cleaner and easier to read.
The day to rasterise; 1 indexed just like human english
+
+
+
variable
+
str
+
+
The variable name to convert.
+
+
+
crs
+
str
+
EPSG:4326
+
Coordinate reference system (default is WGS84).
+
+
+
+
+
+Exported source
+
def netcdf_to_tiff(
+ ds: xr.Dataset, # The aggregated xarray dataset to convert.
+ band: int, # The day to rasterise; 1 indexed just like human english
+ variable: str, # The variable name to convert.
+ crs: str="EPSG:4326", # Coordinate reference system (default is WGS84).
+ ):
+
+"""
+ Convert a netCDF file to a GeoTIFF file.
+ """
+
+with tempfile.TemporaryDirectory() as tmpdirname:
+
+# Select the variable and time index
+ variable = ds[variable]
+ ds_ = variable.rio.set_spatial_dims(x_dim="longitude", y_dim="latitude")
+ ds_ = ds_.rio.write_crs(crs)
+# Save as GeoTIFF
+ ds_.rio.to_raster(f"{tmpdirname}/output.tif")
+# Load the raster file
+ raster_file = RasterFile(path=f"{tmpdirname}/output.tif", band=band).load()
+
+return raster_file
Super cool! The tiff file is created and the data is read back in correctly. Now we can move on to the next step, which is to aggregate the data by healthshed.
+
+
+
+
Polygon to Raster Cells
+
This function was initially shared from a previous NSAPH aggregation pipeline here. To better understand this, here is a ChatGPT explanation of the code:
+
+
This function, polygon_to_raster_cells, is doing a crucial first step in spatial alignment: it determines which raster cells are “touched” by each polygon geometry (e.g., administrative areas, watersheds, etc.).
+Essentially, this function helps figure out which pixels from a raster image fall inside each polygon (like a district, region, or shape). It does this by looking at each polygon one by one, zooming in on just the part of the raster that overlaps with that shape, and marking the pixels that are inside. This is kind of like placing a cookie cutter (the polygon) on a pixelated map (the raster) and seeing which pixels get cut.
+The result is a list where each item tells you the pixel locations that match a specific polygon. You can then use those pixel locations to pull out data from the raster, like temperatures or rainfall, and calculate statistics (like the average) for each shape. This is a key step when you want to summarize raster data within specific regions, like figuring out the average temperature in each county or how much vegetation is in each park.
Returns an index map for each vector geometry to indices in the raster source.
+
+
+
+
+
+
+
+
+
+
+
Type
+
Default
+
Details
+
+
+
+
+
vectors
+
+
+
list of geometries from a shapefile
+
+
+
raster
+
+
+
the raster data as a numpy array
+
+
+
nodata
+
NoneType
+
None
+
the nodata value of the raster
+
+
+
affine
+
NoneType
+
None
+
the affine transform of the raster
+
+
+
all_touched
+
bool
+
False
+
whether to include all touched pixels
+
+
+
verbose
+
bool
+
False
+
+
+
+
kwargs
+
VAR_KEYWORD
+
+
+
+
+
Returns
+
list
+
+
+A list with one entry per geometry: the (row, column) index arrays of the raster cells that geometry covers in the raster source.
+
+
+
+
+
+Exported source
+
def polygon_to_raster_cells(
+ vectors, # list of geometries from a shapefile
+ raster, # the raster data as a numpy array
+ nodata=None, # the nodata value of the raster
+ affine=None, # the affine transform of the raster
+ all_touched=False, # whether to include all touched pixels
+ verbose=False,
+**kwargs,
+) ->list: # A dictionary mapping vector the ids of geometries to locations (indices) in the raster source.
+"""Returns an index map for each vector geometry to indices in the raster source."""
+
+ cell_map = []
+
+with Raster(raster, affine, nodata) as rast:
+# used later to crop raster and find start row and col
+ min_lon, dlon = affine.c, affine.a
+ max_lat, dlat = affine.f, -affine.e
+ H, W = rast.shape
+
+for geom in tqdm(vectors, disable=(not verbose)):
+if"Point"in geom.geom_type:
+ geom = boxify_points(geom, rast)
+
+# find geometry bounds to crop raster
+# the raster and geometry must be in the same lon/lat coordinate system
+ start_row =max(0, min(H -1, floor((max_lat - geom.bounds[3]) / dlat)))
+ start_col =min(W -1, max(0, floor((geom.bounds[0] - min_lon) / dlon)))
+ end_col =max(0, min(W -1, ceil((geom.bounds[2] - min_lon) / dlon)))
+ end_row =min(H -1, max(0, ceil((max_lat - geom.bounds[1]) / dlat)))
+ geom_bounds = (
+ min_lon + dlon * start_col, # left
+ max_lat - dlat * end_row -1e-12, # bottom
+ min_lon + dlon * end_col +1e-12, # right
+ max_lat - dlat * start_row, # top
+ )
+
+# crop raster to area of interest and rasterize
+ fsrc = rast.read(bounds=geom_bounds)
+ rv_array = rasterize_geom(geom, like=fsrc, all_touched=all_touched)
+ indices = np.nonzero(rv_array)
+
+iflen(indices[0]) >0:
+ indices = (indices[0] + start_row, indices[1] + start_col)
+assert0<= indices[0].min() < rast.shape[0]
+assert0<= indices[1].min() < rast.shape[1]
+else:
+pass# stop here for debug
+
+ cell_map.append(indices)
+
+return cell_map
+
+
+
To use this, we must define the polygon and raster data. The polygon data is the healthshed shapefile, and the raster data is the tiff file we created earlier. We can use the GoogleDriver class we defined in core to read in the shapefile.
res_poly2cell=polygon_to_raster_cells(
+ vectors = healthsheds.geometry.values, # the geometries of the shapefile of the regions
+ raster=resampled_tiff.data, # the raster data above
+ nodata=resampled_tiff.nodata, # any intersections with no data, may have to be np.nan
+ affine=resampled_tiff.transform, # some math thing need to revise
+ all_touched=True,
+ verbose=True
+)
+
+
The data below maps which grid entries fall into each of the regions in the shapefile (e.g. which pixel is in which state)
+
+
res_poly2cell[:5]
+
+
Last but not least, we aggregate these data to the healthshed level. We can use the rasterstats package to do this.
the unique identifier column name of the health sheds
+
+
+
aggregation_func
+
callable
+
nanmean
+
the aggregation function
+
+
+
aggregation_name
+
str
+
mean
+
the name of the aggregation function
+
+
+
Returns
+
GeoDataFrame
+
+
+
+
+
+
+
+Exported source
+
def aggregate_to_healthsheds(
+ res_poly2cell: list, # the result of polygon_to_raster_cells
+ raster: RasterFile, # the raster data
+ shapes: gpd.GeoDataFrame, # the shapes of the health sheds
+ names_column: str="fs_uid", # the unique identifier column name of the health sheds
+ aggregation_func: callable= np.nanmean, # the aggregation function
+ aggregation_name: str="mean"# the name of the aggregation function
+ ) -> gpd.GeoDataFrame:
+"""
+ Aggregate the raster data to the health sheds.
+ """
+
+ stats = []
+
+for indices in res_poly2cell:
+iflen(indices[0]) ==0:
+# no cells found for this polygon
+ stats.append(np.nan)
+else:
+ cells = raster.data[indices]
+ifsum(~np.isnan(cells)) ==0:
+# no valid cells found for this polygon
+ stats.append(np.nan)
+continue
+else:
+# compute MEAN of valid cells
+# but this stat can be ANYTHING
+ stats.append(aggregation_func(cells))
+
+# clean up the result into a dataframe
+ stats = pd.Series(stats)
+ shapes[aggregation_name] = stats
+ df = pd.DataFrame(
+ {"healthshed": shapes[names_column], aggregation_name: stats}
+ )
+ gdf = gpd.GeoDataFrame(df, geometry=shapes.geometry.values, crs=shapes.crs)
+return gdf
result.plot(column="mean_soil_moisture", legend=True)
+plt.title("Mean Soil Moisture (m^3 m^-3) by Health Shed Nov 2017 day 1")
+plt.show()
+
+
That looks great! The data is aggregated to the healthshed level, and we can see the differences in exposure across the healthsheds. We can also see that the data is not uniform across the healthsheds, which is what we expect.
+
+
+
+
Tests and Main
+
Now we can wrap this up in a main function that will simply take in the input file and generate this output. We can also add some tests to make sure the data is aggregated correctly; tests will run automatically in this notebook.
+
+
import random
+
+
+
# variables = ["t2m", "d2m"]
+# years = ["20{:02d}".format(m) for m in range(9, 24)]
+# months = [str(m) for m in range(1, 13)]
+# aggregations = [
+# ("Mean", np.nanmean),
+# ("Max", np.nanmax),
+# ("Min", np.nanmin)
+# ]
+
+# exposure_variable = random.choice(variables)
+# year = random.choice(years)
+# month = random.choice(months)
+# aggregation_str, agg_func = random.choice(aggregations)
+# input_file = here() / "data/input/{}_{}.nc".format(year, month)
+
+# with initialize(version_base=None, config_path="../conf"):
+# cfg = compose(config_name='config.yaml')
+
+# driver = GoogleDriver(json_key_path=here() / cfg.GOOGLE_DRIVE_AUTH_JSON.path)
+# drive = driver.get_drive()
+# healthsheds = driver.read_healthsheds(cfg.GOOGLE_DRIVE_AUTH_JSON.healthsheds_id)
+
+# with ClimateDataFileHandler(input_file) as handler:
+# ds_path = handler.get_dataset("instant")
+# resampled_nc_file = resample_netcdf(ds_path, agg_func=agg_func)
+
+# days = len(resampled_nc_file.valid_time.values)
+# day = random.choice(range(1, days + 1))
+
+# resampled_tiff = netcdf_to_tiff(
+# ds=resampled_nc_file,
+# band=day, # the day we're aggregating
+# variable=exposure_variable,
+# crs="EPSG:4326"
+# )
+
+# res_poly2cell=polygon_to_raster_cells(
+# vectors = healthsheds.geometry.values, # the geometries of the shapefile of the regions
+# raster=resampled_tiff.data, # the raster data above
+# nodata=resampled_tiff.nodata, # any intersections with no data, may have to be np.nan
+# affine=resampled_tiff.transform, # some math thing need to revise
+# all_touched=True,
+# verbose=True
+# )
+
+# result = aggregate_to_healthsheds(
+# res_poly2cell=res_poly2cell,
+# raster=resampled_tiff,
+# shapes=healthsheds,
+# names_column="fs_uid",
+# aggregation_func=agg_func,
+# aggregation_name=exposure_variable
+# )
+
+# result.plot(column=exposure_variable, legend=True)
+# plt.title("{} {} (K) by Health Shed {}".format(aggregation_str, exposure_variable, input_file.stem))
+# plt.suptitle("Aggregation: {}, Day: {}".format(aggregation_str, str(day)))
+# plt.show()
+
+
+
+
+
+
+
+Note
+
+
+
+
Note: The above code is commented out to prevent execution during documentation generation. You can uncomment and run it in an appropriate environment to test the aggregation process.
+
+
+
+
+
\ No newline at end of file
diff --git a/_docs/02_aggregate.md b/_docs/02_aggregate.md
new file mode 100644
index 0000000..0291c2a
--- /dev/null
+++ b/_docs/02_aggregate.md
@@ -0,0 +1,988 @@
+# Aggregate Module: Spatial Aggregation to Healthsheds
+
+
+## aggregate
+
+> This module aggregates the downloaded data into the respective output
+> dataframes.
+
+
+
+We prototyped the code in this module using a Jupyter notebook. The
+notebook is available in
+`notes/prototypes/learning_aggregations_w_michelle_20250328.ipynb`. The
+code in this module is a cleaned-up version of the code in that
+notebook. The notebook contains additional comments and explanations of
+the code, which may be helpful for understanding the code in this
+module.
+
+The basic process is as follows:
+
+1. Load the netCDF data in memory
+2. Statistically aggregate the hourly data to daily data per exposure
+ using resample()
+3. Write out the data to tiff
+4. Read the tiff data back in
+5. Read in the shapefile that defines the healthsheds
+6. Spatially aggregate the exposure data to the healthsheds
+7. Quality check the aggregations
+8. Write out final aggregations to tiff
+
+
+Exported source
+
+``` python
+import tempfile
+import rasterio
+import hydra
+import argparse
+import os
+
+import pandas as pd
+import geopandas as gpd
+import numpy as np
+import xarray as xr
+import matplotlib.pyplot as plt
+
+from dataclasses import dataclass, field
+from typing import Optional, Tuple
+from pyprojroot import here
+from hydra import initialize, compose
+from omegaconf import OmegaConf, DictConfig
+from tqdm import tqdm
+from math import ceil, floor
+from rasterstats.io import Raster
+from rasterstats.utils import boxify_points, rasterize_geom
+
+try: from era5_sandbox.core import GoogleDriver, _get_callable, describe, ClimateDataFileHandler, kelvin_to_celsius
+except: from core import GoogleDriver, _get_callable, describe, ClimateDataFileHandler, kelvin_to_celsius
+```
+
+
+
+``` python
+try:
+ with initialize(version_base=None, config_path="../conf"):
+ cfg = compose(config_name='config.yaml')
+except Exception as e:
+ print(f"Error initializing Hydra: {e}")
+ with initialize(version_base=None, config_path="conf"):
+ cfg = compose(config_name='config.yaml')
+```
+
+We’re going to write a function that aggregates the data for a single
+exposure from a file. This file should be the single month data we got
+from the previous step in the pipeline.
+
+``` python
+eg_file = here() / "bld/2009_01_nepal.nc"
+```
+
+------------------------------------------------------------------------
+
+source
+
+### resample_netcdf
+
+>      resample_netcdf (fpath:str, resample:str='1D',
+>                       agg_func:<built-in function callable>=<function mean at 0x145cb6c3b930>,
+>                       time_dim:str='valid_time', **xr_open_kwargs)
+
+\*Resample a netCDF file to a specified frequency and aggregation
+method.
+
+Args: fpath (str): Path to the netCDF file. resample (str): Resampling
+frequency (e.g., ‘1H’, ‘1D’). agg_func (callable): Aggregation function
+(e.g., np.mean, np.sum).
+
+Returns: xarray.Dataset: Resampled dataset.\*
+
+
+
+
+
+
+
+
+
+
+
+
Type
+
Default
+
Details
+
+
+
+
+
fpath
+
str
+
+
Path to the netCDF file.
+
+
+
resample
+
str
+
1D
+
Resampling frequency (e.g., ‘1H’, ‘1D’)
+
+
+
agg_func
+
callable
+
mean
+
Aggregation function (e.g., np.mean, np.sum).
+
+
+
time_dim
+
str
+
valid_time
+
Name of the time dimension in the dataset.
+
+
+
xr_open_kwargs
+
VAR_KEYWORD
+
+
+
+
+
Returns
+
Dataset
+
+
keywords for python’s xarray module
+
+
+
+
+We pull the aggregation function from the config file:
+
+``` python
+var = 'swvl1'
+agg_func = _get_callable(cfg['aggregation']['aggregation'][var]['hourly_to_daily'][0]['function'])
+```
+
+``` python
+with ClimateDataFileHandler(eg_file) as handler:
+
+ ds_path = handler.get_dataset("instant")
+ resampled_data = resample_netcdf(ds_path, agg_func=agg_func)
+```
+
+I’m going to use a dataclass to represent the tiff data. This will allow
+us to easily pass around the data and metadata associated with the tiff
+file. Why? I’ve never used dataclasses and I’m curious about them —
+ChatGPT thinks this will make the code cleaner and easier to read.
+
+------------------------------------------------------------------------
+
+source
+
+### RasterFile
+
+> RasterFile (path:str, band:int)
+
+
+Exported source
+
+``` python
+@dataclass
+class RasterFile:
+ path: str
+ band: int # note that this is 1-indexed
+ data: Optional[np.ndarray] = field(default=None, init=False)
+ transform: Optional[rasterio.Affine] = field(default=None, init=False)
+ crs: Optional[str] = field(default=None, init=False)
+ nodata: Optional[float] = field(default=None, init=False)
+ bounds: Optional[Tuple[float, float, float, float]] = field(default=None, init=False)
+
+ def load(self):
+ """Load raster data and basic metadata."""
+ with rasterio.open(self.path) as src:
+ self.data = src.read(self.band) # each day gets one rasterfile
+ self.transform = src.transform
+ self.crs = src.crs
+ self.nodata = src.nodata
+ self.bounds = src.bounds
+ return self
+
+ def shape(self) -> Optional[Tuple[int, int]]:
+ """Return the shape of the raster data."""
+ return self.data.shape if self.data is not None else None
+
+ def __str__(self):
+ return f"RasterFile(path='{self.path}', shape={self.shape()}, crs='{self.crs}')"
+```
+
+
+
+Next, a function to write and read the netCDF to tiff:
+
+------------------------------------------------------------------------
+
+source
+
+### netcdf_to_tiff
+
+> netcdf_to_tiff (ds:xarray.core.dataset.Dataset, band:int, variable:str,
+> crs:str='EPSG:4326')
+
+*Convert a netCDF file to a GeoTIFF file.*
+
+
+
+
+
+
+
+
+
+
+
+
Type
+
Default
+
Details
+
+
+
+
+
ds
+
Dataset
+
+
The aggregated xarray dataset to convert.
+
+
+
band
+
int
+
+
The day to rasterise; 1 indexed just like human english
+
+
+
variable
+
str
+
+
The variable name to convert.
+
+
+
crs
+
str
+
EPSG:4326
+
Coordinate reference system (default is WGS84).
+
+
+
+
+
+Exported source
+
+``` python
+def netcdf_to_tiff(
+ ds: xr.Dataset, # The aggregated xarray dataset to convert.
+ band: int, # The day to rasterise; 1 indexed just like human english
+ variable: str, # The variable name to convert.
+ crs: str = "EPSG:4326", # Coordinate reference system (default is WGS84).
+ ):
+
+ """
+ Convert a netCDF file to a GeoTIFF file.
+ """
+
+ with tempfile.TemporaryDirectory() as tmpdirname:
+
+ # Select the variable and time index
+ variable = ds[variable]
+ ds_ = variable.rio.set_spatial_dims(x_dim="longitude", y_dim="latitude")
+ ds_ = ds_.rio.write_crs(crs)
+ # Save as GeoTIFF
+ ds_.rio.to_raster(f"{tmpdirname}/output.tif")
+ # Load the raster file
+ raster_file = RasterFile(path=f"{tmpdirname}/output.tif", band=band).load()
+
+ return raster_file
+```
+
+
+
+Now to test it:
+
+``` python
+with ClimateDataFileHandler(eg_file) as handler:
+ ds_path = handler.get_dataset("instant")
+ resampled_nc = resample_netcdf(ds_path)
+
+print(resampled_nc)
+resampled_tiff = netcdf_to_tiff(
+ ds=resampled_nc,
+ band=28,
+ variable="swvl1",
+ crs="EPSG:4326"
+)
+```
+
+``` python
+resampled_tiff.data.shape, resampled_tiff.transform, resampled_tiff.crs, resampled_tiff.bounds
+```
+
+Super cool! The tiff file is created and the data is read back in
+correctly. Now we can move on to the next step, which is to aggregate
+the data by healthshed.
+
+## Polygon to Raster Cells
+
+This function was initially shared from a previous NSAPH aggregation
+pipeline
+[here](https://github.com/NSAPH-Data-Processing/air_pollution__aqdh/blob/2a8109075fe7a8fbf7c435cc34ffa97b63f5e133/utils/faster_zonal_stats.py#L17).
+To better understand this, here is a ChatGPT explanation of the code:
+
+> This function,
+> [`polygon_to_raster_cells`](https://TinasheMTapera.github.io/era5_sandbox/aggregate.html#polygon_to_raster_cells),
+> is doing a crucial first step in spatial alignment: it determines
+> which raster cells are “touched” by each polygon geometry (e.g.,
+> administrative areas, watersheds, etc.).
+> Essentially, this function helps figure out which pixels from a raster
+> image fall inside each polygon (like a district, region, or shape). It
+> does this by looking at each polygon one by one, zooming in on just
+> the part of the raster that overlaps with that shape, and marking the
+> pixels that are inside. This is kind of like placing a cookie cutter
+> (the polygon) on a pixelated map (the raster) and seeing which pixels
+> get cut.
+> The result is a list where each item tells you the pixel locations
+> that match a specific polygon. You can then use those pixel locations
+> to pull out data from the raster, like temperatures or rainfall, and
+> calculate statistics (like the average) for each shape. This is a key
+> step when you want to summarize raster data within specific regions,
+> like figuring out the average temperature in each county or how much
+> vegetation is in each park.
+
+------------------------------------------------------------------------
+
+source
+
+### polygon_to_raster_cells
+
+> polygon_to_raster_cells (vectors, raster, nodata=None, affine=None,
+> all_touched=False, verbose=False, **kwargs)
+
+*Returns an index map for each vector geometry to indices in the raster
+source.*
+
+
+
+
+
+
+
+
+
+
+
+
Type
+
Default
+
Details
+
+
+
+
+
vectors
+
+
+
list of geometries from a shapefile
+
+
+
raster
+
+
+
the raster data as a numpy array
+
+
+
nodata
+
NoneType
+
None
+
the nodata value of the raster
+
+
+
affine
+
NoneType
+
None
+
the affine transform of the raster
+
+
+
all_touched
+
bool
+
False
+
whether to include all touched pixels
+
+
+
verbose
+
bool
+
False
+
+
+
+
kwargs
+
VAR_KEYWORD
+
+
+
+
+
Returns
+
list
+
+
+A list with one entry per geometry: the (row, column) index arrays
+of the raster cells that geometry covers in the raster source.
+
+
+
+
+
+Exported source
+
+``` python
+def polygon_to_raster_cells(
+ vectors, # list of geometries from a shapefile
+ raster, # the raster data as a numpy array
+ nodata=None, # the nodata value of the raster
+ affine=None, # the affine transform of the raster
+ all_touched=False, # whether to include all touched pixels
+ verbose=False,
+ **kwargs,
+) -> list: # A dictionary mapping vector the ids of geometries to locations (indices) in the raster source.
+ """Returns an index map for each vector geometry to indices in the raster source."""
+
+ cell_map = []
+
+ with Raster(raster, affine, nodata) as rast:
+ # used later to crop raster and find start row and col
+ min_lon, dlon = affine.c, affine.a
+ max_lat, dlat = affine.f, -affine.e
+ H, W = rast.shape
+
+ for geom in tqdm(vectors, disable=(not verbose)):
+ if "Point" in geom.geom_type:
+ geom = boxify_points(geom, rast)
+
+ # find geometry bounds to crop raster
+ # the raster and geometry must be in the same lon/lat coordinate system
+ start_row = max(0, min(H - 1, floor((max_lat - geom.bounds[3]) / dlat)))
+ start_col = min(W - 1, max(0, floor((geom.bounds[0] - min_lon) / dlon)))
+ end_col = max(0, min(W - 1, ceil((geom.bounds[2] - min_lon) / dlon)))
+ end_row = min(H - 1, max(0, ceil((max_lat - geom.bounds[1]) / dlat)))
+ geom_bounds = (
+ min_lon + dlon * start_col, # left
+ max_lat - dlat * end_row - 1e-12, # bottom
+ min_lon + dlon * end_col + 1e-12, # right
+ max_lat - dlat * start_row, # top
+ )
+
+ # crop raster to area of interest and rasterize
+ fsrc = rast.read(bounds=geom_bounds)
+ rv_array = rasterize_geom(geom, like=fsrc, all_touched=all_touched)
+ indices = np.nonzero(rv_array)
+
+ if len(indices[0]) > 0:
+ indices = (indices[0] + start_row, indices[1] + start_col)
+ assert 0 <= indices[0].min() < rast.shape[0]
+ assert 0 <= indices[1].min() < rast.shape[1]
+ else:
+ pass # stop here for debug
+
+ cell_map.append(indices)
+
+ return cell_map
+```
+
+
+
+To use this, we must define the polygon and raster data. The polygon
+data is the healthshed shapefile, and the raster data is the tiff file
+we created earlier. We can use the
+[`GoogleDriver`](https://TinasheMTapera.github.io/era5_sandbox/core.html#googledriver)
+class we defined in `core` to read in the shapefile.
+
+``` python
+# Load the Hydra config. The relative config path is tried as "../conf"
+# first and as "conf" second; the first failure is printed, not raised.
+# NOTE(review): presumably the two paths cover running from the notebook
+# directory vs. the repository root — confirm.
+try:
+ with initialize(version_base=None, config_path="../conf"):
+ cfg = compose(config_name='config.yaml')
+except Exception as e:
+ print(f"Error initializing Hydra: {e}")
+ with initialize(version_base=None, config_path="conf"):
+ cfg = compose(config_name='config.yaml')
+
+# Read the healthshed shapefile archive through the Google Drive helper.
+driver = GoogleDriver(json_key_path=here() / cfg.GOOGLE_DRIVE_AUTH_JSON.path)
+drive = driver.get_drive()
+healthsheds = driver.read_healthsheds("Nepal_Healthsheds2024.zip")
+```
+
+``` python
+# Map every healthshed geometry to the raster cells it covers.
+res_poly2cell=polygon_to_raster_cells(
+ vectors = healthsheds.geometry.values, # the geometries of the shapefile of the regions
+ raster=resampled_tiff.data, # the raster data above
+ nodata=resampled_tiff.nodata, # any intersections with no data, may have to be np.nan
+ affine=resampled_tiff.transform, # the affine transform of the raster
+ all_touched=True, # include all touched pixels
+ verbose=True
+)
+```
+
+The data below maps which grid entries fall into each of the regions in
+the shapefile (e.g. which pixel is in which state)
+
+``` python
+res_poly2cell[:5]
+```
+
+Last but not least, we aggregate these data to the healthshed level. We
+can use the `rasterstats` package to do this.
+
+------------------------------------------------------------------------
+
+source
+
+### aggregate_to_healthsheds
+
+> aggregate_to_healthsheds (res_poly2cell:list, raster:__main__.RasterFile,
+> shapes:geopandas.geodataframe.GeoDataFrame,
+> names_column:str='fs_uid',
+> aggregation_func:callable=nanmean, aggregation_name:str='mean')
+
+*Aggregate the raster data to the health sheds.*
+
+
+
+
+
+
+
+
+
+
+
+
Type
+
Default
+
Details
+
+
+
+
+
res_poly2cell
+
list
+
+
the result of polygon_to_raster_cells
+
+
+
raster
+
RasterFile
+
+
the raster data
+
+
+
shapes
+
GeoDataFrame
+
+
the shapes of the health sheds
+
+
+
names_column
+
str
+
fs_uid
+
the unique identifier column name of the health sheds
+
+
+
aggregation_func
+
callable
+
nanmean
+
the aggregation function
+
+
+
aggregation_name
+
str
+
mean
+
the name of the aggregation function
+
+
+
Returns
+
GeoDataFrame
+
+
+
+
+
+
+
+Exported source
+
+``` python
+def aggregate_to_healthsheds(
+ res_poly2cell: list, # the result of polygon_to_raster_cells
+ raster: RasterFile, # the raster data
+ shapes: gpd.GeoDataFrame, # the shapes of the health sheds
+ names_column: str = "fs_uid", # the unique identifier column name of the health sheds
+ aggregation_func: callable = np.nanmean, # the aggregation function
+ aggregation_name: str = "mean" # the name of the aggregation function
+ ) -> gpd.GeoDataFrame:
+ """
+ Aggregate the raster data to the health sheds.
+ """
+
+ stats = []
+
+ for indices in res_poly2cell:
+ if len(indices[0]) == 0:
+ # no cells found for this polygon
+ stats.append(np.nan)
+ else:
+ cells = raster.data[indices]
+ if sum(~np.isnan(cells)) == 0:
+ # no valid cells found for this polygon
+ stats.append(np.nan)
+ continue
+ else:
+ # compute MEAN of valid cells
+ # but this stat can be ANYTHING
+ stats.append(aggregation_func(cells))
+
+ # clean up the result into a dataframe
+ stats = pd.Series(stats)
+ shapes[aggregation_name] = stats
+ df = pd.DataFrame(
+ {"healthshed": shapes[names_column], aggregation_name: stats}
+ )
+ gdf = gpd.GeoDataFrame(df, geometry=shapes.geometry.values, crs=shapes.crs)
+ return gdf
+```
+
+
+
+And now we apply it:
+
+``` python
+result = aggregate_to_healthsheds(
+ res_poly2cell=res_poly2cell,
+ raster=resampled_tiff,
+ shapes=healthsheds,
+ names_column="fid",
+ aggregation_func=np.nanmean,
+ aggregation_name="mean_soil_moisture"
+)
+result.head()
+```
+
+And plot for QA:
+
+``` python
+result.plot(column="mean_soil_moisture", legend=True)
+plt.title("Mean Soil Moisture (m^3 m^-3) by Health Shed Nov 2017 day 1")
+plt.show()
+```
+
+That looks great! The data is aggregated to the healthshed level, and we
+can see the differences in exposure across the healthsheds. We can also
+see that the data is not uniform across the healthsheds, which is what
+we expect.
+
+## Tests and Main
+
+Now we can wrap this up in a main function that will simply take in the
+input file and generate this output. We can also add some tests to make
+sure the data is aggregated correctly; tests will run automatically in
+this notebook.
+
+``` python
+import random
+```
+
+``` python
+# variables = ["t2m", "d2m"]
+# years = ["20{:02d}".format(m) for m in range(9, 24)]
+# months = [str(m) for m in range(1, 13)]
+# aggregations = [
+# ("Mean", np.nanmean),
+# ("Max", np.nanmax),
+# ("Min", np.nanmin)
+# ]
+
+# exposure_variable = random.choice(variables)
+# year = random.choice(years)
+# month = random.choice(months)
+# aggregation_str, agg_func = random.choice(aggregations)
+# input_file = here() / "data/input/{}_{}.nc".format(year, month)
+
+# with initialize(version_base=None, config_path="../conf"):
+# cfg = compose(config_name='config.yaml')
+
+# driver = GoogleDriver(json_key_path=here() / cfg.GOOGLE_DRIVE_AUTH_JSON.path)
+# drive = driver.get_drive()
+# healthsheds = driver.read_healthsheds(cfg.GOOGLE_DRIVE_AUTH_JSON.healthsheds_id)
+
+# with ClimateDataFileHandler(input_file) as handler:
+# ds_path = handler.get_dataset("instant")
+# resampled_nc_file = resample_netcdf(ds_path, agg_func=agg_func)
+
+# days = len(resampled_nc_file.valid_time.values)
+# day = random.choice(range(1, days + 1))
+
+# resampled_tiff = netcdf_to_tiff(
+# ds=resampled_nc_file,
+# band=day, # the day we're aggregating
+# variable=exposure_variable,
+# crs="EPSG:4326"
+# )
+
+# res_poly2cell=polygon_to_raster_cells(
+# vectors = healthsheds.geometry.values, # the geometries of the shapefile of the regions
+# raster=resampled_tiff.data, # the raster data above
+# nodata=resampled_tiff.nodata, # any intersections with no data, may have to be np.nan
+# affine=resampled_tiff.transform, # some math thing need to revise
+# all_touched=True,
+# verbose=True
+# )
+
+# result = aggregate_to_healthsheds(
+# res_poly2cell=res_poly2cell,
+# raster=resampled_tiff,
+# shapes=healthsheds,
+# names_column="fs_uid",
+# aggregation_func=agg_func,
+# aggregation_name=exposure_variable
+# )
+
+# result.plot(column=exposure_variable, legend=True)
+# plt.title("{} {} (K) by Health Shed {}".format(aggregation_str, exposure_variable, input_file.stem))
+# plt.suptitle("Aggregation: {}, Day: {}".format(aggregation_str, str(day)))
+# plt.show()
+```
+
+
+
+> **Note**
+>
+> **Note:** The above code is commented out to prevent execution during
+> documentation generation. You can uncomment and run it in an
+> appropriate environment to test the aggregation process.
+
+
+
+3.2 seconds per aggregation is pretty cool!
+
+``` python
+result.to_parquet(here() / "data/testing/test_aggregation.parquet")
+```
+
+------------------------------------------------------------------------
+
+source
+
+### aggregate_data
+
+> aggregate_data (cfg:omegaconf.dictconfig.DictConfig, input_file:str,
+> output_file:str, exposure_variable:str)
+
+*Aggregate raster data day-by-day and store all days and statistics as
+separate columns in a single Parquet file.*
+
+
+
+
+
+
Type
+
Details
+
+
+
+
+
cfg
+
DictConfig
+
the hydra config
+
+
+
input_file
+
str
+
the input netcdf file
+
+
+
output_file
+
str
+
the output parquet file
+
+
+
exposure_variable
+
str
+
Which variable in the dataset to aggregate
+
+
+
Returns
+
None
+
+
+
+
+
+
+Exported source
+
+``` python
+def aggregate_data(
+ cfg: DictConfig, # the hydra config
+ input_file: str, # the input netcdf file
+ output_file: str, # the output parquet file
+ exposure_variable: str # Which variable in the dataset to aggregate
+ ) -> None:
+ '''
+ Aggregate raster data day-by-day and store all days and statistics as separate columns in a single Parquet file.
+ '''
+
+ if cfg.development_mode:
+ describe(cfg)
+ return None
+
+ geography = cfg['query'].geography
+ year = cfg['query']['year']
+ month = cfg['query']['month']
+ daily_aggs = cfg['aggregation']['aggregation'][exposure_variable]['hourly_to_daily']
+ healthshed_aggs = cfg['aggregation']['aggregation'][exposure_variable]['daily_to_healthshed']
+
+ # Load healthsheds
+ driver = GoogleDriver(json_key_path=here() / cfg.GOOGLE_DRIVE_AUTH_JSON.path)
+ drive = driver.get_drive()
+ healthsheds = driver.read_healthsheds(cfg.geographies[geography].healthsheds)
+
+ # Initialize output DataFrame
+ result_df = healthsheds[[cfg.geographies[geography].unique_id, "geometry"]].copy()
+
+ for daily_agg in daily_aggs:
+ print(f"Processing daily aggregation: {daily_agg['name']}...")
+
+ daily_agg_func = _get_callable(daily_agg['function'])
+
+ with ClimateDataFileHandler(input_file) as handler:
+ if exposure_variable in ["t2m", "d2m", "swvl1"]:
+ ds_path = handler.get_dataset("instant")
+ else:
+ ds_path = handler.get_dataset("accum")
+ resampled_nc_file = resample_netcdf(ds_path, agg_func=daily_agg_func)
+
+ for healthshed_agg in healthshed_aggs:
+ print(f"Aggregating to healthshed by: {healthshed_agg['name']}...")
+
+ # Get the number of days in the dataset
+ days = len(resampled_nc_file.valid_time.values)
+
+ # Get the aggregation function for healthshed
+ healthshed_agg_func = _get_callable(healthshed_agg['function'])
+ days = len(resampled_nc_file.valid_time.values)
+
+ for day in range(1, days + 1):
+ print(f"Processing day {day}...")
+
+ day_col = f"day_{day:02d}_daily_{daily_agg['name']}"
+ resampled_tiff = netcdf_to_tiff(
+ ds=resampled_nc_file,
+ band=day,
+ variable=exposure_variable,
+ crs="EPSG:4326"
+ )
+
+ result_poly2cell = polygon_to_raster_cells(
+ vectors=healthsheds.geometry.values,
+ raster=resampled_tiff.data,
+ nodata=resampled_tiff.nodata,
+ affine=resampled_tiff.transform,
+ all_touched=True,
+ verbose=True
+ )
+
+ res = aggregate_to_healthsheds(
+ res_poly2cell=result_poly2cell,
+ raster=resampled_tiff,
+ shapes=healthsheds,
+ names_column=cfg.geographies[geography].unique_id,
+ aggregation_func=healthshed_agg_func,
+ aggregation_name=exposure_variable
+ )
+
+ result_df[day_col] = res[exposure_variable]
+
+ print(f"Saving final monthly parquet file: {output_file}")
+ result_df.to_parquet(output_file, compression="snappy")
+ # return(result_df)
+```
+
+
+
+``` python
+try:
+ with initialize(version_base=None, config_path="../conf"):
+ cfg = compose(config_name='config.yaml')
+except Exception as e:
+ print(f"Error initializing Hydra: {e}")
+ with initialize(version_base=None, config_path="conf"):
+ cfg = compose(config_name='config.yaml')
+
+cfg.development_mode = False
+cfg.query['year'] = 2017
+cfg.query['month'] = 11
+cfg.query['geography'] = "nepal"
+
+variable = "swvl1"
+
+aggregate_data(cfg, here() / "bld/2017_11_nepal.nc", here() / "data/testing/test_nepal_aggregation.parquet", exposure_variable=variable)
+```
+
+``` python
+parquet_file = gpd.read_parquet(here() / "data/testing/test_nepal_aggregation.parquet")
+```
+
+``` python
+parquet_file
+```
+
+``` python
+parquet_file.plot(column="day_22_daily_mean", legend=True)
+```
+
+------------------------------------------------------------------------
+
+source
+
+### main
+
+> main (cfg:omegaconf.dictconfig.DictConfig)
+
+
+Exported source
+
+``` python
+@hydra.main(version_base=None, config_path="../../conf", config_name="config")
+def main(cfg: DictConfig) -> None:
+ # Parse command-line arguments
+ input_file = str(snakemake.input[0]) # First input file
+ output_file = str(snakemake.output[0])
+ geography = str(snakemake.params.geography)
+ aggregation_variable = str(snakemake.params.variable)
+
+ variables_dict = {
+ "2m_temperature": "t2m",
+ "2m_dewpoint_temperature": "d2m",
+ "volumetric_soil_water_layer_1": "swvl1",
+ "total_precipitation": "tp"
+ }
+
+ cfg['query']['geography'] = geography
+
+ aggregate_data(cfg, input_file=input_file, output_file=output_file, exposure_variable=variables_dict[aggregation_variable])
+```
+
+
diff --git a/_docs/03_publish.html b/_docs/03_publish.html
new file mode 100644
index 0000000..9793d35
--- /dev/null
+++ b/_docs/03_publish.html
@@ -0,0 +1,1098 @@
+
+
+
+
+
+
+
+
+
+Publish: Gather the Aggregated Data and Publish to DataVerse – era5_sandbox
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Publish: Gather the Aggregated Data and Publish to DataVerse
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
publish
+
+
This is the publish module for the ERA5 dataset pipeline. It defines functions that make use of the pyDataverse library and API to publish our outputs to the Harvard Dataverse.
+
+
+
First, we’ll test out the API by pinging the Harvard DataVerse
#
+
+resp = search_api.search("Youth from Austria", data_type="dataset")
+results = resp.json()['data']['items']
+result = [x for x in results if"Youth from Austria"in x['name']][0]
+result
+
+
+
pid = result['global_id']
+
+
Now to look at the data we created using the NativeAPI again, and delete the dataset:
So, for the above file, we’ll end up with the LEGO path data/environmental/exposures_era5/healthshed_monthly/dewpoint_2024.parquet. In it, we should have the following columns:
+
healthshed_id year month day stat_1 stat_2 ... stat_n
+
This means we should read in all of the exposures for a single timepoint at once. I think the smart thing to do is use a glob string to gather all of the pertinent files. This will be the first function we export to the library:
Read in a list of geo dataframes from the same time frame and merge them
+
+
+
+
+
+
+
+
+
+
Type
+
Details
+
+
+
+
+
glob_string
+
str
+
string for the path to search for the pertinent files
+
+
+
polygon_id
+
str
+
the string signifying the healthshed ID of the polygon
+
+
+
exposure
+
str
+
the exposure name
+
+
+
Returns
+
list
+
+
+
+
+
+
+Exported source
+
def gather_exposure_geodataframes(
+ glob_string: str, # string for the path to search for the pertinent files
+ polygon_id: str, # the string signifying the healthshed ID of the polygon
+ exposure: str# the exposure name
+ )->list:
+"Read in a list of geo dataframes from the same time frame and merge them"
+
+# first get the initial one so we have the polygon ID and geometry
+ frames = glob.glob(str(glob_string))
+ initial_gdf=gpd.read_parquet(frames[0])
+ merged_df = []
+
+for f in tqdm(frames, desc="Processing files"):
+# read in as a regular dataframe by ignoring geometry
+ df = gpd.read_parquet(f).drop(["geometry"], axis=1)
+
+# get the year and month
+# Extract year and month
+ search_str =rf'_{exposure}_(\d{{4}})_(\d{{1,2}})\.parquet$'
+ match = re.search(search_str, f)
+
+if match:
+ year =int(match.group(1))
+ month =int(match.group(2))
+#print(f"Year: {year}, Month: {month}")
+else:
+raiseValueError(f"Could not extract year and month from filename: {search_str}{f}")
+
+ df['exposure'] = exposure
+ df['month'] = month
+ df['year'] = year
+
+# Step 1: Melt all day columns (leave 'month' and 'year' as identifiers)
+ df_long = df.melt(id_vars=[polygon_id, "exposure", "year", "month"], var_name="day_stat", value_name="value")
+
+# Step 2: Extract day and stat type from column names
+# Example column: "day_01_daily_mean"
+ df_long[["day", "stat"]] = df_long["day_stat"].str.extract(r"day_(\d{2})_daily_(mean|max|min|total)")
+
+# Optional: convert 'day' and month to integer
+ df_long["day"] = df_long["day"].astype(int)
+ df_long["month"] = df_long["month"].astype(int)
+
+# Drop the original combined column
+ df_long = df_long.drop(columns="day_stat")
+
+# Reorder columns
+ df_long = df_long[[polygon_id, "exposure", "year", "month", "day", "stat", "value"]]
+
+ df_long = df_long.sort_values(by=["year", "month", "day"])
+ df_clean = df_long.pivot(index=[polygon_id, "exposure", "year", "month", "day"], columns="stat", values="value").reset_index()
+ merged_df.append(df_clean)
+
+return [pd.concat(merged_df).reset_index(drop=True), initial_gdf[[polygon_id, "geometry"]]]
This returns one file with all of the geometries and one file with the statistics and exposures.
+
Now, with this, we can move on. The dataset was created in the UI and is available via search, so we can look it up and test out how to upload to it:
+
+
resp = search_api.search("ERA5", data_type="dataset")
+
+results = resp.json()['data']['items']
+
+result = [x for x in results if"ERA5"in x['name']][0]
+era5_pid = result['global_id']
+result
+
+
+
+Exported source
+
from pyDataverse.models import Datafile
+import os
+import pathlib
+
+
+
We’ll upload directly from file. In the case of ERA5 vs. LEGO, we store the file on disk in the LEGO hierarchy, but upload it to Dataverse using a flat filename (since creating subdatasets to represent directories might be a bit of a hassle).
+
+
# assuming the file has a path on disk like:
+f_out ="environmental/exposures_era5/healthshed_daily/dewpoint_2024.parquet"
+os.makedirs(here() /"data"/"testing"/ os.path.dirname(f_out), exist_ok=True)
+aggregations, geo = merged
+aggregations.to_parquet(here() /"data"/"testing"/ f_out, index=False)
+
+datafile = Datafile()
+datafile.set({
+# the id of the era5 dataset
+"pid": era5_pid,
+# the path to the file on disk goes here
+"filename": str(here() /"data"/"testing"/ f_out),
+# use the "label" to name the file
+"label": f_out.replace("/", "-")
+})
Now, we just need a main function to upload this data. The final upload is one file per exposure per year, so these should be the variables we gather data for.
+
We should get some functionality to gather the groups of these files automatically, based on the hydra config:
@hydra.main(version_base=None, config_path="../../conf", config_name="config")
+def main(cfg: DictConfig) ->None:
+
+ variables_dict = {
+"2m_temperature": "t2m",
+"2m_dewpoint_temperature": "d2m",
+"volumetric_soil_water_layer_1": "swvl1",
+"total_precipitation": "tp"
+ }
+
+print(OmegaConf.to_yaml(cfg))
+
+#prep dataverse
+ api_token_file = here() /"sandbox/dataverse_api_key.yml"
+withopen(api_token_file, "r") as f:
+ apiconfig = yaml.load(f, Loader=yaml.BaseLoader)
+ api = NativeApi(apiconfig['base_url'], apiconfig['api_token'])
+ search_api = SearchApi(apiconfig['base_url'], apiconfig['api_token'])
+ resp = search_api.search("ERA5", data_type="dataset")
+
+ results = resp.json()['data']['items']
+
+ result = [x for x in results if"ERA5"in x['name']][0]
+ era5_pid = result['global_id']
+
+for geography in cfg.geographies:
+for year in cfg.query['year']:
+for variable, v in variables_dict.items():
+
+print(f"Processing {geography} for {variable} in {year}")
+ glob_string = here() /"data"/"intermediate"/f"*{geography}*{variable}*{year}*"
+print(f"Glob: {glob_string}")
+ polygon_id = cfg.geographies[geography]['unique_id']
+print(f"polygon_id: {polygon_id}")
+ merged = gather_exposure_geodataframes(glob_string, polygon_id, variable)
+print(merged[0].head())
+print(merged[1].head())
+
+ output_dir = here() /"data"/"output"
+
+ f_out =f"environmental/exposures_era5/healthshed_daily/{geography}_{v}_{year}.parquet"
+ os.makedirs(output_dir / os.path.dirname(f_out), exist_ok=True)
+ output_path = output_dir / f_out
+
+print(f"Writing to {output_path}")
+ merged[0].to_parquet(output_path, index=False)
+
+
+print(f"Uploading {f_out.replace('/', '-')} to Dataverse...")
+# upload to dataverse
+ datafile = Datafile()
+ datafile.set({
+"pid": era5_pid,
+"filename": str(output_path),
+"label": f_out.replace("/", "-")
+ })
+
+ resp = api.upload_datafile(era5_pid, output_path, datafile.json())
+assert resp.json()['status'] =="OK", f"Failed to upload datafile: {resp.text}"
+
+# also save the geometry for the region
+ merged[1].to_parquet(output_path.parent /f"{geography}_geometry.parquet", index=False)
+
+# and upload it to dataverse
+ datafile = Datafile()
+ datafile.set({
+"pid": era5_pid,
+"filename": str(output_path.parent /f"{geography}_geometry.parquet"),
+"label": f"{geography}_geometry.parquet"
+ })
+
+ resp = api.upload_datafile(era5_pid, output_path.parent /f"{geography}_geometry.parquet", datafile.json())
+assert resp.json()['status'] =="OK", f"Failed to upload geometry datafile: {resp.text}"
+
+print("All files processed and uploaded successfully.")
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/_docs/03_publish.md b/_docs/03_publish.md
new file mode 100644
index 0000000..ee32f98
--- /dev/null
+++ b/_docs/03_publish.md
@@ -0,0 +1,511 @@
+# Publish: Gather the Aggregated Data and Publish to DataVerse
+
+
+## publish
+
+> This is the `publish` module for the ERA5 dataset pipeline. It defines
+> functions that make use of the `pyDataverse` library and API to
+> publish our outputs to the Harvard Dataverse.
+
+
+
+First, we’ll test out the API by pinging the Harvard DataVerse
+
+
+Exported source
+
+``` python
+import hydra
+import yaml
+import json
+from tqdm import tqdm
+from pyprojroot import here
+```
+
+
+
+``` python
+api_token_file = here() / "sandbox/dataverse_api_key.yml"
+with open(api_token_file, "r") as f:
+ config = yaml.load(f, Loader=yaml.BaseLoader)
+```
+
+Now, following the [docs](https://pydataverse.readthedocs.io/) for the dataverse tutorial, load a NativeAPI
+up:
+
+
+Exported source
+
+``` python
+from pyDataverse.api import NativeApi
+```
+
+
+
+The NativeAPI is a catchall API object to be able to do general stuff:
+
+``` python
+api = NativeApi(config['base_url'], config['api_token'])
+resp=api.get_info_version()
+#resp.text()
+```
+
+``` python
+resp.json()
+```
+
+Looks good! Now that we know that it works, we can think more about how
+to publish data there.
+
+## Harvard Dataverse
+
+Let’s create a dummy dataset with the components we’re planning to
+upload, and then upload and promptly delete it.
+
+To do that, we must import the `models` module and create a Dataset
+object:
+
+``` python
+from pyDataverse.models import Dataset
+```
+
+``` python
+ds = Dataset()
+```
+
+This `ds` object is pretty straightforward since it doesn’t contain
+anything yet:
+
+``` python
+ds.get()
+```
+
+We can populate the object from the dummy data on the github repo:
+
+``` python
+from pyDataverse.utils import read_file
+from urllib.request import urlretrieve
+import tempfile
+```
+
+``` python
+# url for dummy data
+url = "https://raw.githubusercontent.com/gdcc/pyDataverse/refs/heads/main/tests/data/user-guide/dataset.json"
+
+
+with tempfile.NamedTemporaryFile(mode='w+') as tmp:
+ urlretrieve(url, tmp.name)
+ ds.from_json(read_file(tmp.name))
+```
+
+We have to validate the JSON correctly:
+
+``` python
+ds.validate_json()
+```
+
+Modifying it is easy:
+
+``` python
+ds.set({"title": "Youth from Austria 2005"})
+ds.get()
+```
+
+Now, to create the dataset we use the API:
+
+``` python
+# this is only run in interactive sessions for demo purposes
+resp = api.create_dataset(":root", ds.json())
+```
+
+If you caught the `resp` object, it contains the PID for the newly
+created dataset.
+
+However, if you didn’t, you can use the SearchAPI to find it:
+
+
+Exported source
+
+``` python
+from pyDataverse.api import SearchApi
+```
+
+
+
+``` python
+search_api = SearchApi(config['base_url'], config['api_token'])
+```
+
+``` python
+#
+
+resp = search_api.search("Youth from Austria", data_type="dataset")
+results = resp.json()['data']['items']
+result = [x for x in results if "Youth from Austria" in x['name']][0]
+result
+```
+
+``` python
+pid = result['global_id']
+```
+
+Now to look at the data we created using the NativeAPI again, and delete
+the dataset:
+
+``` python
+uploaded_ds = api.get_dataset(pid)
+uploaded_ds.json()['data']
+
+resp = api.delete_dataset(pid)
+resp.json()
+```
+
+With that understanding, we can develop a quick module to do the
+following:
+
+1. Make the dataset LEGO Compatible
+2. Upload and publish the data to dataverse
+
+## LEGO Compatibility
+
+Let’s take an example file to use as a model for LEGO compatibility
+
+
+Exported source
+
+``` python
+import geopandas as gpd
+import pandas as pd
+import re
+import glob
+```
+
+
+
+``` python
+ex = gpd.read_parquet(here() / "bld/2009_06_madagascar_day_swvl1_mean.parquet")
+ex.describe()
+```
+
+We know that the LEGO data model should look like this:
+
+ /lego
+ ├──
+ │ ├── __
+ │ │ ├── __
+ │ │ │ ├── _yyyy.parquet
+
+So, for the above file, we’ll end up with the LEGO path
+`data/environmental/exposures_era5/healthshed_monthly/dewpoint_2024.parquet`.
+In it, we should have the following columns:
+
+ healthshed_id year month day stat_1 stat_2 ... stat_n
+
+This means we should read in all of the exposures for a single timepoint
+at once. I think the smart thing to do is use a glob string to gather
+all of the pertinent files. This will be the first function we export to
+the library:
+
+------------------------------------------------------------------------
+
+source
+
+### gather_exposure_geodataframes
+
+> gather_exposure_geodataframes (glob_string:str, polygon_id:str,
+> exposure:str)
+
+*Read in a list of geo dataframes from the same time frame and merge
+them*
+
+
+
+
+
+
+
+
+
+
+
Type
+
Details
+
+
+
+
+
glob_string
+
str
+
string for the path to search for the pertinent files
+
+
+
polygon_id
+
str
+
the string signifying the healthshed ID of the polygon
+
+
+
exposure
+
str
+
the exposure name
+
+
+
Returns
+
list
+
+
+
+
+
+
+Exported source
+
+``` python
+def gather_exposure_geodataframes(
+ glob_string: str, # string for the path to search for the pertinent files
+ polygon_id: str, # the string signifying the healthshed ID of the polygon
+ exposure: str # the exposure name
+ )-> list:
+ "Read in a list of geo dataframes from the same time frame and merge them"
+
+ # first get the initial one so we have the polygon ID and geometry
+ frames = glob.glob(str(glob_string))
+ initial_gdf=gpd.read_parquet(frames[0])
+ merged_df = []
+
+ for f in tqdm(frames, desc="Processing files"):
+ # read in as a regular dataframe by ignoring geometry
+ df = gpd.read_parquet(f).drop(["geometry"], axis=1)
+
+ # get the year and month
+ # Extract year and month
+ search_str = rf'_{exposure}_(\d{{4}})_(\d{{1,2}})\.parquet$'
+ match = re.search(search_str, f)
+
+ if match:
+ year = int(match.group(1))
+ month = int(match.group(2))
+ #print(f"Year: {year}, Month: {month}")
+ else:
+ raise ValueError(f"Could not extract year and month from filename: {search_str} {f}")
+
+ df['exposure'] = exposure
+ df['month'] = month
+ df['year'] = year
+
+ # Step 1: Melt all day columns (leave 'month' and 'year' as identifiers)
+ df_long = df.melt(id_vars=[polygon_id, "exposure", "year", "month"], var_name="day_stat", value_name="value")
+
+ # Step 2: Extract day and stat type from column names
+ # Example column: "day_01_daily_mean"
+ df_long[["day", "stat"]] = df_long["day_stat"].str.extract(r"day_(\d{2})_daily_(mean|max|min|total)")
+
+ # Optional: convert 'day' and month to integer
+ df_long["day"] = df_long["day"].astype(int)
+ df_long["month"] = df_long["month"].astype(int)
+
+ # Drop the original combined column
+ df_long = df_long.drop(columns="day_stat")
+
+ # Reorder columns
+ df_long = df_long[[polygon_id, "exposure", "year", "month", "day", "stat", "value"]]
+
+ df_long = df_long.sort_values(by=["year", "month", "day"])
+ df_clean = df_long.pivot(index=[polygon_id, "exposure", "year", "month", "day"], columns="stat", values="value").reset_index()
+ merged_df.append(df_clean)
+
+ return [pd.concat(merged_df).reset_index(drop=True), initial_gdf[[polygon_id, "geometry"]]]
+```
+
+
+
+``` python
+frames = here() / "data" / "testing" / "*madagascar*"
+
+merged = gather_exposure_geodataframes(frames, "fs_uid", "2m_dewpoint_temperature")
+merged[0].describe()
+```
+
+This returns one file with all of the geometries and one file with the
+statistics and exposures.
+
+Now, with this, we can move on. The dataset was created in the UI and is
+available via search, so we can look it up and test out how to upload to it:
+
+``` python
+resp = search_api.search("ERA5", data_type="dataset")
+
+results = resp.json()['data']['items']
+
+result = [x for x in results if "ERA5" in x['name']][0]
+era5_pid = result['global_id']
+result
+```
+
+
+Exported source
+
+``` python
+from pyDataverse.models import Datafile
+import os
+import pathlib
+```
+
+
+
+We’ll upload directly from file. In the case of ERA5 vs. LEGO, we store
+the file on disk in the LEGO hierarchy, but upload it to Dataverse using
+a flat filename (since creating subdatasets to represent directories
+might be a bit of a hassle).
+
+``` python
+# assuming the file has a path on disk like:
+f_out = "environmental/exposures_era5/healthshed_daily/dewpoint_2024.parquet"
+os.makedirs(here() / "data" / "testing" / os.path.dirname(f_out), exist_ok=True)
+aggregations, geo = merged
+aggregations.to_parquet(here() / "data" / "testing" / f_out, index=False)
+
+datafile = Datafile()
+datafile.set({
+ # the id of the era5 dataset
+ "pid": era5_pid,
+ # the path to the file on disk goes here
+ "filename": str(here() / "data" / "testing" / f_out),
+ # use the "label" to name the file
+ "label": f_out.replace("/", "-")
+})
+```
+
+``` python
+resp = api.upload_datafile(era5_pid, str(here() / "data" / "testing" / f_out), datafile.json())
+```
+
+Pretty simple!
+
+Now, we just need a main function to upload this data. The final upload
+is one file per exposure per year, so these should be the variables we
+gather data for.
+
+We should get some functionality to gather the groups of these files
+automatically, based on the hydra config:
+
+
+Exported source
+
+``` python
+from hydra import initialize, compose
+from omegaconf import OmegaConf, DictConfig
+from tqdm import tqdm
+```
+
+
+
+``` python
+target_dir = here() / "data" / "intermediate"
+
+try:
+ with initialize(version_base=None, config_path="../conf"):
+ cfg = compose(config_name='config.yaml')
+except Exception as e:
+ print(f"Error initializing Hydra: {e}")
+ with initialize(version_base=None, config_path="conf"):
+ cfg = compose(config_name='config.yaml')
+
+cfg.development_mode = False
+#cfg.query['year'] = 2017
+#cfg.query['month'] = 11
+#cfg.query['geography'] = "nepal"
+```
+
+------------------------------------------------------------------------
+
+source
+
+### main
+
+> main (cfg:omegaconf.dictconfig.DictConfig)
+
+
+Exported source
+
+``` python
+@hydra.main(version_base=None, config_path="../../conf", config_name="config")
+def main(cfg: DictConfig) -> None:
+
+ variables_dict = {
+ "2m_temperature": "t2m",
+ "2m_dewpoint_temperature": "d2m",
+ "volumetric_soil_water_layer_1": "swvl1",
+ "total_precipitation": "tp"
+ }
+
+ print(OmegaConf.to_yaml(cfg))
+
+ #prep dataverse
+ api_token_file = here() / "sandbox/dataverse_api_key.yml"
+ with open(api_token_file, "r") as f:
+ apiconfig = yaml.load(f, Loader=yaml.BaseLoader)
+ api = NativeApi(apiconfig['base_url'], apiconfig['api_token'])
+ search_api = SearchApi(apiconfig['base_url'], apiconfig['api_token'])
+ resp = search_api.search("ERA5", data_type="dataset")
+
+ results = resp.json()['data']['items']
+
+ result = [x for x in results if "ERA5" in x['name']][0]
+ era5_pid = result['global_id']
+
+ for geography in cfg.geographies:
+ for year in cfg.query['year']:
+ for variable, v in variables_dict.items():
+
+ print(f"Processing {geography} for {variable} in {year}")
+ glob_string = here() / "data" / "intermediate" / f"*{geography}*{variable}*{year}*"
+ print(f"Glob: {glob_string}")
+ polygon_id = cfg.geographies[geography]['unique_id']
+ print(f"polygon_id: {polygon_id}")
+ merged = gather_exposure_geodataframes(glob_string, polygon_id, variable)
+ print(merged[0].head())
+ print(merged[1].head())
+
+ output_dir = here() / "data" / "output"
+
+ f_out = f"environmental/exposures_era5/healthshed_daily/{geography}_{v}_{year}.parquet"
+ os.makedirs(output_dir / os.path.dirname(f_out), exist_ok=True)
+ output_path = output_dir / f_out
+
+ print(f"Writing to {output_path}")
+ merged[0].to_parquet(output_path, index=False)
+
+
+ print(f"Uploading {f_out.replace('/', '-')} to Dataverse...")
+ # upload to dataverse
+ datafile = Datafile()
+ datafile.set({
+ "pid": era5_pid,
+ "filename": str(output_path),
+ "label": f_out.replace("/", "-")
+ })
+
+ resp = api.upload_datafile(era5_pid, output_path, datafile.json())
+ assert resp.json()['status'] == "OK", f"Failed to upload datafile: {resp.text}"
+
+ # also save the geometry for the region
+ merged[1].to_parquet(output_path.parent / f"{geography}_geometry.parquet", index=False)
+
+ # and upload it to dataverse
+ datafile = Datafile()
+ datafile.set({
+ "pid": era5_pid,
+ "filename": str(output_path.parent / f"{geography}_geometry.parquet"),
+ "label": f"{geography}_geometry.parquet"
+ })
+
+ resp = api.upload_datafile(era5_pid, output_path.parent / f"{geography}_geometry.parquet", datafile.json())
+ assert resp.json()['status'] == "OK", f"Failed to upload geometry datafile: {resp.text}"
+
+ print("All files processed and uploaded successfully.")
+```
+
+
diff --git a/_docs/10_pytask_demo.html b/_docs/10_pytask_demo.html
new file mode 100644
index 0000000..ca9be4b
--- /dev/null
+++ b/_docs/10_pytask_demo.html
@@ -0,0 +1,1114 @@
+
+
+
+
+
+
+
+
+
+Demo: How to Create Pipelines with pytask – era5_sandbox
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
In this notebook, we are demonstrating how to convert our snakemake workflow into a pytask workflow. We use the basic tutorial to demonstrate this, but continue to use nbdev for development of functions in notebooks.
+
pytask is a task management system that allows you to define tasks and their dependencies, similar to Snakemake. It is particularly useful for data science workflows.
+
There are a number of reasons to use pytask over snakemake: - Pythonic: pytask is designed to be purely Pythonic by default, allowing you to write tasks and entire pipelines as Python functions. - Flexibility: pytask allows you to define tasks and their dependencies in a more flexible way, using Python functions and decorators, as opposed to orchestrating numerous scripts. - Integration: pytask integrates well with other Python libraries, such as nbdev here, or hydra configurations if you need, allowing you to use your existing code, notebooks, or configs in your workflows. - Parallelism: pytask supports parallel execution of tasks with pytask-parallel, which can speed up your workflows significantly, especially for data processing tasks.
+
We’ll use nbdev to define the task functions, and then export them to the src directory. pytask is then invoked at the command line to run the tasks.
+
+
This demo task is taken from the tutorial at pytask documentation. At minimum, you need your package to contain the following in a config.py file:
The former tells Python where to find the source code and build directory for pytask objects and shims, while the latter tells pytask where to find the task definitions and dependency DAG.
To define a task, simply use the task_ prefix in the function name (or, if you are familiar and comfortable with decorators, use @pytask.mark.task). Be verbose and expressive in your use of type hints to specify the input and output data, so that pytask can automatically detect and handle the dependencies between tasks.
+
+
+
Defining Tracked Outputs
+
To define something as a tracked output, you can annotate the input of the task with Annotated[Path, Product], where Product is imported from pytask. This tells pytask that this is a product of the task and should be saved in the build directory.
+
In this example, we’re generating random data into a data frame and saving the object as a pickle in the bld directory (bld is the default build directory for pytask’s intermediate data). To get that directory, we use the BLD variable from the era5_sandbox.config module as above. This module itself could also be generated using nbdev if you want to keep your configuration in notebooks.
+
Using nbdev, we can also include all of the bells and whistles of function documentation.
def task_create_random_data(
+ seed: Annotated[int, 42], # Default seed for reproducibility
+ path_to_data: Annotated[Path, Product] = BLD /"data.pkl"# Path to the object in the build directory
+ ) ->None:
+"Create a random data set and save it as a pickle file. Return the path to the saved file."
+ rng = np.random.default_rng(seed)
+ beta =2
+
+ x = rng.normal(loc=5, scale=10, size=1_000)
+ epsilon = rng.standard_normal(1_000)
+
+ y = beta * x + epsilon
+
+ df = pd.DataFrame({"x": x, "y": y})
+
+# this is a tracked output, so we annotate the return value with `Annotated[Path, Product]`
+ df.to_pickle(path_to_data)
+
+
+
We can test the function directly in the notebook:
+
+
task_create_random_data(42)
+
+
Once this module and function are exported with nbdev_export, the functions are in a python package. We can then use the command line to look at the registered tasks:
+
+
pytask collect
+
+
Let’s add another task in the same module. This task plots the data we generated. To link the previous task to this one as a dependency, we can list the output of the previous task as an input to this one. This way, pytask will know that it needs to run the first task before this one.
Plot the data from the pickle file and save the plot. Note that this task: 1. depends on the data.pkl file created by the previous task, 2. does not return any value, but saves a plot to the build directory. So the side effect of the task is what we are interested in here (though this is probably bad practice).
+
+
+
+
+
+
+
+
+
+
+
Type
+
Default
+
Details
+
+
+
+
+
path_to_data
+
Annotated
+
+
Path to the data file created by the previous task
def task_plot_data(
+ path_to_data: Annotated[Path, BLD /"data.pkl"], # Path to the data file created by the previous task
+ path_to_plot: Annotated[Path, Product] = BLD /"plot.png"# Path to the build directory for the plot
+) ->None:
+"""
+ Plot the data from the pickle file and save the plot. Note that this task:
+ 1. depends on the data.pkl file created by the previous task,
+ 2. does not return any value, but saves a plot to the build directory. So the side effect of the task is what we are interested in here (though this is probably bad practice).
+ """
+
+ df = pd.read_pickle(path_to_data)
+
+ _, ax = plt.subplots()
+ df.plot(x="x", y="y", ax=ax, kind="scatter")
+
+ plt.savefig(path_to_plot)
+ plt.close()
+
+
+
We now have a DAG of tasks that pytask can execute. To see the tasks, we can use the command line to create a pygraphviz graph of the tasks:
+
pytask dag
+
The DAG is saved as a pdf file, and you can view it using any viewer. Now, to run the pipeline, just invoke pytask at the command line:
+
pytask
+
In Jupyter or iPython, you can interact with the task outputs directly:
+
+
# list all the files in the build directory
+for file in os.listdir(BLD):
+    print(file)
+
+
We can use these to build subsequent tasks later.
+
+
+
+
More Complex Tasks & The Data Catalog
+
As we define more complex tasks, we can use the pytask data catalog to manage the inputs and outputs of our tasks. The data catalog allows us to imperatively name the data and their formats, making it easier to manage the data flow in our tasks. Importantly, we can define the data pythonically, which allows us to use the full power of Python to manipulate and transform our data. This is particularly more useful than snakemake’s approach, which requires you to define the data in a more static way using paths and a separate pseudo-language.
+
The content of the era5_sandbox.config module can be extended to include a data catalog:
With just this definition, we’re now able to refer directly to data by name in our tasks, and pytask will handle the paths and formats for us. This allows us to focus on the logic of our tasks rather than the details of data management.
+
+
+
+
+
+
+Note
+
+
+
+
This is a major advantage of pytask over snakemake, as it allows you to define the data in a more flexible and Pythonic way, while still maintaining the benefits of a task management system. It is a similar approach to building pipelines in R with targets, which allows you to define the data in a more flexible way.
+
+
+
Let’s create a task that modifies the data frame by adding a new column. This task will depend on the previous task’s output, and we will use the data catalog to define the input and output data.
def task_add_one(
+ path_to_data: Annotated[Path, BLD /"data.pkl"], # Path to the data file created by the previous task
+ node: Annotated[PickleNode, Product] = demo_catalog["mydata"]
+) ->None:
+"""
+ Add one to the 'y' column of the data frame and save it as a new pickle file.
+ """
+ df = pd.read_pickle(path_to_data)
+ df['z'] = df['y'] +1
+
+ node.save(df)
+
+
+
In this function, we’ve defined that the task relies on the output of the first task being there, the data.pkl file. But importantly, we’ve also defined our product as a node from the PickleNode module. This will allow pytask to handle the serialization and deserialization of the data frame automatically, so we don’t have to worry about the details of how the data is stored. We create the datacatalog in our config file, and then tell this task to create a Node in that catalog called mydata. Whatever we save with the node.save() method will be saved in the build directory, but more importantly will be indexed and hashed by pytask. This means that if the data changes, pytask will know to rerun the task.
+
To make this even more pythonic, we can modify the format of our task function so that the return type annotator is used as a node in the data catalog. This allows us to define the output of the task as a PickleNode, which will automatically handle the serialization and deserialization of the data frame.
+
+
+
+
+
+
+Note
+
+
+
+
This is another trick I’m deriving from {targets}. By formatting tasks as pure functions where inputs are parameters and targets are return type annotations, we can define the output of the task as a PickleNode, which will automatically handle the serialization and deserialization of the data frame. This again allows us to focus on the logic of our tasks rather than the details of data management.
+
+
+
So below, we’re directly accessing the data_catalog to get the mydata node, and then modifying it by adding a new column. It feels like we are doing this in place, such as in an iPython session, because we are allowing pytask to handle the serialization of the file on disk for us.
Add another column to the data frame stored in the PickleNode.
+
+
+
+
+
+
+
+
+
+
Type
+
Details
+
+
+
+
+
df
+
Annotated
+
which object in the catalog to fetch from the catalog with node.load()
+
+
+
Returns
+
Annotated
+
which object in the catalog to save the return value to
+
+
+
+
+
+Exported source
+
def task_add_another_column(
+ df: Annotated[pd.DataFrame, demo_catalog["mydata"]] # which object in the catalog to fetch from the catalog with node.load()
+) -> Annotated[pd.DataFrame, demo_catalog["mydata2"]]: # which object in the catalog to save the return value to
+"""
+ Add another column to the data frame stored in the PickleNode.
+ """
+
+# use the datacatalog directly to access the node
+# this is a bit like accessing the node in an iPython session, but pytask
+# will handle the serialization and deserialization for us
+ df['w'] = df['z'] * df['y']
+
+return df
+
+
+
To test this interactively, we’d have to import the data catalog’s object
+
+
df = demo_catalog["mydata"].load() # load the data frame from the PickleNode
+result = task_add_another_column(df) # call the task function with the loaded data frame
+
+
+
result
+
+
Now that we know it will work, we can invoke pytask:
+
+
pytask
+
+
Notice that the outputs are cached and not recomputed unless the inputs change. This is a key feature of pytask and other DAGs, allowing you to efficiently manage your data processing tasks without unnecessary recomputation.
+
+
+
+
Conclusion
+
The takeaway here is that with pytask, you can define pure functions that take inputs and return outputs, and build a DAG of tasks that can be executed in a flexible and efficient way. This allows you to focus on the logic of your tasks rather than the details of data management, while still maintaining the benefits of a task management system. The key elements are:
+
+
Task annotation: You define your tasks by creating pure functions that take inputs and return outputs, and use decorators or naming conventions to mark them as “tasks” in a dag
+
Input and output annotation: You define the inputs and outputs of your tasks using type hints, and allow pytask to automatically detect and handle the dependencies between tasks.
+
Data catalog: You define your data in a Pythonic object in your config called data_catalog. As you iteratively develop your DAG, you add objects to the data catalog, which are called nodes. As long as a node is a pythonic object and has a pickle method, pytask will handle the serialization and deserialization of the data for you.
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/_docs/10_pytask_demo.md b/_docs/10_pytask_demo.md
new file mode 100644
index 0000000..d0a20e4
--- /dev/null
+++ b/_docs/10_pytask_demo.md
@@ -0,0 +1,603 @@
+# Demo: How to Create Pipelines with `pytask`
+
+
+## Data Preparation Demo
+
+> Data preparation task for `pytask` demo
+
+In this notebook, we are demonstrating how to convert our snakemake
+workflow into a `pytask` workflow. We use the basic tutorial to
+demonstrate this, but continue to use nbdev for development of functions
+in notebooks.
+
+`pytask` is a task management system that allows you to define tasks and
+their dependencies, similar to `Snakemake`. It is particularly useful
+for data science workflows.
+
+There are a number of reasons to use `pytask` over `snakemake`:
+
+- **Pythonic**: `pytask` is designed to be purely Pythonic by default, allowing
+  you to write tasks and entire pipelines as Python functions.
+- **Flexibility**: `pytask` allows you to define tasks and their dependencies in
+  a more flexible way, using Python functions and decorators, as opposed to
+  orchestrating numerous scripts.
+- **Integration**: `pytask` integrates well with other Python libraries, such as
+  `nbdev` here, or `hydra` configurations if you need, allowing you to use your
+  existing code, notebooks, or configs in your workflows.
+- **Parallelism**: `pytask` supports parallel execution of tasks with `pytask-parallel`,
+  which can speed up your workflows significantly, especially for data processing tasks.
+
+We’ll use nbdev to define the task functions, and then export them to
+the `src` directory. `pytask` is then invoked at the command line to run
+the tasks.
+
+
+
+This demo task is taken from the tutorial at [pytask
+documentation](https://pytask-dev.readthedocs.io/en/stable/tutorials/write_a_task.html).
+At minimum, your project needs the layout below, with a `config.py`
+module that defines the source and build directories:
+
+``` md
+my_project
+│
+├───.pytask
+│
+├───bld
+│ └────...
+│
+├───src
+│ └───my_project
+│ ├────__init__.py
+│ ├────config.py
+│ └────...
+│
+└───pyproject.toml
+```
+
+``` python
+#contents of `era5_sandbox.config` module
+from pathlib import Path
+
+
+SRC = Path(__file__).parent.resolve() # directory that contains this config module
+BLD = SRC.joinpath("..", "..", "bld").resolve() # the `bld` directory two levels above SRC
+```
+
+Additionally, your pyproject.toml file should contain the following at
+minimum:
+
+``` toml
+[tool.pytask.ini_options]
+paths = ["src/era5_sandbox"]
+```
+
+The former tells Python where to find the source code and build
+directory for `pytask` objects and shims, while the latter tells
+`pytask` where to find the task definitions and dependency DAG.
+
+
+Exported source
+
+``` python
+import os
+from pathlib import Path
+from typing import Annotated
+
+import numpy as np
+import matplotlib.pyplot as plt
+import pandas as pd
+from era5_sandbox.config import BLD
+from era5_sandbox.config import data_catalog, demo_catalog
+
+from pytask import PickleNode
+from pytask import Product
+```
+
+
+
+### Defining Tasks
+
+To define a task, simply use the `task_` prefix in the function name
+(or, if you are familiar and comfortable with decorators, use
+`@pytask.mark.task`). Be verbose and expressive in your use of type
+hints to specify the input and output data, so that `pytask` can
+automatically detect and handle the dependencies between tasks.
+
+### Defining Tracked Outputs
+
+To define something as a tracked output, annotate a parameter of the
+task function with `Annotated[Path, Product]`, where `Product` is
+imported from `pytask`. This tells `pytask` that the path is a product
+of the task; the parameter's default value sets where the file is written.
+
+In this example, we’re generating random data into a data frame and
+saving the object as a pickle in the `bld` directory (`bld` is the
+default build directory for `pytask`’s intermediate data). To get that
+directory, we use the `BLD` variable from the `era5_sandbox.config`
+module as above. This module itself could also be generated using
+`nbdev` if you want to keep your configuration in notebooks.
+
+Using `nbdev`, we can also include all of the bells and whistles of
+function documentation.
+
+------------------------------------------------------------------------
+
+source
+
+### task_create_random_data
+
+> task_create_random_data (seed:typing.Annotated[int,42], path_to_data:typi
+> ng.Annotated[pathlib.Path,ProductType()]=Path('/
+> net/rcstorenfs02/ifs/rc_labs/dominici_lab/lab/da
+> ta_processing/csph-era5_sandbox/bld/data.pkl'))
+
+*Create a random data set and save it as a pickle file at `path_to_data`.
+Nothing is returned.*
+
+
+
+
+Exported source
+
+``` python
+def task_create_random_data(
+ seed: Annotated[int, 42], # Seed for the random generator; NOTE(review): 42 is Annotated metadata, not a Python default - confirm pytask supplies it
+ path_to_data: Annotated[Path, Product] = BLD / "data.pkl" # Product: where the pickle is written (defaults to the build directory)
+ ) -> None:
+ "Create a random data set and save it as a pickle file at `path_to_data`."
+ rng = np.random.default_rng(seed)
+ beta = 2 # slope of the simulated linear relationship
+
+ x = rng.normal(loc=5, scale=10, size=1_000)
+ epsilon = rng.standard_normal(1_000) # standard-normal noise term
+
+ y = beta * x + epsilon
+
+ df = pd.DataFrame({"x": x, "y": y})
+
+ # path_to_data is the tracked output: the parameter (not the return value) carries `Annotated[Path, Product]`
+ df.to_pickle(path_to_data)
+```
+
+
+
+We can test the function directly in the notebook:
+
+``` python
+task_create_random_data(42)
+```
+
+Once this module and function are exported with `nbdev_export`, the
+functions are in a python package. We can then use the command line to
+look at the registered tasks:
+
+``` sh
+pytask collect
+```
+
+Let’s add another task in the same module. This task plots the data we
+generated. To link the previous task to this one as a dependency, we can
+list the output of the previous task as an input to this one. This way,
+`pytask` will know that it needs to run the first task before this one.
+
+------------------------------------------------------------------------
+
+source
+
+### task_plot_data
+
+> task_plot_data (path_to_data:typing.Annotated[pathlib.Path,Path('/net/rcs
+> torenfs02/ifs/rc_labs/dominici_lab/lab/data_processing/cs
+> ph-era5_sandbox/bld/data.pkl')], path_to_plot:typing.Anno
+> tated[pathlib.Path,ProductType()]=Path('/net/rcstorenfs02
+> /ifs/rc_labs/dominici_lab/lab/data_processing/csph-
+> era5_sandbox/bld/plot.png'))
+
+*Plot the data from the pickle file and save the plot. Note that this
+task: 1. depends on the data.pkl file created by the previous task, 2.
+does not return any value, but saves a plot to the build directory. So
+the side effect of the task is what we are interested in here (though
+this is probably bad practice).*
+
+
+
+
+
+
+
+
+
+
+
+
Type
+
Default
+
Details
+
+
+
+
+
path_to_data
+
Annotated
+
+
Path to the data file created by the previous task
+
+
+Exported source
+
+``` python
+def task_plot_data(
+ path_to_data: Annotated[Path, BLD / "data.pkl"], # Dependency: the pickle file created by the previous task
+ path_to_plot: Annotated[Path, Product] = BLD / "plot.png" # Product: where the scatter plot is saved (defaults to the build directory)
+) -> None:
+ """
+ Plot the data from the pickle file and save the plot. Note that this task:
+ 1. depends on the data.pkl file created by the previous task,
+ 2. does not return any value, but saves a plot to the build directory. So the side effect of the task is what we are interested in here (though this is probably bad practice).
+ """
+
+ df = pd.read_pickle(path_to_data)
+
+ _, ax = plt.subplots() # the figure handle is discarded; only the axes are used
+ df.plot(x="x", y="y", ax=ax, kind="scatter")
+
+ plt.savefig(path_to_plot)
+ plt.close() # close the current figure once it has been saved
+```
+
+
+
+We now have a DAG of tasks that `pytask` can execute. To see the tasks,
+we can use the command line to create a pygraphviz graph of the tasks:
+
+``` bash
+pytask dag
+```
+
+The DAG is saved as a pdf file, and you can view it using any viewer.
+Now, to run the pipeline, just invoke `pytask` at the command line:
+
+``` bash
+pytask
+```
+
+In Jupyter or iPython, you can interact with the task outputs directly:
+
+``` python
+# list all the files in the build directory
+for file in os.listdir(BLD):
+ print(file)
+```
+
+We can use these to build subsequent tasks later.
+
+## More Complex Tasks & The Data Catalog
+
+As we define more complex tasks, we can use the `pytask` data catalog to
+manage the inputs and outputs of our tasks. The data catalog allows us
+to imperatively name the data and their formats, making it easier to
+manage the data flow in our tasks. Importantly, we can define the data
+pythonically, which allows us to use the full power of Python to
+manipulate and transform our data. This is considerably more convenient than
+snakemake’s approach, which requires you to define the data in a more
+static way using paths and a separate pseudo-language.
+
+The content of the `era5_sandbox.config` module can be extended to
+include a data catalog:
+
+``` python
+from pathlib import Path
+from pytask import DataCatalog, Product
+
+SRC = Path(__file__).parent.resolve() # directory that contains this config module
+BLD = SRC.joinpath("..", "..", "bld").resolve() # the `bld` directory two levels above SRC
+
+demo_catalog = DataCatalog() # catalog whose entries the demo tasks look up by name, e.g. demo_catalog["mydata"]
+```
+
+With just this definition, we’re now able to refer directly to data by
+name in our tasks, and `pytask` will handle the paths and formats for
+us. This allows us to focus on the logic of our tasks rather than the
+details of data management.
+
+
+
+> **Note**
+>
+> This is a major advantage of `pytask` over `snakemake`, as it allows
+> you to define the data in a more flexible and Pythonic way, while
+> still maintaining the benefits of a task management system. It is a
+> similar approach to building pipelines in R with targets, which allows
+> you to define the data in a more flexible way.
+
+
+
+Let’s create a task that modifies the data frame by adding a new column.
+This task will depend on the previous task’s output, and we will use the
+data catalog to define the input and output data.
+
+------------------------------------------------------------------------
+
+source
+
+### task_add_one
+
+> task_add_one (path_to_data:typing.Annotated[pathlib.Path,Path('/net/rcsto
+> renfs02/ifs/rc_labs/dominici_lab/lab/data_processing/csph-
+> era5_sandbox/bld/data.pkl')], node:typing.Annotated[_pytask
+> .nodes.PickleNode,ProductType()]=PickleNode(path=Path('/net
+> /rcstorenfs02/ifs/rc_labs/dominici_lab/lab/data_processing/
+> csph-era5_sandbox/.pytask/data_catalogs/default/1eef510d81e
+> ea49161cd821b318aa999e630bdd292b093aa9a9319e9f282b984.pkl')
+> , name='mydata', attributes={'catalog_name': 'default'},
+>  serializer=&lt;built-in function dump&gt;, deserializer=&lt;built-in function load&gt;))
+
+*Add one to the ‘y’ column of the data frame and save it as a new pickle
+file.*
+
+
+
+
+
+
+
+
+
+
+
+
Type
+
Default
+
Details
+
+
+
+
+
path_to_data
+
Annotated
+
+
Path to the data file created by the previous task
+
+
+Exported source
+
+``` python
+def task_add_one(
+ path_to_data: Annotated[Path, BLD / "data.pkl"], # Dependency: the pickle file created by the previous task
+ node: Annotated[PickleNode, Product] = demo_catalog["mydata"] # Product: the "mydata" entry of demo_catalog
+) -> None:
+ """
+ Add one to the 'y' column of the data frame and save it as a new pickle file.
+ """
+ df = pd.read_pickle(path_to_data)
+ df['z'] = df['y'] + 1 # the result goes into a new column 'z'; 'y' itself is left unchanged
+
+ node.save(df) # persist through the catalog node rather than writing to an explicit path
+```
+
+
+
+In this function, we’ve defined that the task relies on the output of
+the first task being there, the `data.pkl` file. But importantly, we’ve
+also defined our product as a `PickleNode` called `node`. This
+will allow `pytask` to handle the serialization and deserialization of
+the data frame automatically, so we don’t have to worry about the
+details of how the data is stored. We create the datacatalog in our
+config file, and then tell this task to create a Node in that catalog
+called `mydata`. Whatever we save with the `node.save()` method will be
+stored under `.pytask/data_catalogs/`, but more importantly *will be indexed and
+hashed by `pytask`*. This means that if the data changes, `pytask` will
+know to rerun the task.
+
+To make this even more pythonic, we can modify the format of our task
+function so that the return type annotation names a node in the data
+catalog. This allows us to define the output of the task as a
+`PickleNode`, which will automatically handle the serialization and
+deserialization of the data frame.
+
+
+
+> **Note**
+>
+> This is another trick I’m deriving from {targets}. By formatting tasks
+> as pure functions where inputs are parameters and targets are return
+> type annotations, we can define the output of the task as a
+> `PickleNode`, which will automatically handle the serialization and
+> deserialization of the data frame. This again allows us to focus on
+> the logic of our tasks rather than the details of data management.
+
+
+
+So below, we’re directly accessing the `demo_catalog` to get the
+`mydata` node, and then modifying it by adding a new column. It *feels*
+like we are doing this in place, such as in an iPython session, because
+we are allowing `pytask` to handle the serialization of the file on disk
+for us.
+
+------------------------------------------------------------------------
+
+source
+
+### task_add_another_column
+
+> task_add_another_column (df:typing.Annotated[pandas.core.frame.DataFrame,
+> PickleNode(path=Path('/net/rcstorenfs02/ifs/rc_l
+> abs/dominici_lab/lab/data_processing/csph-era5_s
+> andbox/.pytask/data_catalogs/default/1eef510d81e
+> ea49161cd821b318aa999e630bdd292b093aa9a9319e9f28
+> 2b984.pkl'),name='mydata',attributes={'catalog_n
+>                           ame':'default'},serializer=&lt;built-in function dump&gt;,deserializer=&lt;built-in function load&gt;)])
+
+*Add another column to the data frame stored in the PickleNode.*
+
+
+
+
+
+
+
+
+
+
+
Type
+
Details
+
+
+
+
+
df
+
Annotated
+
which object in the catalog to fetch from the catalog with
+node.load()
+
+
+
Returns
+
Annotated
+
which object in the catalog to save the return value
+to
+
+
+
+
+
+Exported source
+
+``` python
+def task_add_another_column(
+ df: Annotated[pd.DataFrame, demo_catalog["mydata"]] # which object in the catalog to fetch from the catalog with node.load()
+) -> Annotated[pd.DataFrame, demo_catalog["mydata2"]]: # which object in the catalog to save the return value to
+ """
+ Add another column to the data frame stored in the PickleNode.
+ """
+
+ # `df` is passed in as a data frame, so this body does no catalog access or file I/O itself;
+ # NOTE(review): presumably pytask loads it from the "mydata" node and stores the
+ # return value in "mydata2", as the annotations suggest - confirm against the pytask docs
+ df['w'] = df['z'] * df['y'] # adds column 'w' in place to the frame that was passed in
+
+ return df
+```
+
+
+
+To test this interactively, we first have to load the data frame from
+the catalog node ourselves:
+
+``` python
+df = demo_catalog["mydata"].load() # load the data frame from the PickleNode
+result = task_add_another_column(df) # call the task function with the loaded data frame
+```
+
+``` python
+result
+```
+
+Now that we know it will work, we can invoke pytask:
+
+``` sh
+pytask
+```
+
+Notice that the outputs are cached and not recomputed unless the inputs
+change. This is a key feature of `pytask` and other DAGs, allowing you
+to efficiently manage your data processing tasks without unnecessary
+recomputation.
+
+## Conclusion
+
+The takeaway here is that with `pytask`, you can define pure functions
+that take inputs and return outputs, and build a DAG of tasks that can
+be executed in a flexible and efficient way. This allows you to focus on
+the logic of your tasks rather than the details of data management,
+while still maintaining the benefits of a task management system. The
+key elements are:
+
+- **Task annotation**: You define your tasks by creating pure functions
+  that take inputs and return outputs, and use decorators or naming
+  conventions to mark them as “tasks” in a DAG.
+- **Input and output annotation**: You define the inputs and outputs of
+  your tasks using type hints, and allow `pytask` to automatically detect
+  and handle the dependencies between tasks.
+- **Data catalog**: You define your data in a Pythonic object in your
+  config called `data_catalog`. As you iteratively develop your DAG, you
+  add objects to the data catalog, which are called nodes. As long as
+  the object stored in a node can be pickled, `pytask` will
+  handle the serialization and deserialization of the data for you.
diff --git a/_docs/20_pytask_config.html b/_docs/20_pytask_config.html
new file mode 100644
index 0000000..aaf6c2a
--- /dev/null
+++ b/_docs/20_pytask_config.html
@@ -0,0 +1,927 @@
+
+
+
+
+
+
+
+
+
+pytask Config: Defining the Pipeline Internals in pytask – era5_sandbox
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
pytask Config: Defining the Pipeline Internals in pytask
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
config
+
+
This is the config module for the pytask pipeline. This module defines the data catalog(s) and any hard-coded parameters that are used throughout the pipeline.
I’m adding a flag to the config that can be used for quick development. If you import this boolean variable, it can be used to skip tasks, setup samples, etc. on the fly by marking a task with the pytask.mark.skipif decorator. Change this to False when you’re ready to run the full pipeline.
+
+
+Exported source
+
DEV_MODE=True
+
+
+
+
+
The Data Catalog
+
To manage our pipeline, we’re going to use a nested data catalog structure. This way, we can easily return specific entries to specific tasks without having to manage multiple different data catalogs. Specifically, we’ll have a data catalog for each stage of the pipeline, and each catalog will have entries for the inputs, outputs, and any other parameters needed for that stage. This is similar to how we used Hydra configs, but using the pytask data catalog, we can more easily gather the data for a specific task in a structured manner entirely in Python.
+
+
+Exported source
+
stages = ["mydata", 'mydata2', # from the demo, ignore
+"download", # download task
+"aggregate", # aggregation task
+"publish", # publishing task
+"viz"] # visualization task
+
+buckets = [
+"inputs", # any specific inputs, eg for carrying over between tasks
+"outputs", # specific output task returns
+"jobs", # job parameters as a dataframe
+"params"# any lingering hardcoded parameters
+ ]
+
+data_catalog = {
+
+ stage: {bucket: DataCatalog(name=f"{stage}_{bucket}") for bucket in buckets}
+for stage in stages
+}
+
+
+
+
+
The Download Task
+
A good strategy may be to set pipeline stage parameters in the config file, and then use the pytask data catalog to manage the data. This way, we can easily change the parameters without having to modify the code. This is especially useful for the API query, where we need to be able to set the parameter grid for the years and data types we want to download data for. So, let’s create an entry in the data catalog specifically for the download task.
+
A good strategy I thought about for grid parameter comprehension is to create a dataframe that expands all the combinations of parameters, and then uses each combination to create the tasks which are then easily added to the data catalog. This way, we can still easily inspect the pipeline and see what tasks are being run, while also being able to easily change the parameters in the config file without too much hassle.
+
An important framework decision I’m making here is that each ROW of the dataframe corresponds to a single task, so that we can quickly understand at a glance what the task is doing, and also easily develop the code for the task itself. This is different from the hydra approach where a job is first specified by a default config, and then the parameters are swept over in multiple config files. This is a more flexible approach, IMO, because:
+
+
each row defines a single task run, so it’s easy to understand what the run is doing
+
it’s easy to add or remove runs by simply expanding the list of parameters and using dataframe filters to remove irrelevant parameter combinations
+
we don’t have to independently inspect and manage multiple different/overriding config files
+
it’s all in Python, so we can use the full power of the language to define the parameters and the tasks in a single sweep, not through the need of hydra+snakemake multi stage/multi-lingual config system
+
+
So, to do this, we define one job as a query to the CDS API that must contain: - The dataset (re-analysis) - The year - The month - All days in the month - All times of day (hour) - The geography (region), which will need: - The URL to the shapefile to calculate the bounding box
+
Given one combination of all of these, a single SLURM job can complete the first “task” in parallel by having a run assigned to each row of the dataframe.
+
+
+Exported source
+
# Dimensions
+years = [str(x) for x inrange(2009, 2025)] # 16 years
+months = [str(x).zfill(2) for x inrange(1, 13)] # 12 months
+geographies = ["madagascar", "nepal"] # 2 geographies
+
+# nested values; we want ALL days, times, and variables for each job
+days = [str(x).zfill(2) for x inrange(1, 32)]
+times = [f"{x:02d}:00"for x inrange(24)]
+variables = ["2m_dewpoint_temperature", "2m_temperature", "total_precipitation", "volumetric_soil_water_layer_1"]
+
+product_type ="reanalysis"
+
+# Map shapefiles to geography
+shapefiles = {
+"madagascar": "https://data.humdata.org/dataset/26fa506b-0727-4d9d-a590-d2abee21ee22/resource/ed94d52e-349e-41be-80cb-62dc0435bd34/download/mdg_adm_bngrc_ocha_20181031_shp.zip",
+"nepal": "https://data.humdata.org/dataset/07db728a-4f0f-4e98-8eb0-8fa9df61f01c/resource/2eb4c47f-fd6e-425d-b623-d35be1a7640e/download/npl_adm_nd_20240314_ab_shp.zip"
+}
+
+# Build row-wise combinations of (year, month, geography)
+rows = []
+for year in years:
+for month in months:
+for geo in geographies:
+ rows.append({
+"year": year,
+"month": month,
+"geography": geo,
+"shapefile": shapefiles[geo],
+"product_type": product_type,
+"day": days,
+"time": times,
+"variables": variables,
+"output": f"{year}_{month}_{geo}"
+ })
+
+# Create dataframe
+query_df = pd.DataFrame(rows)
+
+
+
+
query_df
+
+
+
print(f"Number of estimated jobs: {query_df.shape[0]}. Examples...")
+
+for i, row in query_df.sample(3).iterrows():
+print(f"Year: {row['year']}, Month: {row['month']}, Geography: {row['geography']}, Link: {row['shapefile']}, Variables: {row['variables']}")
+
+
Now add them to the catalog. We’re going to use a dictionary to nest data catalogs so that we can return specific task products to named data catalog nodes.
+
Our data catalog now has a download|jobs node with a queries_df entry that contains the dataframe of all the jobs to be run in this task.
To carry out the aggregation, we will follow similar logic to the original pipeline and use xarray to aggregate data into spatial and temporal averages. The aggregation task will take the downloaded data and compute the mean over the specified time period and spatial region. However, in this case, we want to aggregate the data diurnally, so we will need to fetch the sundown and sunrise times for the region and use them to compute the diurnal averages.
+
Once again, we will use a dataframe to define the parameters for the aggregation task.
+
Here we will use a dataframe with the jobs as rows; the first column is “input” which is the list of query names from the download task, and the last column is the output object name. Columns in between can be the parameters needed for the aggregation task, which then get expanded to the full list of jobs with itertools.product, explode or similar, and filtered as necessary.
+
For explanations of the parameters, see the Aggregation Task notebook’s final task_aggregate_data_diurnal function.
+
+
+Exported source
+
inputs = query_df["output"].tolist()
+outputs = [f"{i}_agg"for i in inputs]
+
+variable_dict = {
+"2m_dewpoint_temperature": "d2m",
+"2m_temperature": "t2m",
+"total_precipitation": "tp",
+"volumetric_soil_water_layer_1": "swvl1"
+}
+
+# list of params that get fed into the task functions
+agg_params = {
+"time": ["day", "night"],
+"solar_classification": ["before"],
+"variables": variables,
+"variables_short": [variable_dict[x] for x in variables],
+"aggregation_name": ["mean", "sum", "max", "min"]
+}
+
+from itertools import product
+import pandas as pd
+
+# expand all the params
+agg_params = pd.DataFrame(list(product(*agg_params.values())), columns=agg_params.keys())
+
+
+
Inspecting it:
+
+
agg_params
+
+
Let’s keep only rows where the variables and variables_short match
Great, and now keeping sum only for total precipitation (we don’t need mean, max, min for that variable), and removing sum for all other variables (we don’t need sum for temperature or soil moisture):
+
+
+Exported source
+
mask = (agg_params['variables_short'] =="tp") & (agg_params['aggregation_name'] !="sum")
+agg_params = agg_params[~mask]
+
+# remove rows where non-tp aggregation is sum
+mask = (agg_params['variables_short'] !="tp") & (agg_params['aggregation_name'] =="sum")
+agg_params = agg_params[~mask]
+
+
+
+
agg_params
+
+
Now we add the input and output columns by joining:
A few more configuration items need to be added, like the local timezone for each geography, the healthshed filename, the healthshed unique ID variable name in the shapefile, and whether the variable is instantaneous or accumulated:
+
+
+Exported source
+
aggregate_jobs['local_tz'] = aggregate_jobs['input'].apply(
+lambda x: "Asia/Kathmandu"if"nepal"in x else"Indian/Antananarivo"
+)
+aggregate_jobs['shapefile'] = aggregate_jobs['input'].apply(
+lambda x: "Nepal_Healthsheds2024.zip"if"nepal"in x else"healthsheds2022.zip"
+)
+
+aggregate_jobs['hshd_unique_id'] = aggregate_jobs['input'].apply(
+lambda x: "fid"if"nepal"in x else"fs_uid"
+)
+
+aggregate_jobs['climate_handler_var'] = aggregate_jobs['variables_short'].apply(
+lambda x: "accum"if x =="tp"else"instant"
+)
+
+
+
+
+
\ No newline at end of file
diff --git a/_docs/20_pytask_config.md b/_docs/20_pytask_config.md
new file mode 100644
index 0000000..689d80c
--- /dev/null
+++ b/_docs/20_pytask_config.md
@@ -0,0 +1,365 @@
+# `pytask` Config: Defining the Pipeline Internals in `pytask`
+
+
+## config
+
+> This is the config module for the `pytask` pipeline. This module
+> defines the data catalog(s) and any hard-coded parameters that are
+> used throughout the pipeline.
+
+
+
+
+Exported source
+
+``` python
+import pandas as pd
+
+from pathlib import Path
+from pyprojroot import here
+from pytask import DataCatalog
+
+
+SRC = here() / "src" / "era5_sandbox"
+BLD = here() / "bld"
+
+demo_catalog = DataCatalog()
+```
+
+
+
+## `DEV_MODE`: A Quick Development Flag
+
+I’m adding a flag to the config that can be used for quick development.
+If you import this boolean variable, it can be used to skip tasks, setup
+samples, etc. on the fly by `marking` a task with the
+`pytask.mark.skipif` decorator. Change this to `False` when you’re ready
+to run the full pipeline.
+
+
+Exported source
+
+``` python
+DEV_MODE=True
+```
+
+
+
+## The Data Catalog
+
+To manage our pipeline, we’re going to use a nested data catalog
+structure. This way, we can easily return specific entries to specific
+tasks without having to manage multiple different data catalogs.
+Specifically, we’ll have a data catalog for each stage of the pipeline,
+and each catalog will have entries for the inputs, outputs, and any
+other parameters needed for that stage. This is similar to how we used
+Hydra configs, but using the `pytask` data catalog, we can more easily
+gather the data for a specific task in a structured manner entirely in
+Python.
+
+
+Exported source
+
+``` python
+stages = ["mydata", 'mydata2', # from the demo, ignore
+ "download", # download task
+ "aggregate", # aggregation task
+ "publish", # publishing task
+ "viz"] # visualization task
+
+buckets = [
+ "inputs", # any specific inputs, eg for carrying over between tasks
+ "outputs", # specific output task returns
+ "jobs", # job parameters as a dataframe
+ "params" # any lingering hardcoded parameters
+ ]
+
+data_catalog = {
+
+ stage: {bucket: DataCatalog(name=f"{stage}_{bucket}") for bucket in buckets}
+ for stage in stages
+}
+```
+
+
+
+## The Download Task
+
+A good strategy may be to set pipeline stage parameters in the config
+file, and then use the `pytask` data catalog to manage the data. This
+way, we can easily change the parameters without having to modify the
+code. This is especially useful for the API query, where we need to be
+able to set the parameter grid for the years and data types we want to
+download data for. So, let’s create an entry in the data catalog
+specifically for the download task.
+
+A good strategy I thought about for grid parameter comprehension is to
+create a dataframe that expands all the combinations of parameters, and then
+uses each combination to create the tasks which are then easily added to
+the data catalog. This way, we can still easily inspect the pipeline and
+see what tasks are being run, while also being able to easily change the
+parameters in the config file without too much hassle.
+
+An important framework decision I’m making here is that each ROW of the
+dataframe corresponds to a single task, so that we can quickly
+understand at a glance what the task is doing, and also easily develop
+the code for the task itself. This is different from the hydra approach
+where a job is first specified by a default config, and then the
+parameters are swept over in multiple config files. This is a more
+flexible approach, IMO, because:
+
+1. each row defines a single task run, so it’s easy to understand what
+ the run is doing
+2. it’s easy to add or remove runs by simply expanding the list of
+ parameters and using dataframe filters to remove irrelevant
+ parameter combinations
+3. we don’t have to independently inspect and manage multiple
+ different/overriding config files
+4. it’s all in Python, so we can use the full power of the language to
+ define the parameters and the tasks in a single sweep, without needing
+ a hydra+snakemake multi-stage/multi-lingual config system
+
+So, to do this, we define one job as a query to the CDS API that must
+contain: the dataset (re-analysis), the year, the month, all days in the
+month, all times of day (hour), and the geography (region), which needs
+the URL to the shapefile used to calculate the bounding box.
+
+Given one combination of all of these, a single SLURM job can complete
+the first “task” in parallel by having a run assigned to each row of the
+dataframe.
+
+
+Exported source
+
+``` python
+# Dimensions
+years = [str(x) for x in range(2009, 2025)] # 16 years
+months = [str(x).zfill(2) for x in range(1, 13)] # 12 months
+geographies = ["madagascar", "nepal"] # 2 geographies
+
+# nested values; we want ALL days, times, and variables for each job
+days = [str(x).zfill(2) for x in range(1, 32)]
+times = [f"{x:02d}:00" for x in range(24)]
+variables = ["2m_dewpoint_temperature", "2m_temperature", "total_precipitation", "volumetric_soil_water_layer_1"]
+
+product_type = "reanalysis"
+
+# Map shapefiles to geography
+shapefiles = {
+ "madagascar": "https://data.humdata.org/dataset/26fa506b-0727-4d9d-a590-d2abee21ee22/resource/ed94d52e-349e-41be-80cb-62dc0435bd34/download/mdg_adm_bngrc_ocha_20181031_shp.zip",
+ "nepal": "https://data.humdata.org/dataset/07db728a-4f0f-4e98-8eb0-8fa9df61f01c/resource/2eb4c47f-fd6e-425d-b623-d35be1a7640e/download/npl_adm_nd_20240314_ab_shp.zip"
+}
+
+# Build row-wise combinations of (year, month, geography)
+rows = []
+for year in years:
+ for month in months:
+ for geo in geographies:
+ rows.append({
+ "year": year,
+ "month": month,
+ "geography": geo,
+ "shapefile": shapefiles[geo],
+ "product_type": product_type,
+ "day": days,
+ "time": times,
+ "variables": variables,
+ "output": f"{year}_{month}_{geo}"
+ })
+
+# Create dataframe
+query_df = pd.DataFrame(rows)
+```
+
+
+
+``` python
+query_df
+```
+
+``` python
+print(f"Number of estimated jobs: {query_df.shape[0]}. Examples...")
+
+for i, row in query_df.sample(3).iterrows():
+ print(f"Year: {row['year']}, Month: {row['month']}, Geography: {row['geography']}, Link: {row['shapefile']}, Variables: {row['variables']}")
+```
+
+Now add them to the catalog. We’re going to use a dictionary to nest
+data catalogs so that we can return specific task products to named data
+catalog nodes.
+
+Our data catalog now has a `download|jobs` node with a `queries_df`
+entry that contains the dataframe of all the jobs to be run in this
+task.
+
+``` python
+data_catalog['download']['jobs']['queries_df'].load().head()
+```
+
+## The Aggregation Task
+
+To carry out the aggregation, we will follow similar logic to the
+original pipeline and use xarray to aggregate data into spatial and
+temporal averages. The aggregation task will take the downloaded data
+and compute the mean over the specified time period and spatial region.
+However, in this case, we want to aggregate the data diurnally, so we
+will need to fetch the sundown and sunrise times for the region and use
+them to compute the diurnal averages.
+
+Once again, we will use a dataframe to define the parameters for the
+aggregation task.
+
+Here we will use a dataframe with the jobs as rows; the first column is
+“input” which is the list of query names from the download task, and the
+last column is the output object name. Columns in between can be the
+parameters needed for the aggregation task, which then get expanded to
+the full list of jobs with `itertools.product`, `explode` or similar,
+and filtered as necessary.
+
+For explanations of the parameters, see the Aggregation Task notebook’s
+final `task_aggregate_data_diurnal` function.
+
+
+Exported source
+
+``` python
+inputs = query_df["output"].tolist()
+outputs = [f"{i}_agg" for i in inputs]
+
+variable_dict = {
+ "2m_dewpoint_temperature": "d2m",
+ "2m_temperature": "t2m",
+ "total_precipitation": "tp",
+ "volumetric_soil_water_layer_1": "swvl1"
+}
+
+# list of params that get fed into the task functions
+agg_params = {
+ "time": ["day", "night"],
+ "solar_classification": ["before"],
+ "variables": variables,
+ "variables_short": [variable_dict[x] for x in variables],
+ "aggregation_name": ["mean", "sum", "max", "min"]
+}
+
+from itertools import product
+import pandas as pd
+
+# expand all the params
+agg_params = pd.DataFrame(list(product(*agg_params.values())), columns=agg_params.keys())
+```
+
+
+
+Inspecting it:
+
+``` python
+agg_params
+```
+
+Let’s keep only rows where the variables and variables_short match
+
+
+Exported source
+
+``` python
+agg_params = agg_params[agg_params.apply(lambda x: variable_dict[x['variables']] == x['variables_short'], axis=1)]
+```
+
+
+
+``` python
+agg_params
+```
+
+Great, and now keeping `sum` only for total precipitation (we don’t need
+mean, max, min for that variable), and removing `sum` for all other
+variables (we don’t need sum for temperature or soil moisture):
+
+
+Exported source
+
+``` python
+mask = (agg_params['variables_short'] == "tp") & (agg_params['aggregation_name'] != "sum")
+agg_params = agg_params[~mask]
+
+# remove rows where non-tp aggregation is sum
+mask = (agg_params['variables_short'] != "tp") & (agg_params['aggregation_name'] == "sum")
+agg_params = agg_params[~mask]
+```
+
+
+
+``` python
+agg_params
+```
+
+Now we add the input and output columns by joining:
+
+
+Exported source
+
+``` python
+inputs = pd.DataFrame({"input": inputs})
+aggregate_jobs = inputs.merge(agg_params, how="cross")
+```
+
+
+
+This result gives us the full list of jobs for the aggregation task. 20
+rows for the parameters, and 384 inputs/outputs, giving a total of 7680
+jobs:
+
+``` python
+assert aggregate_jobs.shape[0] == 20 * len(inputs)
+aggregate_jobs
+```
+
+A few more configuration items need to be added, like the local timezone
+for each geography, the healthshed filename, the healthshed unique ID
+variable name in the shapefile, and whether the variable is
+instantaneous or accumulated:
+
+
+Exported source
+
+``` python
+aggregate_jobs['local_tz'] = aggregate_jobs['input'].apply(
+ lambda x: "Asia/Kathmandu" if "nepal" in x else "Indian/Antananarivo"
+)
+aggregate_jobs['shapefile'] = aggregate_jobs['input'].apply(
+ lambda x: "Nepal_Healthsheds2024.zip" if "nepal" in x else "healthsheds2022.zip"
+)
+
+aggregate_jobs['hshd_unique_id'] = aggregate_jobs['input'].apply(
+ lambda x: "fid" if "nepal" in x else "fs_uid"
+)
+
+aggregate_jobs['climate_handler_var'] = aggregate_jobs['variables_short'].apply(
+ lambda x: "accum" if x == "tp" else "instant"
+)
+```
+
+
+
+``` python
+aggregate_jobs
+```
+
+Now we add this to the data catalog:
+
+
+Exported source
+
+``` python
+data_catalog['aggregate']['jobs'].add("jobs_df", aggregate_jobs)
+```
+
+
+
+Our data catalog now has an `aggregate|jobs` node with a `jobs_df` entry
+that contains the dataframe of all the jobs to be run in this task.
+
+``` python
+data_catalog['aggregate']['jobs']['jobs_df'].load().head()
+```
diff --git a/_docs/20_pytask_logger.html b/_docs/20_pytask_logger.html
new file mode 100644
index 0000000..bb3016d
--- /dev/null
+++ b/_docs/20_pytask_logger.html
@@ -0,0 +1,734 @@
+
+
+
+
+
+
+
+
+
+Logging: A simple logger to inject into pytask jobs – era5_sandbox
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
This module downloads the raw era5 data from the CDS API. It is similar to the original script, refactored for pytask.
+
+
+
We’re going to quickly refactor the pipeline to use pytask instead of hydra and snakemake. This will hopefully demonstrate a simpler and more flexible way to manage data pipelines in Python.
+
To start off, we need to create a function that queries the CDS API with one job. This function will be used to download the data for each query in the range specified in the data catalog in the config file.
+
Let’s take a look at the data catalog we created in the config module:
+
You can see the queries entry we created in the data catalog. Each query is a row of a dataframe that contains the parameters for the CDS API query.
In this way, we have a similar approach to Hydra configs, but, using the pytask data catalog, we can more easily gather the data for a specific task in a structured manner entirely in Python.
This works! So now we just need to create a task_ function that pytask will recognise to parallelise the download of queries over:
+
+
How this works (with some help from GPT):
+
+
🧠 How pytask Discovers and Executes Tasks
+
When you run pytask, it automatically scans your project for Python files named task_*.py. In these files, it looks for: - Functions decorated with @task, or - Functions prefixed with task_
+
These functions are not executed immediately. Instead, pytask: 1. Imports each task_*.py module (just like Python would) 2. Registers any matching task functions as nodes in a directed acyclic graph (DAG) 3. Resolves dependencies by analyzing: - Input annotations (e.g., Annotated[x, DependsOn]) - Output declarations (e.g., return values or Product annotations) 4. Builds the DAG, where each task function is a node 5. Executes the tasks, respecting dependency order and skipping up-to-date nodes
+
So even though the task functions aren’t explicitly “run” in the Python code itself, pytask knows how and when to execute them — based on their position in the DAG.
+
+
+
🔄 How This Differs from Snakemake
+
In snakemake, you’re expected to define a series of explicitly executable rules, often using shell commands or Python scripts. You “stitch together” rules using filenames and wildcard matching.
+
In contrast: - 🐍 pytask is Python-native — tasks are just regular Python functions - ⚙️ It builds a DAG from those functions and tracks inputs/outputs automatically - 🧱 You are declaring nodes, not scripting execution
+
Think of your Python files not as scripts to run, but as a way to define and wire together declarative tasks that will be executed by the pytask engine.
+
+
Because we defined this task in a function and loop, we can easily debug a node in the DAG by simply calling it:
+
+
task_download_raw_data()
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/_docs/21_pytask_download.md b/_docs/21_pytask_download.md
new file mode 100644
index 0000000..6ff9682
--- /dev/null
+++ b/_docs/21_pytask_download.md
@@ -0,0 +1,110 @@
+# Download: `download` Module as a `pytask` Task
+
+
+## task_download
+
+> This module downloads the raw era5 data from the CDS API. It is
+> similar to the original script, refactored for `pytask`.
+
+
+
+We’re going to quickly refactor the pipeline to use pytask instead of
+hydra and snakemake. This will hopefully demonstrate a simpler and more
+flexible way to manage data pipelines in Python.
+
+To start off, we need to create a function that queries the CDS API with
+one job. This function will be used to download the data for each query
+in the range specified in the data catalog in the config file.
+
+Let’s take a look at the data catalog we created in the config module:
+
+You can see the queries entry we created in the data catalog. Each query
+is a row of a dataframe that contains the parameters for the CDS API
+query.
+
+``` python
+queries = data_catalog['download']['jobs']['queries_df'].load()
+queries
+```
+
+We can test this query like we did in the original work:
+
+``` python
+example_query = queries.iloc[0]
+
+create_bounding_box(example_query['shapefile'])
+```
+
+In this way, we have a similar approach to Hydra configs, but, using the
+`pytask` data catalog, we can more easily gather the data for a specific
+task in a structured manner entirely in Python.
+
+``` python
+client = cdsapi.Client()
+
+ex_bounding_box = create_bounding_box(example_query['shapefile'])
+
+request = {
+ "product_type": example_query['product_type'],
+ "variable": example_query['variables'],
+ "year": str(example_query['year']),
+ "month": str(example_query['month']),
+ "day": example_query['day'],
+ "time": example_query['time'],
+ "data_format": "netcdf",
+ "download_format": "unarchived",
+ "area": ex_bounding_box
+ }
+
+target = f"{example_query['output']}.nc"
+
+client.retrieve("reanalysis-era5-single-levels", request).download(target)
+```
+
+This works! So now we just need to create a `task_` function that pytask
+will recognise, so that the downloads can be parallelised over the queries:
+
+### How this works (with some help from GPT):
+
+#### 🧠 How pytask Discovers and Executes Tasks
+
+When you run pytask, it automatically scans your project for Python
+files named `task_*.py`. In these files, it looks for functions
+decorated with `@task`, or functions prefixed with `task_`.
+
+These functions are not executed immediately. Instead, `pytask`:
+
+1. Imports each `task_*.py` module (just like Python would)
+2. Registers any matching task functions as nodes in a directed acyclic graph (DAG)
+3. Resolves dependencies by analyzing input annotations (e.g., `Annotated[x, DependsOn]`)
+   and output declarations (e.g., `return` values or `Product` annotations)
+4. Builds the DAG, where each task function is a node
+5. Executes the tasks, respecting dependency order and skipping up-to-date nodes
+
+So even though the task functions aren’t explicitly “run” in the Python
+code itself, pytask knows how and when to execute them — based on their
+position in the DAG.
+
+#### 🔄 How This Differs from Snakemake
+
+In `snakemake`, you’re expected to define a series of explicitly
+executable rules, often using shell commands or Python scripts. You
+“stitch together” rules using filenames and wildcard matching.
+
+In contrast:
+- 🐍 pytask is Python-native — tasks are just regular Python functions
+- ⚙️ It builds a DAG from those functions and tracks inputs/outputs automatically
+- 🧱 You are declaring nodes, not scripting execution
+
+Think of your Python files not as scripts to run, but as a way to define
+and wire together declarative tasks that will be executed by the pytask
+engine.
+
+------------------------------------------------------------------------
+
+Because we defined this task in a function and loop, we can easily debug
+a node in the DAG by simply calling it:
+
+``` python
+task_download_raw_data()
+```
diff --git a/_docs/22_pytask_aggregate.html b/_docs/22_pytask_aggregate.html
new file mode 100644
index 0000000..221b794
--- /dev/null
+++ b/_docs/22_pytask_aggregate.html
@@ -0,0 +1,1189 @@
+
+
+
+
+
+
+
+
+
+Aggregation: The aggregation Module as a pytask Task – era5_sandbox
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Aggregation: The aggregation Module as a pytask Task
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
task_aggregate
+
+
This task aggregates the downloaded data into spatial and temporal averages. It uses xarray to compute summary statistics over the specified time period and spatial region. The aggregation is done diurnally, so we will fetch the sundown and sunrise times for the region and use them to compute the diurnal averages.
+
+
+
+
Diurnal Classification Based on Sun Position
+
To do diurnal classification, we will need to fetch the sundown and sunrise times for the region and use them to compute the diurnal averages. We will use the astral library to get the sunrise and sunset times for the specified latitude and longitude. The aggregation will be done using xarray, which allows us to compute the mean over the specified time period and spatial region.
from astral import Observer, sun
+import pandas as pd
+import numpy as np
+from tqdm import tqdm
+import random
+import datetime
+from pytz import UTC
+
+
+
+
# get the location of a datapoint in the dataset
+lat, long= ds.coords["latitude"].values[0], ds.coords["longitude"].values[0]
+time = ds['valid_time'].values[0]
+dt = pd.to_datetime(time, utc=True)
#fetch a random time from valid_time
+options = ds['valid_time'].values
+
+random_time = random.choice(options)
+dt = pd.to_datetime(random_time, utc=True)
+sun_info = sun.sun(observer, date=dt)
+if dt < sun_info['sunrise']:
+print(f"Randomly selected time: {dt} is pre_dawn")
+elif dt >= sun_info['sunrise'] and dt < sun_info['sunset']:
+print(f"Randomly selected time: {dt} is day")
+else:
+print(f"Randomly selected time: {dt} is post_dusk")
+
+
This tells us that we can use the valid time for the specific location of each data point in the query and know based on the sun whether it was daytime or nighttime. The runtime will be limited only by the looping. Let’s put this in a function so that we can use the resampling in xarray.
+
The resampling approach will be a single function that can resample in three ways:
+
+
By calendar date, default (1 value per calendar date)
+
By diurnal class by calendar date (3 values, pre-dawn, day, post-dusk)
+
By solar date (2 values per calendar date, with night classified as “before” or “after”)
+
+
Therefore, we’ll need 2 internal functions; one to do diurnal, and one to do solar date bins.
+
Essentially, we are going to create an array-shaped index/mask, (time, latitude, longitude). As a demonstration, this loop goes through the first 24 time points in the dataset, and calculates the sun info for each latitude and longitude, assigning the values to an array:
+
+
times = ds['valid_time'].values[:24]
+lats = ds.coords['latitude'].values
+lons = ds.coords['longitude'].values
+
+result = np.full((len(times), len(lats), len(lons)), "", dtype=object)
+
+for i, dt inenumerate(times):
+
+for j, lat inenumerate(lats):
+
+for k, lon inenumerate(lons):
+
+# set the geographical position
+ observer = Observer(latitude=lat, longitude=lon, elevation=0)
+
+# use the time
+ dt = pd.to_datetime(dt, utc=True)
+
+# where/when is the sun at this time for this position
+ sun_info = sun.sun(observer, date=dt)
+ result[i, j, k] = sun_info
+
+
So we know that in the first hour, the sun goes up and comes down at slightly different times based on latitude and longitude. Take the first hour, for example:
+
+
print(result.shape)
+hour_1 =0# 0th index of the results
+
+min_lat =0
+min_lon =0
+max_lat =48
+max_lon =90
+print(f"Even though the reading came from the first HOUR of data UTC, the sun info at the minimum latitude/longitude is: {result[hour_1, min_lat, min_lon]}")
+
+print(f"this is different from the sun info at the maximum latitude/longitude is: {result[hour_1, max_lat, max_lon]}")
Compute the diurnal value for each data point in the dataset. This function iterates over each data point in the dataset, calculates the sunrise and sunset times for the given time, latitude and longitude, and returns whether or not that data point is before dawn, during the day, or after dusk.
+
+
ex=compute_diurnal_class_bins(ds)
+
+
So, for our 720 time points, we should find that if we take the set() of all the classifications within that slice, there should be a few of them with 2 classes. In other words, at any given hour, almost all of the readings are “day”, because it is daytime across all of Madagascar, but at certain timepoints, the sun is rising or setting in the northern part of the country and so some portion of the slice is classified differently:
+
+
+
+illustrated
+
+
+
+
for x inrange(720):
+print(set(ex[x].flatten()))
+
+
This works! Now we can do a similar, but slightly more complicated function to define “night” and “day”, where “night” includes all of the values after the sun goes down.
Compute the diurnal value for each data point in the dataset. This function iterates over each data point in the dataset, calculates the sunrise and sunset times for the given time, latitude and longitude, and returns whether or not that data point is daytime or nighttime. The definition of “nighttime” can be set to be all the darkness before the sun came up (before), or all the darkness after it went down (after).
+
+
+Exported source
+
def compute_solar_day_night_class_bins(
+ ds: xr.Dataset,
+ night_direction: Literal["before", "after"],
+ )->list:
+"""
+ Compute the diurnal value for each data point in the dataset.
+ This function iterates over each data point in the dataset,
+ calculates the sunrise and sunset times for the given time, latitude and longitude,
+ and returns whether or not that data point is daytime or nighttime.
+ The definition of "nighttime" can be set to be all the darkness before the sun
+ came up (before), or all the darkness after it went down (after).
+ """
+
+ times = ds['valid_time'].values
+ lats = ds.coords['latitude'].values
+ lons = ds.coords['longitude'].values
+
+ result = np.full((len(times), len(lats), len(lons)), "", dtype=object)
+ datetimes = np.full((len(times), len(lats), len(lons)), "", dtype=object)
+
+for i, dt inenumerate(tqdm(times, desc="Classifying data points by sun position")):
+# use the time
+ dt = pd.to_datetime(dt, utc=True)
+
+for j, lat inenumerate(lats):
+
+for k, lon inenumerate(lons):
+
+# set the geographical position
+ observer = Observer(latitude=lat, longitude=lon, elevation=0)
+if night_direction =="before":
+# Night is from previous sunset to today's sunrise
+ sun_today = sun.sun(observer, date=dt.date())
+ sun_prev = sun.sun(observer, date=(dt - pd.Timedelta(days=1)).date())
+ night_start = sun_prev["sunset"].astimezone(pd.Timestamp.utcnow().tz)
+ night_end = sun_today["sunrise"].astimezone(pd.Timestamp.utcnow().tz)
+
+# the reading is from yesterday's nighttime
+if night_start <= dt < night_end:
+ result[i, j, k] ="night"
+# the date counts as today
+ datetimes[i, j, k] = dt.date()
+
+# the reading is from daytime
+elif sun_today["sunrise"] <= dt < sun_today["sunset"]:
+ result[i, j, k] ="day"
+# the date counts as today
+ datetimes[i, j, k] = dt.date()
+
+# the reading is from today's nighttime, but counts as tomorrow's night
+else:
+ result[i, j, k] ="night"
+# the date is tomorrow
+ datetimes[i, j, k] = (dt + pd.Timedelta(days=1)).date()
+
+elif night_direction =="after":
+# Night is from today's sunset to next sunrise
+ sun_today = sun.sun(observer, date=dt.date())
+ sun_next = sun.sun(observer, date=(dt + pd.Timedelta(days=1)).date())
+ night_start = sun_today["sunset"].astimezone(pd.Timestamp.utcnow().tz)
+ night_end = sun_next["sunrise"].astimezone(pd.Timestamp.utcnow().tz)
+
+# the reading is from daytime
+if sun_today["sunrise"] <= dt < sun_today["sunset"]:
+ result[i, j, k] ="day"
+# the date counts as today
+ datetimes[i, j, k] = dt.date()
+# the reading is from tonight
+elif night_start <= dt < night_end:
+ result[i, j, k] ="night"
+# the date counts as today
+ datetimes[i, j, k] = dt.date()
+
+# the reading is from yesterday night
+else:
+# the date counts as yesterday
+ result[i, j, k] ="day"
+ datetimes[i, j, k] = (dt - pd.Timedelta(days=1)).date()
+else:
+raiseValueError(f"Invalid night_direction: {night_direction}")
+
+return result, datetimes
As before, we should see that most slices are homogeneous, meaning most of the time, all the readings are from the day, but some slices should have day and night values:
+
+
for slice_ inrange(720):
+print(set(ex_class[slice_].flatten()))
+
+
The returned array can serve as new “variable indexes” for the dataset:
Looks great! These two rasters represent one calendar day of daytime and nighttime values.
+
+
Testing Polygon to Raster Cells & Healthshed Aggregation
+
The penultimate step of the aggregate pipeline in the original version is assigning each datapoint to the respective healthshed. The vectors argument comes from the healthshed, and represents each geographic polygon on the ground that we want to aggregate data to.
res_poly2cell=polygon_to_raster_cells(
+ vectors = healthsheds.geometry.values, # the geometries of the shapefile of the regions
+ raster=raster_day.data, # the raster data above
+ nodata=np.nan, # any intersections with no data, may have to be np.nan
+ affine=raster_day.transform, # some math thing need to revise
+ all_touched=True,
+ verbose=True
+)
+
+
This works fine. Finally, we aggregate to healthsheds:
+
+
from era5_sandbox.aggregate import aggregate_to_healthsheds
Below shows the result of aggregating the daytime dewpoint temperature to the healthshed level:
+
+
result_day
+
+
+
result_night
+
+
So from one input, we will have two outputs, one for daytime and one for nighttime, and this will have to loop over the bands (ie each day in the month).
+
+
+
+
+
Putting it all together in a pytask task
+
Below we define our pytask task to aggregate data to the healthshed level.
+
+
+Exported source
+
job_rows = data_catalog['aggregate']['jobs']['jobs_df'].load()
+
+aggregation_funcs = {
+"mean": np.nanmean,
+"sum": np.nansum,
+"max": np.nanmax,
+"min": np.nanmin
+}
+
+for i, job in job_rows.iterrows():
+#print(f"Job {i+1}: variable={job['variables']}, time={job['time']}, aggregation={job['aggregation_name']}")
+
+# parse the row into function parameters
+ input_file = data_catalog['download']['outputs'][job['input']]
+ solar_classification = job['solar_classification']
+ variable = job['variables_short']
+ time = job['time']
+ aggregation_func = aggregation_funcs[job['aggregation_name']]
+ aggregation_name = job['aggregation_name']
+
+ climate_handler_var = job['climate_handler_var']
+ local_tz = job['local_tz']
+
+ shapefile = job['shapefile']
+ hshd_unique_id = job['hshd_unique_id']
+
+ output_file = job['input'] +"_"+ job['time'] +"_"+ job['variables_short'] +"_"+ job['aggregation_name'] +".parquet"
+
+@task(id=output_file, name=f"Aggregate {output_file}", after="task_download_raw_data")
+def task_aggregate_data_diurnal(
+ input_file: Path = data_catalog['download']['outputs'][job['input']], # input data Path from the download task
+ aggregation_func: callable= aggregation_func, # the aggregation function
+ aggregation_name: str= aggregation_name, # the name of the aggregation function
+ time: Literal["day", "night"] = time, # whether to aggregate by day or night
+ night_direction: Literal["before", "after"] = solar_classification, # how to define night
+ variable: str= variable, # the variable to aggregate,
+ climate_handler_var: Literal["instant", "accum"] = climate_handler_var, # whether the variable is instant or accum,
+ local_tz: str= local_tz, # the local timezone for resampling
+ shapefile: str= shapefile, # the shapefile for the healthsheds,
+ hshd_unique_id: str= hshd_unique_id, # the unique id column in the shapefile,
+ output_file: str= output_file # the output file name
+ ) -> Annotated[Path, data_catalog['aggregate']['outputs'][output_file]]:
+"""
+ Task to aggregate data from a CDSAPI Query to the healthshed
+ level. Returns path to parquet file with aggregated data.
+ """
+
+ logger = setup_logger(output_file)
+
+ logger.info(f"Aggregating: {output_file}")
+
+# check if the string path exists
+# if os.path.exists(output_file):
+# logger.info(f"File {output_file} already exists. Skipping aggregation.")
+# return output_file
+
+# get input data
+ logger.info("Reading input data...")
+with ClimateDataFileHandler(input_file) as handler:
+ ds = xr.open_dataset(handler.get_dataset('instant'))
+
+#get the healthshed shapefile
+ logger.info(f"Reading healthshed shapefile from yaml {here()}...")
+withopen(here() /"conf"/"config.yaml") as f:
+ healthshed_config = yaml.safe_load(f)
+
+ key_path = here() / healthshed_config['GOOGLE_DRIVE_AUTH_JSON']['path']
+
+ driver = GoogleDriver(json_key_path=key_path)
+ drive = driver.get_drive()
+ healthsheds = driver.read_healthsheds(shapefile)
+
+# compute the diurnal classification bins
+ logger.info("Computing diurnal classification bins...")
+ class_bins, class_dts = compute_solar_day_night_class_bins(ds, night_direction)
+
+ ds_masked = ds.copy()
+
+# assign classifications
+ logger.info("Assigning classification bins to dataset...")
+ ds['solar_class'] = (('valid_time', 'latitude', 'longitude'), class_bins)
+ ds["solar_date"] = (("valid_time", "latitude", "longitude"), class_dts)
+
+# mask the dataset to the requested time
+ mask = ds["solar_class"] == time
+ ds_masked = ds_masked.where(mask)
+
+# set the local timezone
+ ds_masked = ds_masked.assign_coords(valid_time=pd.to_datetime(ds["valid_time"].values).tz_localize("UTC").tz_convert(local_tz))
+
+# resample by local date
+ logger.info("Resampling by local date...")
+ ds_rs = ds_masked.resample(valid_time="1D").reduce(aggregation_func)
+
+# convert to tiff
+ logger.info("Rasterizing resampled data...")
+ n_bands = ds_rs.dims['valid_time']
+
+# polygon to raster cells for the first band
+ logger.info("Converting polygons to raster cells...")
+ raster = netcdf_to_tiff(ds_rs, band=1, variable=variable)
+ res_poly2cell=polygon_to_raster_cells(
+ vectors = healthsheds.geometry.values, # the geometries of the shapefile of the regions
+ raster=raster.data, # the raster data above
+ nodata=np.nan, # any intersections with no data, may have to be np.nan
+ affine=raster.transform, # some math thing need to revise
+ all_touched=True,
+ verbose=True
+ )
+
+ result_df = healthsheds[[hshd_unique_id, "geometry"]].copy()
+
+# loop over bands and aggregate to healthsheds
+for band in tqdm(range(1, n_bands +1)):
+ logger.info(f"Processing band {band} of {n_bands}...")
+
+ day = band # band is 1-indexed
+
+ day_col =f"day_{day:02d}"
+
+# calculate raster for this band
+ raster = netcdf_to_tiff(ds_rs, band=band, variable=variable)
+
+# aggregate to healthsheds
+ result = aggregate_to_healthsheds(
+ res_poly2cell=res_poly2cell,
+ raster=raster,
+ shapes=healthsheds,
+ names_column=hshd_unique_id,
+ aggregation_func=aggregation_func,
+ aggregation_name=variable
+ )
+
+# add band to result dataframe
+ result_df[day_col] = result[variable]
+
+# save to parquet
+ result_df.to_parquet(f"{BLD}/{output_file}")
+
+ logger.info("Aggregation complete.")
+
+return Path(f"{BLD}/{output_file}")
+
+
+
That should wrap it up! To test, we can run a single job:
+
+
# runs the last defined job only
+task_aggregate_data_diurnal()
This package documents the development and implementation of functions and code for the Madagascar ERA5 dataset project. The goal is for exposure data to be made available at the daily resolution when possible. Finer resolutions shouldn’t ever be needed for our purposes, and it should then be relatively easy to aggregate at coarser resolutions, such as weekly or monthly. Additionally, we’ve extended this work to Nepal as well.
+
Variables should generally be made available from 2010 onward, as that’s where our clinic data starts.
+
All data are ideally made available at the “healthshed” geographical level. Healthsheds are defined as geographical areas whose residents all go to the same clinic. There are a total of ~2700 public clinics in Madagascar, hence ~2700 healthsheds, with each healthshed containing ~10000 people on average.
+
Preliminary list of environmental variables
+
+
+
+
+
+
+
Variables from other sources:
+
+
+
+
+
+
+
+
+
+
+
+
+
Those from the ERA5 dataset will be housed here, but we may likely develop a separate repository for the other datasets.
+
+
+
Developer Guide
+
This package is built and maintained with nbdev. If you are new to using nbdev here are some useful pointers to get you started.
+
+
Install era5_sandbox in Development mode
+
# make sure era5_sandbox package is installed in development mode
+$ pip install -e .
+
To make changes, go to the “notes” directory and edit the notebooks as necessary. Each notebook refers to a module in the era5_sandbox package. Cells are exported to the module when the notebook is saved and you run the following command:
+
$ nbdev_export
+
For example, to change the functionality of the testAPI() function in the testAPI Hydra rule, you would edit the testAPI notebook in the notes directory notes/testAPI.ipynb, and then save that notebook and run nbdev_export to update the core module in the package.
+
+
+
How to Run the Pipeline
+
The pipeline downloads ERA5 variables for a given date range and geographical bounding box. You can learn how each of these steps was developed by following the notebooks in notes in numerical order.
+
+
+
+
+
+
+Important
+
+
+
+
The pipeline has two implementations: one using snakemake and hydra, and another using pytask. The pytask implementation is the more recent one, and is recommended for future use. The snakemake implementation is left here for reference to legacy code.
+
+
+
+
Using pytask
+
To run the pipeline, the pytask config at notes/20_pytask_config.qmd should be reviewed and updated if necessary. The pipeline can then be run with the following command:
+
$ sbatch pytask.sbatch
+
+
+
Using snakemake and hydra
+
To run the pipeline, the config at config/config.yaml should be updated with the desired date range and geographical bounding box. The pipeline can then be run with the following command:
+
sbatch snakemake.sbatch
+
+
+
+
What Does the Pipeline Produce?
+
Using pytask’s data catalog, you can investigate the downloaded raw data with python, eg.:
European Centre for Medium-Range Weather Forecasts
GRIB_subCentre :
0
Conventions :
CF-1.7
institution :
European Centre for Medium-Range Weather Forecasts
history :
2025-09-16T20:55 GRIB to CDM+CF via cfgrib-0.9.15.0/ecCodes-2.42.0 with {"source": "data.grib", "filter_by_keys": {}, "encode_cf": ["parameter", "time", "geography", "vertical"]}
+
+
+
And plot it with cartopy, eg.:
+
+
import matplotlib.pyplot as plt
+import cartopy.crs as ccrs
+import cartopy.feature as cfeature
+
+temperature = ds["t2m"]
+
+# Select a specific time step
+temperature_at_time = temperature.isel(valid_time=0)
+
+# Plot the data on a map
+plt.figure(figsize=(12, 8))
+ax = plt.axes(projection=ccrs.PlateCarree())
+temperature_at_time.plot(ax=ax, cmap="coolwarm", transform=ccrs.PlateCarree(), cbar_kwargs={"label": "Temperature (K)"})
+ax.coastlines()
+ax.add_feature(cfeature.BORDERS, linestyle=":")
+ax.set_title("Temperature at Time Step 0")
+plt.show()
+
+
+
+
+
+
+
+
+
You can also load the aggregated data:
+
+
import pandas as pd
+import geopandas as gpd
+from era5_sandbox.config import data_catalog
+
+ex_agg_path = data_catalog['aggregate']['outputs']['2019_08_madagascar_night_d2m_max.parquet'].load()
+
+gpd.read_parquet(ex_agg_path).describe()
+
+
+
+
+
+
+
+
+
day_01
+
day_02
+
day_03
+
day_04
+
day_05
+
day_06
+
day_07
+
day_08
+
day_09
+
day_10
+
day_11
+
day_12
+
day_13
+
day_14
+
day_15
+
day_16
+
day_17
+
day_18
+
day_19
+
day_20
+
day_21
+
day_22
+
day_23
+
day_24
+
day_25
+
day_26
+
day_27
+
day_28
+
day_29
+
day_30
+
day_31
+
day_32
+
+
+
+
+
count
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
+
+
mean
+
290.493048
+
290.145274
+
288.953153
+
288.503714
+
288.439820
+
288.304426
+
286.940995
+
287.186512
+
287.453656
+
287.843029
+
288.301938
+
288.778014
+
288.813762
+
288.667253
+
288.796892
+
288.547945
+
288.197632
+
287.882440
+
287.659818
+
289.291587
+
289.911503
+
288.760939
+
288.257644
+
288.271450
+
287.746390
+
288.379399
+
288.504720
+
287.665699
+
288.149861
+
288.266861
+
288.644028
+
288.224829
+
+
+
std
+
2.616922
+
2.832083
+
3.215642
+
3.566019
+
4.401416
+
4.198817
+
5.235795
+
4.444031
+
4.346305
+
3.435444
+
2.735781
+
2.864494
+
2.841268
+
3.080593
+
3.306217
+
2.938165
+
3.018303
+
2.849850
+
2.817690
+
2.600946
+
2.584079
+
3.161855
+
3.171827
+
2.983778
+
3.223380
+
2.918867
+
2.844314
+
3.052635
+
3.077292
+
3.093706
+
3.335983
+
3.296264
+
+
+
min
+
284.295898
+
281.673340
+
280.566406
+
280.509521
+
277.348145
+
279.243164
+
274.955078
+
274.682129
+
275.397461
+
279.498291
+
282.339111
+
282.188721
+
282.470703
+
281.371582
+
280.724609
+
280.093506
+
280.849121
+
281.123535
+
281.952148
+
282.186768
+
284.168945
+
282.519287
+
282.015381
+
280.578857
+
281.183838
+
281.146973
+
281.977539
+
281.014648
+
280.787842
+
281.631348
+
281.349854
+
280.615967
+
+
+
25%
+
288.031494
+
287.739014
+
286.978271
+
285.750488
+
284.326904
+
284.071289
+
281.695068
+
283.710449
+
284.153076
+
285.459717
+
286.141846
+
286.444092
+
286.505859
+
286.104004
+
286.114014
+
286.730225
+
286.005371
+
285.420166
+
285.230713
+
287.408203
+
287.744873
+
286.101318
+
285.243652
+
285.488281
+
285.170166
+
285.876465
+
286.145508
+
285.243164
+
285.579346
+
285.322754
+
285.930908
+
285.565186
+
+
+
50%
+
290.674316
+
290.331543
+
288.916260
+
288.472168
+
289.635742
+
289.390381
+
288.382568
+
287.926758
+
288.173096
+
287.859375
+
287.797852
+
288.716064
+
288.806641
+
288.789307
+
289.210938
+
288.769287
+
288.085205
+
287.698975
+
287.252930
+
289.310547
+
289.878418
+
288.511719
+
288.420166
+
288.263916
+
287.717041
+
288.661621
+
288.999023
+
287.485107
+
288.326416
+
288.429199
+
288.576416
+
288.093018
+
+
+
75%
+
292.828369
+
292.707764
+
291.609375
+
291.655762
+
291.987305
+
291.845459
+
291.671631
+
291.051758
+
291.288574
+
291.000244
+
290.813721
+
291.365967
+
291.540039
+
291.393799
+
291.756592
+
291.094727
+
290.893311
+
290.266602
+
290.166748
+
291.649902
+
291.970459
+
291.342285
+
290.443848
+
290.660400
+
290.400146
+
290.360840
+
290.854004
+
290.328125
+
290.827881
+
290.999268
+
291.598877
+
291.072754
+
+
+
max
+
296.467285
+
295.717529
+
295.837158
+
295.693604
+
295.723389
+
296.195557
+
295.589600
+
295.345703
+
294.754639
+
294.483154
+
294.952148
+
294.815430
+
294.623779
+
295.088135
+
295.036621
+
294.847900
+
294.224609
+
294.522949
+
294.728760
+
295.268066
+
295.507324
+
295.797363
+
296.297119
+
296.222900
+
295.492432
+
295.406006
+
294.629883
+
295.211670
+
295.363037
+
295.263184
+
295.446533
+
295.408691
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/_docs/index.md b/_docs/index.md
new file mode 100644
index 0000000..97efafe
--- /dev/null
+++ b/_docs/index.md
@@ -0,0 +1,928 @@
+# The ERA5 Spatial Aggregation Pipeline
+
+
+
+
+``` python
+from era5_sandbox.core import *
+```
+
+## era5_sandbox
+
+> Sandbox environment for era5 development
+
+This package documents the development and implementation of functions
+and code for the Madagascar ERA5 dataset project. The goal is for
+exposure data to be made available at the daily resolution when
+possible. Finer resolutions shouldn’t ever be needed for our purposes,
+and it should then be relatively easy to aggregate at coarser
+resolutions, such as weekly or monthly. Additionally, we’ve extended
+this work to Nepal as well.
+
+Variables should generally be made available from 2010 onward, as that’s
+where our clinic data starts.
+
+All data are ideally made available at the “healthshed” geographical
+level. Healthsheds are defined as geographical areas whose residents
+all go to the same clinic. There are a total of ~2700 public
+clinics in Madagascar, hence ~2700 healthsheds, with each healthshed
+containing ~10000 people on average.
+
+Preliminary list of environmental variables
+
+- ☒ 2-m air temperature from ERA5: daily min, max, mean
+
+- ☒ 2-m air dew point temperature from ERA5: daily min, max, mean
+
+- ☒ Precipitation: daily total (ERA5)
+
+- ☒ Soil moisture: daily average (ERA5)
+
+Variables from other sources:
+
+- ☐ Sea surface temperature: daily average and maximum in the nearest
+ neighbor for each healthshed.
+
+- ☐ Precipitation: daily total (CHIRPS)
+
+- ☐ Chlorophyll-A (Giacomo)
+
+- ☐ Wealth index: Available from Giacomo
+
+- ☐ NDVI
+
+- ☐ Tropical storm
+
+- ☐ Flooding
+
+- ☐ Deforestation
+
+- ☐ Linking/segmenting healthsheds into climate zones and other
+
+- ☐ Relative humidity: daily average (lower priority)
+
+Those from the ERA5 dataset will be housed here, but we may likely
+develop a separate repository for the other datasets.
+
+## Developer Guide
+
+This package is built and maintained with `nbdev`. If you are new to
+using `nbdev` here are some useful pointers to get you started.
+
+### Install era5_sandbox in Development mode
+
+``` sh
+# make sure era5_sandbox package is installed in development mode
+$ pip install -e .
+```
+
+To make changes, go to the “notes” directory and edit the notebooks as
+necessary. Each notebook refers to a module in the era5_sandbox package.
+Cells are exported to the module when the notebook is saved and you run
+the following command:
+
+``` sh
+$ nbdev_export
+```
+
+For example, to change the functionality of the
+[`testAPI()`](https://TinasheMTapera.github.io/era5_sandbox/core.html#testapi)
+function in the testAPI Hydra rule, you would edit the
+[`testAPI`](https://TinasheMTapera.github.io/era5_sandbox/core.html#testapi)
+notebook in the `notes` directory `notes/testAPI.ipynb`, and then save
+that notebook and run `nbdev_export` to update the `core` module in the
+package.
+
+### How to Run the Pipeline
+
+The pipeline downloads ERA5 variables for a given date range and
+geographical bounding box. You can learn how each of these steps was
+developed by following the notebooks in `notes` in numerical order.
+
+
+
+> **Important**
+>
+> The pipeline has two implementations: one using `snakemake` and
+> `hydra`, and another using `pytask`. The `pytask` implementation is
+> the more recent one, and is recommended for future use. The
+> `snakemake` implementation is left here for reference to legacy code.
+
+
+
+#### Using `pytask`
+
+To run the pipeline, the `pytask` config at `notes/20_pytask_config.qmd`
+should be reviewed and updated if necessary. The pipeline can then be
+run with the following command:
+
+``` sh
+$ sbatch pytask.sbatch
+```
+
+#### Using `snakemake` and `hydra`
+
+To run the pipeline, the config at `config/config.yaml` should be
+updated with the desired date range and geographical bounding box. The
+pipeline can then be run with the following command:
+
+``` sh
+sbatch snakemake.sbatch
+```
+
+### What Does the Pipeline Produce?
+
+Using `pytask`’s data catalog, you can investigate the downloaded raw
+data with python, eg.:
+
+``` python
+import xarray as xr
+from era5_sandbox.config import data_catalog
+from era5_sandbox.core import ClimateDataFileHandler
+
+ex_nc = list(data_catalog['download']['outputs']._entries).pop()
+ex_nc_path = data_catalog['download']['outputs'][ex_nc].load()
+
+with ClimateDataFileHandler(ex_nc_path) as handler:
+ ds = xr.open_dataset(handler.get_dataset("instant"))
+
+ds
+```
+
+
European Centre for Medium-Range Weather Forecasts
GRIB_subCentre :
0
Conventions :
CF-1.7
institution :
European Centre for Medium-Range Weather Forecasts
history :
2025-09-16T20:55 GRIB to CDM+CF via cfgrib-0.9.15.0/ecCodes-2.42.0 with {"source": "data.grib", "filter_by_keys": {}, "encode_cf": ["parameter", "time", "geography", "vertical"]}
+```
+
+:::
+:::
+
+
+And plot it with cartopy, eg.:
+
+::: {#cell-6 .cell exec_doc='null'}
+``` {.python .cell-code}
+import matplotlib.pyplot as plt
+import cartopy.crs as ccrs
+import cartopy.feature as cfeature
+
+temperature = ds["t2m"]
+
+# Select a specific time step
+temperature_at_time = temperature.isel(valid_time=0)
+
+# Plot the data on a map
+plt.figure(figsize=(12, 8))
+ax = plt.axes(projection=ccrs.PlateCarree())
+temperature_at_time.plot(ax=ax, cmap="coolwarm", transform=ccrs.PlateCarree(), cbar_kwargs={"label": "Temperature (K)"})
+ax.coastlines()
+ax.add_feature(cfeature.BORDERS, linestyle=":")
+ax.set_title("Temperature at Time Step 0")
+plt.show()
+```
+
+::: {.cell-output .cell-output-display}
+{width=897 height=640}
+:::
+:::
+
+
+You can also load the aggregated data:
+
+::: {#cell-8 .cell exec_doc='null'}
+``` {.python .cell-code}
+import pandas as pd
+import geopandas as gpd
+from era5_sandbox.config import data_catalog
+
+ex_agg_path = data_catalog['aggregate']['outputs']['2019_08_madagascar_night_d2m_max.parquet'].load()
+
+gpd.read_parquet(ex_agg_path).describe()
+```
+
+::: {.cell-output .cell-output-display}
+
+```{=html}
+
+
+
+
+
+
+
day_01
+
day_02
+
day_03
+
day_04
+
day_05
+
day_06
+
day_07
+
day_08
+
day_09
+
day_10
+
day_11
+
day_12
+
day_13
+
day_14
+
day_15
+
day_16
+
day_17
+
day_18
+
day_19
+
day_20
+
day_21
+
day_22
+
day_23
+
day_24
+
day_25
+
day_26
+
day_27
+
day_28
+
day_29
+
day_30
+
day_31
+
day_32
+
+
+
+
+
count
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
2701.000000
+
+
+
mean
+
290.493048
+
290.145274
+
288.953153
+
288.503714
+
288.439820
+
288.304426
+
286.940995
+
287.186512
+
287.453656
+
287.843029
+
288.301938
+
288.778014
+
288.813762
+
288.667253
+
288.796892
+
288.547945
+
288.197632
+
287.882440
+
287.659818
+
289.291587
+
289.911503
+
288.760939
+
288.257644
+
288.271450
+
287.746390
+
288.379399
+
288.504720
+
287.665699
+
288.149861
+
288.266861
+
288.644028
+
288.224829
+
+
+
std
+
2.616922
+
2.832083
+
3.215642
+
3.566019
+
4.401416
+
4.198817
+
5.235795
+
4.444031
+
4.346305
+
3.435444
+
2.735781
+
2.864494
+
2.841268
+
3.080593
+
3.306217
+
2.938165
+
3.018303
+
2.849850
+
2.817690
+
2.600946
+
2.584079
+
3.161855
+
3.171827
+
2.983778
+
3.223380
+
2.918867
+
2.844314
+
3.052635
+
3.077292
+
3.093706
+
3.335983
+
3.296264
+
+
+
min
+
284.295898
+
281.673340
+
280.566406
+
280.509521
+
277.348145
+
279.243164
+
274.955078
+
274.682129
+
275.397461
+
279.498291
+
282.339111
+
282.188721
+
282.470703
+
281.371582
+
280.724609
+
280.093506
+
280.849121
+
281.123535
+
281.952148
+
282.186768
+
284.168945
+
282.519287
+
282.015381
+
280.578857
+
281.183838
+
281.146973
+
281.977539
+
281.014648
+
280.787842
+
281.631348
+
281.349854
+
280.615967
+
+
+
25%
+
288.031494
+
287.739014
+
286.978271
+
285.750488
+
284.326904
+
284.071289
+
281.695068
+
283.710449
+
284.153076
+
285.459717
+
286.141846
+
286.444092
+
286.505859
+
286.104004
+
286.114014
+
286.730225
+
286.005371
+
285.420166
+
285.230713
+
287.408203
+
287.744873
+
286.101318
+
285.243652
+
285.488281
+
285.170166
+
285.876465
+
286.145508
+
285.243164
+
285.579346
+
285.322754
+
285.930908
+
285.565186
+
+
+
50%
+
290.674316
+
290.331543
+
288.916260
+
288.472168
+
289.635742
+
289.390381
+
288.382568
+
287.926758
+
288.173096
+
287.859375
+
287.797852
+
288.716064
+
288.806641
+
288.789307
+
289.210938
+
288.769287
+
288.085205
+
287.698975
+
287.252930
+
289.310547
+
289.878418
+
288.511719
+
288.420166
+
288.263916
+
287.717041
+
288.661621
+
288.999023
+
287.485107
+
288.326416
+
288.429199
+
288.576416
+
288.093018
+
+
+
75%
+
292.828369
+
292.707764
+
291.609375
+
291.655762
+
291.987305
+
291.845459
+
291.671631
+
291.051758
+
291.288574
+
291.000244
+
290.813721
+
291.365967
+
291.540039
+
291.393799
+
291.756592
+
291.094727
+
290.893311
+
290.266602
+
290.166748
+
291.649902
+
291.970459
+
291.342285
+
290.443848
+
290.660400
+
290.400146
+
290.360840
+
290.854004
+
290.328125
+
290.827881
+
290.999268
+
291.598877
+
291.072754
+
+
+
max
+
296.467285
+
295.717529
+
295.837158
+
295.693604
+
295.723389
+
296.195557
+
295.589600
+
295.345703
+
294.754639
+
294.483154
+
294.952148
+
294.815430
+
294.623779
+
295.088135
+
295.036621
+
294.847900
+
294.224609
+
294.522949
+
294.728760
+
295.268066
+
295.507324
+
295.797363
+
296.297119
+
296.222900
+
295.492432
+
295.406006
+
294.629883
+
295.211670
+
295.363037
+
295.263184
+
295.446533
+
295.408691
+
+
+
+
+```
+
+:::
+:::
+
+
diff --git a/_proc/index.ipynb b/_proc/index.ipynb
index b608fd6..43c2671 100644
--- a/_proc/index.ipynb
+++ b/_proc/index.ipynb
@@ -1,16 +1,13 @@
{
"cells": [
{
- "cell_type": "raw",
+ "cell_type": "markdown",
"metadata": {},
"source": [
"---\n",
- "description: Sandbox environment for era5 development\n",
- "output-file: index.html\n",
- "title: era5_sandbox\n",
- "\n",
- "---\n",
- "\n"
+ "title: \"The ERA5 Spatial Aggregation Pipeline\"\n",
+ "exec_all: true\n",
+ "---"
]
},
{
@@ -20,11 +17,27 @@
""
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "language": "python"
+ },
+ "outputs": [],
+ "source": [
+ "#| hide: null\n",
+ "from era5_sandbox.core import *"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Here we are developing functions and code for the Madagascar ERA5 dataset project. The goal is for exposure data to be made available at the daily resolution when possible. Finer resolutions shouldn’t ever be needed for our purposes, and it should then be relatively easy to aggregate at coarser resolutions, such as weekly or monthly.\n",
+ "## era5_sandbox\n",
+ "\n",
+ "> Sandbox environment for era5 development\n",
+ "\n",
+ "This package documents the development and implementation of functions and code for the Madagascar ERA5 dataset project. The goal is for exposure data to be made available at the daily resolution when possible. Finer resolutions shouldn’t ever be needed for our purposes, and it should then be relatively easy to aggregate at coarser resolutions, such as weekly or monthly. Additionally, we've extended this work to Nepal as well.\n",
"\n",
"Variables should generally be made available from 2010 onward, as that’s where our clinic data starts.\n",
"\n",
@@ -32,11 +45,15 @@
"\n",
"Preliminary list of environmental variables\n",
"\n",
- "- [ ] 2-m air temperature from ERA5: daily min, max, mean\n",
+ "- [x] 2-m air temperature from ERA5: daily min, max, mean\n",
" \n",
- "- [ ] 2-m air dew point temperature from ERA5: daily min, max, mean\n",
+ "- [x] 2-m air dew point temperature from ERA5: daily min, max, mean\n",
"\n",
- "- [ ] Precipitation: daily total (ERA5)\n",
+ "- [x] Precipitation: daily total (ERA5)\n",
+ "\n",
+ "- [x] Soil moisture: daily average (ERA5)\n",
+ "\n",
+ "Variables from other sources:\n",
"\n",
"- [ ] Sea surface temperature: daily average and maximum in the nearest neighbor for each healthshed.\n",
"\n",
@@ -56,132 +73,615 @@
"\n",
"- [ ] Linking/segmenting healthsheds into climate zones and other \n",
"\n",
- "- [ ] Relative humidity: daily average (lower priority)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Developer Guide"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "If you are new to using `nbdev` here are some useful pointers to get you started."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Install era5_sandbox in Development mode"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
+ "- [ ] Relative humidity: daily average (lower priority)\n",
+ "\n",
+ "Those from the ERA5 dataset will be housed here, but we may likely develop a separate repository for the other datasets.\n",
+ "\n",
+ "## Developer Guide\n",
+ "\n",
+ "This package is built and maintained with `nbdev`. If you are new to using `nbdev` here are some useful pointers to get you started.\n",
+ "\n",
+ "### Install era5_sandbox in Development mode\n",
+ "\n",
"```sh\n",
"# make sure era5_sandbox package is installed in development mode\n",
"$ pip install -e .\n",
+ "```\n",
"\n",
- "# To make changes, go to the \"notes\" directory and edit the notebooks as necessary.\n",
- "# Each notebook refers to a module in the era5_sandbox package. Cells are exported to the module\n",
- "# when the notebook is saved and you run the following command:\n",
+ "To make changes, go to the \"notes\" directory and edit the notebooks as necessary.\n",
+ "Each notebook refers to a module in the era5_sandbox package. Cells are exported to the module\n",
+ "when the notebook is saved and you run the following command:\n",
"\n",
+ "```sh\n",
"$ nbdev_export\n",
"```\n",
"\n",
- "For e.g., to change functionality of the [`testAPI()`](https://TinasheMTapera.github.io/era5_sandbox/core.html#testapi) function in the testAPI Hydra rule, you would edit the [`testAPI`](https://TinasheMTapera.github.io/era5_sandbox/core.html#testapi) notebook in the `notes` directory `notes/testAPI.ipynb`, and then save that notebook and run `nbdev_export` to update the `core` module in the package."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Usage"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Installation"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Install latest from the GitHub [repository][repo]:\n",
+ "For e.g., to change functionality of the [`testAPI()`](https://TinasheMTapera.github.io/era5_sandbox/core.html#testapi) function in the testAPI Hydra rule, you would edit the [`testAPI`](https://TinasheMTapera.github.io/era5_sandbox/core.html#testapi) notebook in the `notes` directory `notes/testAPI.ipynb`, and then save that notebook and run `nbdev_export` to update the `core` module in the package.\n",
+ "\n",
+ "### How to Run the Pipeline\n",
+ "\n",
+ "The pipeline downloads ERA5 variables for a given date range and geographical bounding box. You can learn how each of these steps was by following the notebooks in `notes` in numerical order.\n",
+ "\n",
+ "::: {.callout-important}\n",
+ "The pipeline has two implementations: one using `snakemake` and `hydra`, and another using `pytask`. The `pytask` implementation is the more recent one, and is recommended for future use. The `snakemake` implementation is left here for reference to legacy code.\n",
+ ":::\n",
+ "\n",
+ "#### Using `pytask`\n",
+ "\n",
+ "To run the pipeline, the `pytask` config at `note/20_pytask_config.qmd` should be reviewed\n",
+ "and updated if necessary. The pipeline can then be run with the following command:\n",
"\n",
"```sh\n",
- "$ pip install git+https://github.com/NSAPH-Data-Processing/era5_sandbox\n",
+ "$ sbatch pytask.sbatch\n",
"```\n",
"\n",
- "or clone and install in development mode:\n",
+ "#### Using `snakemake` and `hydra`\n",
+ "\n",
+ "To run the pipeline, the config at `config/config.yaml` should be updated with the desired date range and geographical bounding box. The pipeline can then be run with the following command:\n",
"\n",
"```sh\n",
- "$ git clone https://github.com/NSAPH-Data-Processing/era5_sandbox\n",
- "$ pip install -e .\n",
+ "sbatch snakemake.sbatch\n",
"```\n",
"\n",
+ "### What Does the Pipeline Produce?\n",
"\n",
- "[repo]: https://github.com/NSAPH-Data-Processing/era5_sandbox"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Documentation"
+ "Using `pytask`'s data catalog, you can investigate the downloaded raw data with python, eg.:"
]
},
{
- "cell_type": "markdown",
- "metadata": {},
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "language": "python"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
European Centre for Medium-Range Weather Forecasts
GRIB_subCentre :
0
Conventions :
CF-1.7
institution :
European Centre for Medium-Range Weather Forecasts
history :
2025-09-16T20:55 GRIB to CDM+CF via cfgrib-0.9.15.0/ecCodes-2.42.0 with {"source": "data.grib", "filter_by_keys": {}, "encode_cf": ["parameter", "time", "geography", "vertical"]}
"
+ ],
+ "text/plain": [
+ " Size: 53MB\n",
+ "Dimensions: (valid_time: 744, latitude: 49, longitude: 91)\n",
+ "Coordinates:\n",
+ " number int64 8B ...\n",
+ " * valid_time (valid_time) datetime64[ns] 6kB 2024-03-01 ... 2024-03-31T23:...\n",
+ " * latitude (latitude) float64 392B 30.8 30.7 30.6 30.5 ... 26.2 26.1 26.0\n",
+ " * longitude (longitude) float64 728B 79.6 79.7 79.8 79.9 ... 88.4 88.5 88.6\n",
+ " expver (valid_time) "
+ ]
+ },
+ "metadata": {
+ "image/png": {
+ "height": 640,
+ "width": 897
+ }
+ },
+ "output_type": "display_data"
+ }
+ ],
"source": [
- "The pipeline currently downloads ERA5 temperature and dew point temperature data for a given date range and geographical bounding box. You can learn each of these steps by following the notebooks in `notes` in numerical order.\n",
- "\n",
- "To run the pipeline, the config at `config/config.yaml` should be updated with the desired date range and geographical bounding box. The pipeline can then be run with the following command:\n",
- "\n",
- "```sh\n",
- "sbatch snakemake.sbatch\n",
- "```\n",
- "\n",
- "You can investigate the downloaded raw data with python, eg.:\n",
- "\n",
- "```python\n",
- "import xarray as xr\n",
+ "#| exec_doc: #\n",
"import matplotlib.pyplot as plt\n",
"import cartopy.crs as ccrs\n",
"import cartopy.feature as cfeature\n",
"\n",
- "### the path to any of the downloaded files\n",
- "file_path = \"/n/dominici_lab/lab/data_processing/csph-era5_sandbox/data/input/2010_01.nc\"\n",
- "data = xr.open_dataset(file_path)\n",
- "\n",
- "\n",
- "temperature = data[\"t2m\"]\n",
- "\n",
- "\n",
+ "temperature = ds[\"t2m\"]\n",
"\n",
"# Select a specific time step\n",
"temperature_at_time = temperature.isel(valid_time=0)\n",
@@ -193,8 +693,14 @@
"ax.coastlines()\n",
"ax.add_feature(cfeature.BORDERS, linestyle=\":\")\n",
"ax.set_title(\"Temperature at Time Step 0\")\n",
- "plt.show()\n",
- "```"
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "You can also load the aggregated data:"
]
},
{
@@ -206,8 +712,356 @@
"outputs": [
{
"data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
day_01
\n",
+ "
day_02
\n",
+ "
day_03
\n",
+ "
day_04
\n",
+ "
day_05
\n",
+ "
day_06
\n",
+ "
day_07
\n",
+ "
day_08
\n",
+ "
day_09
\n",
+ "
day_10
\n",
+ "
day_11
\n",
+ "
day_12
\n",
+ "
day_13
\n",
+ "
day_14
\n",
+ "
day_15
\n",
+ "
day_16
\n",
+ "
day_17
\n",
+ "
day_18
\n",
+ "
day_19
\n",
+ "
day_20
\n",
+ "
day_21
\n",
+ "
day_22
\n",
+ "
day_23
\n",
+ "
day_24
\n",
+ "
day_25
\n",
+ "
day_26
\n",
+ "
day_27
\n",
+ "
day_28
\n",
+ "
day_29
\n",
+ "
day_30
\n",
+ "
day_31
\n",
+ "
day_32
\n",
+ "
\n",
+ " \n",
+ " \n",
+ "
\n",
+ "
count
\n",
+ "
2701.000000
\n",
+ "
2701.000000
\n",
+ "
2701.000000
\n",
+ "
2701.000000
\n",
+ "
2701.000000
\n",
+ "
2701.000000
\n",
+ "
2701.000000
\n",
+ "
2701.000000
\n",
+ "
2701.000000
\n",
+ "
2701.000000
\n",
+ "
2701.000000
\n",
+ "
2701.000000
\n",
+ "
2701.000000
\n",
+ "
2701.000000
\n",
+ "
2701.000000
\n",
+ "
2701.000000
\n",
+ "
2701.000000
\n",
+ "
2701.000000
\n",
+ "
2701.000000
\n",
+ "
2701.000000
\n",
+ "
2701.000000
\n",
+ "
2701.000000
\n",
+ "
2701.000000
\n",
+ "
2701.000000
\n",
+ "
2701.000000
\n",
+ "
2701.000000
\n",
+ "
2701.000000
\n",
+ "
2701.000000
\n",
+ "
2701.000000
\n",
+ "
2701.000000
\n",
+ "
2701.000000
\n",
+ "
2701.000000
\n",
+ "
\n",
+ "
\n",
+ "
mean
\n",
+ "
290.493048
\n",
+ "
290.145274
\n",
+ "
288.953153
\n",
+ "
288.503714
\n",
+ "
288.439820
\n",
+ "
288.304426
\n",
+ "
286.940995
\n",
+ "
287.186512
\n",
+ "
287.453656
\n",
+ "
287.843029
\n",
+ "
288.301938
\n",
+ "
288.778014
\n",
+ "
288.813762
\n",
+ "
288.667253
\n",
+ "
288.796892
\n",
+ "
288.547945
\n",
+ "
288.197632
\n",
+ "
287.882440
\n",
+ "
287.659818
\n",
+ "
289.291587
\n",
+ "
289.911503
\n",
+ "
288.760939
\n",
+ "
288.257644
\n",
+ "
288.271450
\n",
+ "
287.746390
\n",
+ "
288.379399
\n",
+ "
288.504720
\n",
+ "
287.665699
\n",
+ "
288.149861
\n",
+ "
288.266861
\n",
+ "
288.644028
\n",
+ "
288.224829
\n",
+ "
\n",
+ "
\n",
+ "
std
\n",
+ "
2.616922
\n",
+ "
2.832083
\n",
+ "
3.215642
\n",
+ "
3.566019
\n",
+ "
4.401416
\n",
+ "
4.198817
\n",
+ "
5.235795
\n",
+ "
4.444031
\n",
+ "
4.346305
\n",
+ "
3.435444
\n",
+ "
2.735781
\n",
+ "
2.864494
\n",
+ "
2.841268
\n",
+ "
3.080593
\n",
+ "
3.306217
\n",
+ "
2.938165
\n",
+ "
3.018303
\n",
+ "
2.849850
\n",
+ "
2.817690
\n",
+ "
2.600946
\n",
+ "
2.584079
\n",
+ "
3.161855
\n",
+ "
3.171827
\n",
+ "
2.983778
\n",
+ "
3.223380
\n",
+ "
2.918867
\n",
+ "
2.844314
\n",
+ "
3.052635
\n",
+ "
3.077292
\n",
+ "
3.093706
\n",
+ "
3.335983
\n",
+ "
3.296264
\n",
+ "
\n",
+ "
\n",
+ "
min
\n",
+ "
284.295898
\n",
+ "
281.673340
\n",
+ "
280.566406
\n",
+ "
280.509521
\n",
+ "
277.348145
\n",
+ "
279.243164
\n",
+ "
274.955078
\n",
+ "
274.682129
\n",
+ "
275.397461
\n",
+ "
279.498291
\n",
+ "
282.339111
\n",
+ "
282.188721
\n",
+ "
282.470703
\n",
+ "
281.371582
\n",
+ "
280.724609
\n",
+ "
280.093506
\n",
+ "
280.849121
\n",
+ "
281.123535
\n",
+ "
281.952148
\n",
+ "
282.186768
\n",
+ "
284.168945
\n",
+ "
282.519287
\n",
+ "
282.015381
\n",
+ "
280.578857
\n",
+ "
281.183838
\n",
+ "
281.146973
\n",
+ "
281.977539
\n",
+ "
281.014648
\n",
+ "
280.787842
\n",
+ "
281.631348
\n",
+ "
281.349854
\n",
+ "
280.615967
\n",
+ "
\n",
+ "
\n",
+ "
25%
\n",
+ "
288.031494
\n",
+ "
287.739014
\n",
+ "
286.978271
\n",
+ "
285.750488
\n",
+ "
284.326904
\n",
+ "
284.071289
\n",
+ "
281.695068
\n",
+ "
283.710449
\n",
+ "
284.153076
\n",
+ "
285.459717
\n",
+ "
286.141846
\n",
+ "
286.444092
\n",
+ "
286.505859
\n",
+ "
286.104004
\n",
+ "
286.114014
\n",
+ "
286.730225
\n",
+ "
286.005371
\n",
+ "
285.420166
\n",
+ "
285.230713
\n",
+ "
287.408203
\n",
+ "
287.744873
\n",
+ "
286.101318
\n",
+ "
285.243652
\n",
+ "
285.488281
\n",
+ "
285.170166
\n",
+ "
285.876465
\n",
+ "
286.145508
\n",
+ "
285.243164
\n",
+ "
285.579346
\n",
+ "
285.322754
\n",
+ "
285.930908
\n",
+ "
285.565186
\n",
+ "
\n",
+ "
\n",
+ "
50%
\n",
+ "
290.674316
\n",
+ "
290.331543
\n",
+ "
288.916260
\n",
+ "
288.472168
\n",
+ "
289.635742
\n",
+ "
289.390381
\n",
+ "
288.382568
\n",
+ "
287.926758
\n",
+ "
288.173096
\n",
+ "
287.859375
\n",
+ "
287.797852
\n",
+ "
288.716064
\n",
+ "
288.806641
\n",
+ "
288.789307
\n",
+ "
289.210938
\n",
+ "
288.769287
\n",
+ "
288.085205
\n",
+ "
287.698975
\n",
+ "
287.252930
\n",
+ "
289.310547
\n",
+ "
289.878418
\n",
+ "
288.511719
\n",
+ "
288.420166
\n",
+ "
288.263916
\n",
+ "
287.717041
\n",
+ "
288.661621
\n",
+ "
288.999023
\n",
+ "
287.485107
\n",
+ "
288.326416
\n",
+ "
288.429199
\n",
+ "
288.576416
\n",
+ "
288.093018
\n",
+ "
\n",
+ "
\n",
+ "
75%
\n",
+ "
292.828369
\n",
+ "
292.707764
\n",
+ "
291.609375
\n",
+ "
291.655762
\n",
+ "
291.987305
\n",
+ "
291.845459
\n",
+ "
291.671631
\n",
+ "
291.051758
\n",
+ "
291.288574
\n",
+ "
291.000244
\n",
+ "
290.813721
\n",
+ "
291.365967
\n",
+ "
291.540039
\n",
+ "
291.393799
\n",
+ "
291.756592
\n",
+ "
291.094727
\n",
+ "
290.893311
\n",
+ "
290.266602
\n",
+ "
290.166748
\n",
+ "
291.649902
\n",
+ "
291.970459
\n",
+ "
291.342285
\n",
+ "
290.443848
\n",
+ "
290.660400
\n",
+ "
290.400146
\n",
+ "
290.360840
\n",
+ "
290.854004
\n",
+ "
290.328125
\n",
+ "
290.827881
\n",
+ "
290.999268
\n",
+ "
291.598877
\n",
+ "
291.072754
\n",
+ "
\n",
+ "
\n",
+ "
max
\n",
+ "
296.467285
\n",
+ "
295.717529
\n",
+ "
295.837158
\n",
+ "
295.693604
\n",
+ "
295.723389
\n",
+ "
296.195557
\n",
+ "
295.589600
\n",
+ "
295.345703
\n",
+ "
294.754639
\n",
+ "
294.483154
\n",
+ "
294.952148
\n",
+ "
294.815430
\n",
+ "
294.623779
\n",
+ "
295.088135
\n",
+ "
295.036621
\n",
+ "
294.847900
\n",
+ "
294.224609
\n",
+ "
294.522949
\n",
+ "
294.728760
\n",
+ "
295.268066
\n",
+ "
295.507324
\n",
+ "
295.797363
\n",
+ "
296.297119
\n",
+ "
296.222900
\n",
+ "
295.492432
\n",
+ "
295.406006
\n",
+ "
294.629883
\n",
+ "
295.211670
\n",
+ "
295.363037
\n",
+ "
295.263184
\n",
+ "
295.446533
\n",
+ "
295.408691
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
"text/plain": [
- "2"
+ " day_01 day_02 day_03 day_04 day_05 day_06 ... day_27 day_28 day_29 day_30 day_31 day_32\n",
+ "count 2701.000000 2701.000000 2701.000000 2701.000000 2701.000000 2701.000000 ... 2701.000000 2701.000000 2701.000000 2701.000000 2701.000000 2701.000000\n",
+ "mean 290.493048 290.145274 288.953153 288.503714 288.439820 288.304426 ... 288.504720 287.665699 288.149861 288.266861 288.644028 288.224829\n",
+ "std 2.616922 2.832083 3.215642 3.566019 4.401416 4.198817 ... 2.844314 3.052635 3.077292 3.093706 3.335983 3.296264\n",
+ "min 284.295898 281.673340 280.566406 280.509521 277.348145 279.243164 ... 281.977539 281.014648 280.787842 281.631348 281.349854 280.615967\n",
+ "25% 288.031494 287.739014 286.978271 285.750488 284.326904 284.071289 ... 286.145508 285.243164 285.579346 285.322754 285.930908 285.565186\n",
+ "50% 290.674316 290.331543 288.916260 288.472168 289.635742 289.390381 ... 288.999023 287.485107 288.326416 288.429199 288.576416 288.093018\n",
+ "75% 292.828369 292.707764 291.609375 291.655762 291.987305 291.845459 ... 290.854004 290.328125 290.827881 290.999268 291.598877 291.072754\n",
+ "max 296.467285 295.717529 295.837158 295.693604 295.723389 296.195557 ... 294.629883 295.211670 295.363037 295.263184 295.446533 295.408691\n",
+ "\n",
+ "[8 rows x 32 columns]"
]
},
"execution_count": null,
@@ -216,17 +1070,15 @@
}
],
"source": [
- "1+1"
+ "#| exec_doc: #\n",
+ "import pandas as pd\n",
+ "import geopandas as gpd\n",
+ "from era5_sandbox.config import data_catalog\n",
+ "\n",
+ "ex_agg_path = data_catalog['aggregate']['outputs']['2019_08_madagascar_night_d2m_max.parquet'].load()\n",
+ "\n",
+ "gpd.read_parquet(ex_agg_path).describe()"
]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "language": "python"
- },
- "outputs": [],
- "source": []
}
],
"metadata": {
@@ -237,5 +1089,5 @@
}
},
"nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
}
diff --git a/_proc/index_files/figure-html/cell-4-output-1.png b/_proc/index_files/figure-html/cell-4-output-1.png
new file mode 100644
index 0000000..6044c3d
Binary files /dev/null and b/_proc/index_files/figure-html/cell-4-output-1.png differ
diff --git a/_proc/sidebar.yml b/_proc/sidebar.yml
index c80eda4..caf3166 100644
--- a/_proc/sidebar.yml
+++ b/_proc/sidebar.yml
@@ -2,5 +2,15 @@ website:
sidebar:
contents:
- index.ipynb
+ - section: "Snakemake Modules"
- 00_core.ipynb
- 01_download_raw_data.ipynb
+ - 02_aggregate.ipynb
+ - 03_publish.ipynb
+ - section: "PyTask Modules"
+ - 20_pytask_config.ipynb
+ - 20_pytask_logger.ipynb
+ - 21_pytask_download.ipynb
+ - 22_pytask_aggregate.ipynb
+ - section: "PyTask Demo"
+ - 10_pytask_demo.ipynb
diff --git a/_proc/sidebar.yml.bak b/_proc/sidebar.yml.bak
new file mode 100644
index 0000000..caf3166
--- /dev/null
+++ b/_proc/sidebar.yml.bak
@@ -0,0 +1,16 @@
+website:
+ sidebar:
+ contents:
+ - index.ipynb
+ - section: "Snakemake Modules"
+ - 00_core.ipynb
+ - 01_download_raw_data.ipynb
+ - 02_aggregate.ipynb
+ - 03_publish.ipynb
+ - section: "PyTask Modules"
+ - 20_pytask_config.ipynb
+ - 20_pytask_logger.ipynb
+ - 21_pytask_download.ipynb
+ - 22_pytask_aggregate.ipynb
+ - section: "PyTask Demo"
+ - 10_pytask_demo.ipynb
diff --git a/dag.pdf b/dag.pdf
new file mode 100644
index 0000000..669b76b
Binary files /dev/null and b/dag.pdf differ
diff --git a/data b/data
new file mode 120000
index 0000000..b5d477b
--- /dev/null
+++ b/data
@@ -0,0 +1 @@
+/n/holylabs/LABS/cgolden_lab/Lab/projects/era5_database/era5_sandbox/data
\ No newline at end of file
diff --git a/environment.yml b/environment.yml
index a77c4df..19aabbb 100644
--- a/environment.yml
+++ b/environment.yml
@@ -435,4 +435,5 @@ dependencies:
- types-setuptools==76.0.0.20250313
- uritemplate==4.1.1
- watchdog==6.0.0
+ - -e .
prefix: /n/home03/ttapera/.conda/envs/era5_sandbox
diff --git a/index_files/figure-commonmark/cell-4-output-1.png b/index_files/figure-commonmark/cell-4-output-1.png
new file mode 100644
index 0000000..6044c3d
Binary files /dev/null and b/index_files/figure-commonmark/cell-4-output-1.png differ
diff --git a/nbdev_prepare.sh b/nbdev_prepare.sh
new file mode 100644
index 0000000..aa72626
--- /dev/null
+++ b/nbdev_prepare.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+#
+#SBATCH -p test # partition (queue)
+#SBATCH -c 2 # number of cores
+#SBATCH --mem 100GB # memory
+#SBATCH -t 0-02:00 # time (D-HH:MM)
+
+# build docs
+
+nbdev_prepare
\ No newline at end of file
diff --git a/notes/00_core.ipynb b/notes/00_core.ipynb
index 8e624fe..febefc2 100644
--- a/notes/00_core.ipynb
+++ b/notes/00_core.ipynb
@@ -4,38 +4,45 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "# core\n",
+ "---\n",
+ "title: \"Core Module: Internal functions and testing\"\n",
+ "exec_all: true\n",
+ "---\n",
"\n",
- "> This is a core library for the ERA5 dataset pipeline. It defines\n",
- "a few helpful functions such as an API tester to test your API key and connection."
+ "## core\n",
+ "\n",
+ "> This is a core library for the ERA5 dataset pipeline. It defines a few helpful functions such as an API tester to test your API key and connection."
]
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
- "#| default_exp core"
+ "#| default_exp core:\n",
+ "#"
]
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
- "#| hide\n",
+ "#| hide:\n",
+ "#\n",
"from nbdev.showdoc import *"
]
},
{
"cell_type": "code",
- "execution_count": 27,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
- "#| export\n",
+ "#| exports:\n",
+ "#\n",
"import os\n",
"import cdsapi\n",
"import hydra\n",
@@ -50,7 +57,7 @@
"from pydrive2.drive import GoogleDrive\n",
"from omegaconf import DictConfig, OmegaConf\n",
"from pyprojroot import here\n",
- "from importlib import import_module\n"
+ "from importlib import import_module"
]
},
{
@@ -64,11 +71,12 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
- "#| export\n",
+ "#| exports:\n",
+ "#\n",
"def describe(\n",
" cfg: DictConfig=None, # Configuration file\n",
" )-> None:\n",
@@ -84,11 +92,12 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
- "#| exporti\n",
+ "#| exporti:\n",
+ "#\n",
"def _expand_path(\n",
" path: str # Path on user's machine\n",
" )-> str: # Expanded path\n",
@@ -105,11 +114,12 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
- "#| exporti\n",
+ "#| exporti:\n",
+ "#\n",
"def _get_callable(func_path):\n",
" \"\"\"Dynamically import a callable from a string path.\"\"\"\n",
" module_name, func_name = func_path.rsplit(\".\", 1)\n",
@@ -119,22 +129,18 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
- "#| exporti\n",
- "\n",
+ "#| exporti:\n",
+ "# a directory structure creator\n",
"def _create_directory_structure(\n",
" base_path: str, # The base directory where the structure will be created\n",
" structure: dict # A dictionary representing the directory structure\n",
" )->None:\n",
" \"\"\"\n",
" Recursively creates a directory structure from a dictionary.\n",
- "\n",
- " Args:\n",
- " base_path (str): The base directory where the structure will be created.\n",
- " structure (dict): A dictionary representing the directory structure.\n",
" \"\"\"\n",
" for folder, substructure in structure.items():\n",
" # Create the current directory\n",
@@ -146,23 +152,28 @@
" _create_directory_structure(current_path, substructure)"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "In addition, we've defined 3 private functions to help with path expansion `_expand_path`, dynamic function importing `_get_callable`, and directory structure creation `_create_directory_structure`.\n",
+ "\n",
+ "### A Simple Temperature Conversion Function"
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
- "#| export\n",
- "\n",
- "def kelvin_to_celsius(kelvin):\n",
+ "#| export:\n",
+ "#\n",
+ "def kelvin_to_celsius(\n",
+ " kelvin: float # Temperature in Kelvin\n",
+ " ) -> float: # Temperature in Celsius\n",
" \"\"\"\n",
" Convert temperature from Kelvin to Celsius.\n",
- " \n",
- " Args:\n",
- " kelvin (float): Temperature in Kelvin.\n",
- " \n",
- " Returns:\n",
- " float: Temperature in Celsius.\n",
" \"\"\"\n",
" return kelvin - 273.15"
]
@@ -173,17 +184,24 @@
"source": [
"### A Class for Authenticating Google Drive\n",
"\n",
- "We're going to use a class to authenticate and interact with google drive. The goal is to have a simple interface to fetch the healthshed files dynamically from google drive in the pipeline."
+ "We're going to use a class to authenticate and interact with google drive. The goal is to have a simple interface to fetch the healthshed files dynamically from google drive in the pipeline.\n",
+ "\n",
+ "::: {.callout-important}\n",
+ "This class was implemented when all of our data\n",
+ "was stored on a private Google Drive. Since we\n",
+ "have moved all of our data to FASRC, this will\n",
+ "likely be deprecated in the near future.\n",
+ ":::"
]
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
- "#| export\n",
- "\n",
+ "#| export:\n",
+ "#\n",
"class GoogleDriver:\n",
" \"\"\"\n",
" A class to handle Google Drive authentication and file management.\n",
@@ -225,25 +243,48 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from hydra import initialize, compose\n",
- "from omegaconf import OmegaConf\n",
- "\n",
+ "from omegaconf import OmegaConf"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
"# unfortunately, we have to use the initialize function to load the config file\n",
"# this is because the @hydra decorator does not work with Notebooks very well\n",
"# this is a known issue with Hydra: https://gist.github.com/bdsaglam/586704a98336a0cf0a65a6e7c247d248\n",
"# \n",
"# just use the relative path from the notebook to the config dir\n",
- "with initialize(version_base=None, config_path=\"../conf\"):\n",
- " cfg = compose(config_name='config.yaml')"
+ "try:\n",
+ " with initialize(version_base=None, config_path=\"../conf\"):\n",
+ " cfg = compose(config_name='config.yaml')\n",
+ "except Exception as e:\n",
+ " print(f\"Error initializing Hydra: {e}\")\n",
+ " with initialize(version_base=None, config_path=\"conf\"):\n",
+ " cfg = compose(config_name='config.yaml')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "::: {.callout-important}\n",
+ "If we continue with `pytask`, we will not need to\n",
+ "use hydra at all, and so the above strategy\n",
+ "may get deprecated.\n",
+ ":::"
]
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -260,17 +301,9 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "healthsheds2022.zip - application/zip\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# we're using the madagascar healthshed folder as an example\n",
"folder_id = cfg.geographies.madagascar.healthsheds\n",
@@ -290,7 +323,7 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -303,9 +336,7 @@
" file_obj.GetContentFile(zip_path)\n",
"\n",
" # Read shapefile directly from ZIP\n",
- " gdf = gpd.read_file(f\"zip://{zip_path}\")\n",
- "\n",
- "\n"
+ " gdf = gpd.read_file(f\"zip://{zip_path}\")"
]
},
{
@@ -317,22 +348,23 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
- "#| export\n",
+ "#| export:\n",
+ "#\n",
"from fastcore.basics import patch"
]
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
- "#| export\n",
- "\n",
+ "#| export:\n",
+ "#\n",
"@patch\n",
"def read_healthsheds(self:GoogleDriver, healthshed_zip_name):\n",
"\n",
@@ -365,123 +397,9 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- "
\n",
- "
\n",
- "
fs_pop
\n",
- "
n_uid
\n",
- "
n_instat
\n",
- "
n_comp
\n",
- "
n_shape
\n",
- "
\n",
- " \n",
- " \n",
- "
\n",
- "
count
\n",
- "
2766.000000
\n",
- "
2766.000000
\n",
- "
2766.000000
\n",
- "
2766.000000
\n",
- "
2766.000000
\n",
- "
\n",
- "
\n",
- "
mean
\n",
- "
10493.058930
\n",
- "
7.480116
\n",
- "
6.318149
\n",
- "
1.010484
\n",
- "
1.036515
\n",
- "
\n",
- "
\n",
- "
std
\n",
- "
12127.817529
\n",
- "
7.263235
\n",
- "
4.939271
\n",
- "
0.112019
\n",
- "
0.393120
\n",
- "
\n",
- "
\n",
- "
min
\n",
- "
0.000000
\n",
- "
1.000000
\n",
- "
1.000000
\n",
- "
1.000000
\n",
- "
1.000000
\n",
- "
\n",
- "
\n",
- "
25%
\n",
- "
4344.750000
\n",
- "
4.000000
\n",
- "
3.000000
\n",
- "
1.000000
\n",
- "
1.000000
\n",
- "
\n",
- "
\n",
- "
50%
\n",
- "
7417.000000
\n",
- "
6.000000
\n",
- "
5.000000
\n",
- "
1.000000
\n",
- "
1.000000
\n",
- "
\n",
- "
\n",
- "
75%
\n",
- "
12531.250000
\n",
- "
9.000000
\n",
- "
8.000000
\n",
- "
1.000000
\n",
- "
1.000000
\n",
- "
\n",
- "
\n",
- "
max
\n",
- "
194782.000000
\n",
- "
104.000000
\n",
- "
62.000000
\n",
- "
3.000000
\n",
- "
15.000000
\n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " fs_pop n_uid n_instat n_comp n_shape\n",
- "count 2766.000000 2766.000000 2766.000000 2766.000000 2766.000000\n",
- "mean 10493.058930 7.480116 6.318149 1.010484 1.036515\n",
- "std 12127.817529 7.263235 4.939271 0.112019 0.393120\n",
- "min 0.000000 1.000000 1.000000 1.000000 1.000000\n",
- "25% 4344.750000 4.000000 3.000000 1.000000 1.000000\n",
- "50% 7417.000000 6.000000 5.000000 1.000000 1.000000\n",
- "75% 12531.250000 9.000000 8.000000 1.000000 1.000000\n",
- "max 194782.000000 104.000000 62.000000 3.000000 15.000000"
- ]
- },
- "execution_count": 15,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"driver = GoogleDriver(json_key_path=here() / cfg.GOOGLE_DRIVE_AUTH_JSON.path)\n",
"drive = driver.get_drive()\n",
@@ -496,6 +414,10 @@
"source": [
"## CDS File Handler Type\n",
"\n",
+ "::: {.callout-important}\n",
+ "This section may also be deprecated. Since adding `swvl1` to the pipeline, we have not needed to use this class. We leave it here for now for reference.\n",
+ ":::\n",
+ "\n",
"We're going to make a file handler type to help deal with CDS files. This is to fix [NSAPH-Data-Processing/era5_sandbox#13](https://github.com/NSAPH-Data-Processing/era5_sandbox/issues/13). \n",
"\n",
"Usually, when you download data, it comes out as a simple .nc file that can be opened with xarray. However, the CDS API has a few different file types that are not .nc files. For example, the ERA5 data is stored in a .grib file format. This is a common format for meteorological data, and it is used by the ECMWF. When a query has multiple variables, sometimes they are downloaded as a .zip file to separat the grib from the netcdf.\n",
@@ -505,12 +427,12 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
- "#| export\n",
- "\n",
+ "#| export:\n",
+ "#\n",
"class ClimateDataFileHandler:\n",
" \"\"\"\n",
" A class to handle file operations for the Climate Data Store (CDS).\n",
@@ -609,9 +531,16 @@
"outputs": [],
"source": [
"import xarray as xr\n",
- "from fastcore.test import test_fail\n",
- "\n",
- "eg_file = here() / \"data/input/madagascar_2023_10.nc\"\n",
+ "from fastcore.test import test_fail"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "eg_file = here() / \"bld/2019_5_madagascar.nc\"\n",
"\n",
"# this fails because the nc file downloaded has grib and netcdf in it, so\n",
"# xr cannot handle it.\n",
@@ -636,911 +565,46 @@
},
{
"cell_type": "code",
- "execution_count": 30,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"handler = ClimateDataFileHandler(eg_file)\n",
"handler.prepare()\n",
"ds1 = xr.open_dataset(handler.get_dataset(\"instant\"))\n",
- "ds2 = xr.open_dataset(handler.get_dataset(\"accum\"))"
+ "#ds2 = xr.open_dataset(handler.get_dataset(\"accum\"))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "::: {.callout-important}\n",
+ "The above line for `ds2` is commented out because the example file does not separate accumulation data. \n",
+ ":::"
]
},
{
"cell_type": "code",
- "execution_count": 29,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
+ "#| eval: false\n",
"parquet_file.plot(column=\"day_22_daily_mean\", legend=True)"
]
},
@@ -2920,7 +798,8 @@
"metadata": {},
"outputs": [],
"source": [
- "#| export\n",
+ "#| exports:\n",
+ "#\n",
"@hydra.main(version_base=None, config_path=\"../../conf\", config_name=\"config\")\n",
"def main(cfg: DictConfig) -> None:\n",
" # Parse command-line arguments\n",
@@ -2947,7 +826,7 @@
"metadata": {},
"outputs": [],
"source": [
- "#| export\n",
+ "#| export:\n",
"#| eval: false\n",
"try: from nbdev.imports import IN_NOTEBOOK\n",
"except: IN_NOTEBOOK=False\n",
@@ -2962,30 +841,19 @@
"metadata": {},
"outputs": [],
"source": [
- "#| hide\n",
+ "#| hide: \n",
+ "#\n",
"import nbdev; nbdev.nbdev_export()"
]
}
],
"metadata": {
"kernelspec": {
- "display_name": "era5_sandbox",
+ "display_name": "python3",
"language": "python",
"name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.11.11"
}
},
"nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 5
}
diff --git a/notes/03_publish.ipynb b/notes/03_publish.ipynb
new file mode 100644
index 0000000..a9d3461
--- /dev/null
+++ b/notes/03_publish.ipynb
@@ -0,0 +1,719 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "---\n",
+ "title: \"Publish: Gather the Aggregated Data and Publish to DataVerse\"\n",
+ "engine: jupyter\n",
+ "---\n",
+ "\n",
+ "## publish \n",
+ "\n",
+ "> This is the `publish` module for the ERA5 dataset pipeline. It defines functions that make use of the `pyDataverse` library and API to publish our outputs to the Harvard Dataverse."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| default_exp publish:\n",
+ "#"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| hide:\n",
+ "#\n",
+ "from nbdev.showdoc import *"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "First, we'll test out the API by pinging the Harvard DataVerse"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| exports:\n",
+ "#\n",
+ "import hydra\n",
+ "import yaml\n",
+ "import json\n",
+ "from tqdm import tqdm\n",
+ "from pyprojroot import here"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "api_token_file = here() / \"sandbox/dataverse_api_key.yml\"\n",
+ "with open(api_token_file, \"r\") as f:\n",
+ " config = yaml.load(f, Loader=yaml.BaseLoader)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now, following the [docs](https://pydataverse.readthedocs.io/) for the dataverse tutorial, load a NativeAPI up:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| exports:\n",
+ "#\n",
+ "from pyDataverse.api import NativeApi"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The NativeAPI is a catchall API object to be able to do general stuff:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "api = NativeApi(config['base_url'], config['api_token'])\n",
+ "resp=api.get_info_version()\n",
+ "#resp.text()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "resp.json()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Looks good! Now that we know that it works, we can think more\n",
+ "about how to publish data there.\n",
+ "\n",
+ "## Harvard Dataverse\n",
+ "\n",
+ "Let's create a dummy dataset with the components we're\n",
+ "planning to upload, and then upload and promptly delete it.\n",
+ "\n",
+ "To do that, we must import the `models` module and create a Dataset object:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from pyDataverse.models import Dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ds = Dataset()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This `ds` object is pretty straightforward since it doesn't contain anything yet:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ds.get()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can populate the object from the dummy data on the github repo:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from pyDataverse.utils import read_file\n",
+ "from urllib.request import urlretrieve\n",
+ "import tempfile"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# url for dummy data\n",
+ "url = \"https://raw.githubusercontent.com/gdcc/pyDataverse/refs/heads/main/tests/data/user-guide/dataset.json\"\n",
+ "\n",
+ "\n",
+ "with tempfile.NamedTemporaryFile(mode='w+') as tmp:\n",
+ " urlretrieve(url, tmp.name)\n",
+ " ds.from_json(read_file(tmp.name))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We have to validate the JSON correctly:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ds.validate_json()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Modifying it is easy:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ds.set({\"title\": \"Youth from Austria 2005\"})\n",
+ "ds.get()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now, to create the dataset we use the API:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| eval: false\n",
+ "# this is only run in interactive sessions for demo purposes\n",
+ "resp = api.create_dataset(\":root\", ds.json())"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "If you caught the `resp` object, it contains the PID for the newly created dataset.\n",
+ "\n",
+ "However, if you didn't, you can use the SearchAPI to find it:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| exports:\n",
+ "#\n",
+ "from pyDataverse.api import SearchApi"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "search_api = SearchApi(config['base_url'], config['api_token'])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| eval: false\n",
+ "#\n",
+ "\n",
+ "resp = search_api.search(\"Youth from Austria\", data_type=\"dataset\")\n",
+ "results = resp.json()['data']['items']\n",
+ "result = [x for x in results if \"Youth from Austria\" in x['name']][0]\n",
+ "result"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| eval: false\n",
+ "pid = result['global_id']"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now to look at the data we created using the NativeAPI again, and delete the dataset:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| eval: false\n",
+ "\n",
+ "uploaded_ds = api.get_dataset(pid)\n",
+ "uploaded_ds.json()['data']\n",
+ "\n",
+ "resp = api.delete_dataset(pid)\n",
+ "resp.json()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "With that understanding, we can develop a quick module to do the following:\n",
+ "\n",
+ "1. Make the dataset LEGO Compatible\n",
+ "2. Upload and publish the data to dataverse\n",
+ "\n",
+ "## LEGO Compatibility\n",
+ "\n",
+ "Let's take an example file to use as a model for LEGO compatibility"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| exports:\n",
+ "#\n",
+ "import geopandas as gpd\n",
+ "import pandas as pd\n",
+ "import re\n",
+ "import glob"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ex = gpd.read_parquet(here() / \"bld/2009_06_madagascar_day_swvl1_mean.parquet\")\n",
+ "ex.describe()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We know that the LEGO data model should look like this:\n",
+ "\n",
+ "```\n",
+ "/lego\n",
+ "├── \n",
+ "│ ├── __\n",
+ "│ │ ├── __\n",
+ "│ │ │ ├── _yyyy.parquet\n",
+ "```\n",
+ "\n",
+ "So, for the above file, we'll end up with the LEGO path `data/environmental/exposures_era5/healthshed_monthly/dewpoint_2024.parquet`. In it, we should have the following columns:\n",
+ "\n",
+ "\n",
+ "```\n",
+ "healthshed_id year month day stat_1 stat_2 ... stat_n \n",
+ "```\n",
+ "\n",
+ "\n",
+ "This means we should read in all of the exposures for a single timepoint at once. \n",
+ "I think the smart thing to do is use a glob string to gather all of the pertinent files.\n",
+ "This will be the first function we export to the library:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| exports:\n",
+ "# \n",
+ "\n",
+ "def gather_exposure_geodataframes(\n",
+ " glob_string: str, # string for the path to search for the pertinent files\n",
+ " polygon_id: str, # the string signifying the healthshed ID of the polygon\n",
+ " exposure: str # the exposure name\n",
+ " )-> list:\n",
+ " \"Read in a list of geo dataframes from the same time frame and merge them\"\n",
+ "\n",
+ " # first get the initial one so we have the polygon ID and geometry\n",
+ " frames = glob.glob(str(glob_string))\n",
+ " initial_gdf=gpd.read_parquet(frames[0])\n",
+ " merged_df = []\n",
+ " \n",
+ " for f in tqdm(frames, desc=\"Processing files\"):\n",
+ " # read in as a regular dataframe by ignoring geometry\n",
+ " df = gpd.read_parquet(f).drop([\"geometry\"], axis=1) \n",
+ " \n",
+ " # get the year and month\n",
+ " # Extract year and month\n",
+ " search_str = rf'_{exposure}_(\\d{{4}})_(\\d{{1,2}})\\.parquet$'\n",
+ " match = re.search(search_str, f)\n",
+ "\n",
+ " if match:\n",
+ " year = int(match.group(1))\n",
+ " month = int(match.group(2))\n",
+ " #print(f\"Year: {year}, Month: {month}\")\n",
+ " else:\n",
+ " raise ValueError(f\"Could not extract year and month from filename: {search_str} {f}\")\n",
+ " \n",
+ " df['exposure'] = exposure\n",
+ " df['month'] = month\n",
+ " df['year'] = year\n",
+ "\n",
+ " # Step 1: Melt all day columns (leave 'month' and 'year' as identifiers)\n",
+ " df_long = df.melt(id_vars=[polygon_id, \"exposure\", \"year\", \"month\"], var_name=\"day_stat\", value_name=\"value\")\n",
+ "\n",
+ " # Step 2: Extract day and stat type from column names\n",
+ " # Example column: \"day_01_daily_mean\"\n",
+ " df_long[[\"day\", \"stat\"]] = df_long[\"day_stat\"].str.extract(r\"day_(\\d{2})_daily_(mean|max|min|total)\")\n",
+ "\n",
+ " # Optional: convert 'day' and month to integer\n",
+ " df_long[\"day\"] = df_long[\"day\"].astype(int)\n",
+ " df_long[\"month\"] = df_long[\"month\"].astype(int)\n",
+ "\n",
+ " # Drop the original combined column\n",
+ " df_long = df_long.drop(columns=\"day_stat\")\n",
+ "\n",
+ " # Reorder columns\n",
+ " df_long = df_long[[polygon_id, \"exposure\", \"year\", \"month\", \"day\", \"stat\", \"value\"]]\n",
+ "\n",
+ " df_long = df_long.sort_values(by=[\"year\", \"month\", \"day\"])\n",
+ " df_clean = df_long.pivot(index=[polygon_id, \"exposure\", \"year\", \"month\", \"day\"], columns=\"stat\", values=\"value\").reset_index()\n",
+ " merged_df.append(df_clean)\n",
+ "\n",
+ " return [pd.concat(merged_df).reset_index(drop=True), initial_gdf[[polygon_id, \"geometry\"]]]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "frames = here() / \"data\" / \"testing\" / \"*madagascar*\"\n",
+ "\n",
+ "merged = gather_exposure_geodataframes(frames, \"fs_uid\", \"2m_dewpoint_temperature\")\n",
+ "merged[0].describe()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This returns one data frame with the statistics and exposures, and one\n",
+ "data frame with the polygon IDs and geometries.\n",
+ "\n",
+ "Now, with this, we can move on. The dataset was created in the UI and is available via search, so let's look it up and test out how to upload to it:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "resp = search_api.search(\"ERA5\", data_type=\"dataset\")\n",
+ "\n",
+ "results = resp.json()['data']['items']\n",
+ "\n",
+ "result = [x for x in results if \"ERA5\" in x['name']][0]\n",
+ "era5_pid = result['global_id']\n",
+ "result"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| exports:\n",
+ "#\n",
+ "\n",
+ "from pyDataverse.models import Datafile\n",
+ "import os\n",
+ "import pathlib"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We'll upload directly from file. In the case of ERA5 vs. LEGO, we\n",
+ "store the file on disk in the LEGO hierarchy, but upload it to Dataverse\n",
+ "using a flat filename (since creating subdatasets to represent directories might be \n",
+ "a bit of a hassle)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# assuming the file has a path on disk like:\n",
+ "f_out = \"environmental/exposures_era5/healthshed_daily/dewpoint_2024.parquet\"\n",
+ "os.makedirs(here() / \"data\" / \"testing\" / os.path.dirname(f_out), exist_ok=True)\n",
+ "aggregations, geo = merged\n",
+ "aggregations.to_parquet(here() / \"data\" / \"testing\" / f_out, index=False)\n",
+ "\n",
+ "datafile = Datafile()\n",
+ "datafile.set({\n",
+ " # the id of the era5 dataset \n",
+ " \"pid\": era5_pid,\n",
+ " # the path to the file on disk goes here\n",
+ " \"filename\": str(here() / \"data\" / \"testing\" / f_out),\n",
+ " # use the \"label\" to name the file\n",
+ " \"label\": f_out.replace(\"/\", \"-\")\n",
+ "})"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| eval: false\n",
+ "resp = api.upload_datafile(era5_pid, str(here() / \"data\" / \"testing\" / f_out), datafile.json())"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Pretty simple!\n",
+ "\n",
+ "Now, we just need a main function to upload this data. The final upload is one file per\n",
+ "exposure per year, so these should be the variables we gather data for.\n",
+ "\n",
+ "We should get some functionality to gather the groups of these files automatically, based on\n",
+ "the hydra config:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| exports:\n",
+ "#\n",
+ "from hydra import initialize, compose\n",
+ "from omegaconf import OmegaConf, DictConfig\n",
+ "from tqdm import tqdm"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "target_dir = here() / \"data\" / \"intermediate\"\n",
+ "\n",
+ "try:\n",
+ " with initialize(version_base=None, config_path=\"../conf\"):\n",
+ " cfg = compose(config_name='config.yaml')\n",
+ "except Exception as e:\n",
+ " print(f\"Error initializing Hydra: {e}\")\n",
+ " with initialize(version_base=None, config_path=\"conf\"):\n",
+ " cfg = compose(config_name='config.yaml')\n",
+ "\n",
+ "cfg.development_mode = False\n",
+ "#cfg.query['year'] = 2017\n",
+ "#cfg.query['month'] = 11\n",
+ "#cfg.query['geography'] = \"nepal\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| exports:\n",
+ "#\n",
+ "\n",
+ "@hydra.main(version_base=None, config_path=\"../../conf\", config_name=\"config\")\n",
+ "def main(cfg: DictConfig) -> None:\n",
+ "\n",
+ " variables_dict = {\n",
+ " \"2m_temperature\": \"t2m\",\n",
+ " \"2m_dewpoint_temperature\": \"d2m\",\n",
+ " \"volumetric_soil_water_layer_1\": \"swvl1\",\n",
+ " \"total_precipitation\": \"tp\"\n",
+ " }\n",
+ "\n",
+ " print(OmegaConf.to_yaml(cfg))\n",
+ "\n",
+ " #prep dataverse\n",
+ " api_token_file = here() / \"sandbox/dataverse_api_key.yml\"\n",
+ " with open(api_token_file, \"r\") as f:\n",
+ " apiconfig = yaml.load(f, Loader=yaml.BaseLoader)\n",
+ " api = NativeApi(apiconfig['base_url'], apiconfig['api_token'])\n",
+ " search_api = SearchApi(apiconfig['base_url'], apiconfig['api_token'])\n",
+ " resp = search_api.search(\"ERA5\", data_type=\"dataset\")\n",
+ "\n",
+ " results = resp.json()['data']['items']\n",
+ "\n",
+ " result = [x for x in results if \"ERA5\" in x['name']][0]\n",
+ " era5_pid = result['global_id']\n",
+ "\n",
+ " for geography in cfg.geographies:\n",
+ " for year in cfg.query['year']:\n",
+ " for variable, v in variables_dict.items():\n",
+ " \n",
+ " print(f\"Processing {geography} for {variable} in {year}\")\n",
+ " glob_string = here() / \"data\" / \"intermediate\" / f\"*{geography}*{variable}*{year}*\"\n",
+ " print(f\"Glob: {glob_string}\")\n",
+ " polygon_id = cfg.geographies[geography]['unique_id']\n",
+ " print(f\"polygon_id: {polygon_id}\")\n",
+ " merged = gather_exposure_geodataframes(glob_string, polygon_id, variable)\n",
+ " print(merged[0].head())\n",
+ " print(merged[1].head())\n",
+ "\n",
+ " output_dir = here() / \"data\" / \"output\" \n",
+ " \n",
+ " f_out = f\"environmental/exposures_era5/healthshed_daily/{geography}_{v}_{year}.parquet\"\n",
+ " os.makedirs(output_dir / os.path.dirname(f_out), exist_ok=True)\n",
+ " output_path = output_dir / f_out\n",
+ "\n",
+ " print(f\"Writing to {output_path}\")\n",
+ " merged[0].to_parquet(output_path, index=False)\n",
+ " \n",
+ "\n",
+ " print(f\"Uploading {f_out.replace('/', '-')} to Dataverse...\")\n",
+ " # upload to dataverse\n",
+ " datafile = Datafile()\n",
+ " datafile.set({\n",
+ " \"pid\": era5_pid,\n",
+ " \"filename\": str(output_path),\n",
+ " \"label\": f_out.replace(\"/\", \"-\")\n",
+ " })\n",
+ "\n",
+ " resp = api.upload_datafile(era5_pid, output_path, datafile.json())\n",
+ " assert resp.json()['status'] == \"OK\", f\"Failed to upload datafile: {resp.text}\"\n",
+ " \n",
+ " # also save the geometry for the region \n",
+ " merged[1].to_parquet(output_path.parent / f\"{geography}_geometry.parquet\", index=False)\n",
+ "\n",
+ " # and upload it to dataverse\n",
+ " datafile = Datafile()\n",
+ " datafile.set({\n",
+ " \"pid\": era5_pid,\n",
+ " \"filename\": str(output_path.parent / f\"{geography}_geometry.parquet\"),\n",
+ " \"label\": f\"{geography}_geometry.parquet\"\n",
+ " })\n",
+ "\n",
+ " resp = api.upload_datafile(era5_pid, output_path.parent / f\"{geography}_geometry.parquet\", datafile.json())\n",
+ " assert resp.json()['status'] == \"OK\", f\"Failed to upload geometry datafile: {resp.text}\"\n",
+ "\n",
+ " print(\"All files processed and uploaded successfully.\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| export:\n",
+ "#| eval: false\n",
+ "try: from nbdev.imports import IN_NOTEBOOK\n",
+ "except: IN_NOTEBOOK=False\n",
+ "\n",
+ "if __name__ == \"__main__\" and not IN_NOTEBOOK:\n",
+ " main()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| hide:\n",
+ "#\n",
+ "import nbdev; nbdev.nbdev_export()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "python3",
+ "language": "python",
+ "name": "python3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notes/10_pytask_demo.ipynb b/notes/10_pytask_demo.ipynb
new file mode 100644
index 0000000..cdd3c73
--- /dev/null
+++ b/notes/10_pytask_demo.ipynb
@@ -0,0 +1,412 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "---\n",
+ "title: \"Demo: How to Create Pipelines with `pytask`\"\n",
+ "engine: jupyter\n",
+ "---\n",
+ "\n",
+ "## Data Preparation Demo\n",
+ "\n",
+ "> Data preparation task for `pytask` demo\n",
+ "\n",
+ "In this notebook, we are demonstrating how to convert our snakemake workflow into a `pytask` workflow. We use the basic tutorial to demonstrate this, but continue\n",
+ "to use nbdev for development of functions in notebooks.\n",
+ "\n",
+ "`pytask` is a task management system that allows you to define tasks and their dependencies, similar to `Snakemake`. It is particularly useful for data science workflows.\n",
+ "\n",
+ "There are a number of reasons to use `pytask` over `snakemake`:\n",
+ "- **Pythonic**: `pytask` is designed to be purely Pythonic by default, allowing you to write tasks and entire pipelines as Python functions.\n",
+ "- **Flexibility**: `pytask` allows you to define tasks and their dependencies in a more flexible way, using Python functions and decorators, as opposed to orchestrating numerous scripts.\n",
+ "- **Integration**: `pytask` integrates well with other Python libraries, such as `nbdev` here, or `hydra` configurations if you need, allowing you to use your existing code, notebooks, or configs in your workflows.\n",
+ "- **Parallelism**: `pytask` supports parallel execution of tasks with `pytask-parallel`, which can speed up your workflows significantly, especially for data processing tasks.\n",
+ "\n",
+ "We'll use nbdev to define the task functions, and then export them to the `src` directory. `pytask` is then invoked at the command line to run the tasks."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| default_exp task_data_preparation:\n",
+ "#"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This demo task is taken from the tutorial at [pytask documentation](https://pytask-dev.readthedocs.io/en/stable/tutorials/write_a_task.html). At minimum, your project needs the following layout, including a `config.py` module:\n",
+ "\n",
+ "```md\n",
+ "my_project\n",
+ "│\n",
+ "├───.pytask\n",
+ "│\n",
+ "├───bld\n",
+ "│ └────...\n",
+ "│\n",
+ "├───src\n",
+ "│ └───my_project\n",
+ "│ ├────__init__.py\n",
+ "│ ├────config.py\n",
+ "│ └────...\n",
+ "│\n",
+ "└───pyproject.toml\n",
+ "```\n",
+ "\n",
+ "```python\n",
+ "#contents of `era5_sandbox.config` module\n",
+ "from pathlib import Path\n",
+ "\n",
+ "\n",
+ "SRC = Path(__file__).parent.resolve()\n",
+ "BLD = SRC.joinpath(\"..\", \"..\", \"bld\").resolve()\n",
+ "```\n",
+ "\n",
+ "Additionally, your pyproject.toml file should contain the following at minimum:\n",
+ "\n",
+ "```toml\n",
+ "[tool.pytask.ini_options]\n",
+ "paths = [\"src/era5_sandbox\"]\n",
+ "```\n",
+ "\n",
+ "The former tells Python where to find the source code and build directory for `pytask` objects and shims, while the latter tells `pytask` where to find the task definitions and dependency DAG."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| exports:\n",
+ "#\n",
+ "import os\n",
+ "from pathlib import Path\n",
+ "from typing import Annotated\n",
+ "\n",
+ "import numpy as np\n",
+ "import matplotlib.pyplot as plt\n",
+ "import pandas as pd\n",
+ "from era5_sandbox.config import BLD\n",
+ "from era5_sandbox.config import data_catalog, demo_catalog\n",
+ "\n",
+ "from pytask import PickleNode\n",
+ "from pytask import Product"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Defining Tasks\n",
+ "\n",
+ "To define a task, simply use the `task_` prefix in the function name (or, if you are familiar and comfortable with decorators, use `@pytask.mark.task`). Be verbose and expressive in your use of type hints to specify the input and output data, so that `pytask` can automatically detect and handle the dependencies between tasks. \n",
+ "\n",
+ "### Defining Tracked Outputs\n",
+ "\n",
+ "To define something as a tracked output, you can annotate the input of the task with `Annotated[Path, Product]`, where `Product` is imported from `pytask`. This tells `pytask` that this is a product of the task and should be saved in the build directory.\n",
+ "\n",
+ "In this example, we're generating random data into a data frame and saving the object as a pickle in the `bld` directory (`bld` is the default build directory for `pytask`'s intermediate data). To get that directory, we use the `BLD` variable from the `era5_sandbox.config` module as above. This module itself could also be generated using `nbdev` if you want to keep your configuration in notebooks.\n",
+ "\n",
+ "Using `nbdev`, we can also include all of the bells and whistles of function documentation."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| exports:\n",
+ "#\n",
+ "\n",
+ "def task_create_random_data(\n",
+ " seed: Annotated[int, 42], # Default seed for reproducibility\n",
+ " path_to_data: Annotated[Path, Product] = BLD / \"data.pkl\" # Path to the object in the build directory\n",
+ " ) -> None:\n",
+ " \"Create a random data set and save it as a pickle file. Return the path to the saved file.\"\n",
+ " rng = np.random.default_rng(seed)\n",
+ " beta = 2\n",
+ "\n",
+ " x = rng.normal(loc=5, scale=10, size=1_000)\n",
+ " epsilon = rng.standard_normal(1_000)\n",
+ "\n",
+ " y = beta * x + epsilon\n",
+ "\n",
+ " df = pd.DataFrame({\"x\": x, \"y\": y})\n",
+ "\n",
+ " # this is a tracked output, so we annotate the return value with `Annotated[Path, Product]`\n",
+ " df.to_pickle(path_to_data)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can test the function directly in the notebook:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "task_create_random_data(42)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Once this module and function are exported with `nbdev_export`, the functions are in a python package. We can then use the command line to look at the registered tasks:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| eval: false\n",
+ "\n",
+ "%%sh\n",
+ "pytask collect"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's add another task in the same module. This task plots the data we generated. To link the previous task to this one as a dependency, we can list the output of the previous task as an input to this one. This way, `pytask` will know that it needs to run the first task before this one."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| exports:\n",
+ "#\n",
+ "\n",
+ "def task_plot_data(\n",
+ " path_to_data: Annotated[Path, BLD / \"data.pkl\"], # Path to the data file created by the previous task\n",
+ " path_to_plot: Annotated[Path, Product] = BLD / \"plot.png\" # Path to the build directory for the plot\n",
+ ") -> None:\n",
+ " \"\"\"\n",
+ " Plot the data from the pickle file and save the plot. Note that this task:\n",
+ " 1. depends on the data.pkl file created by the previous task,\n",
+ " 2. does not return any value, but saves a plot to the build directory. So the side effect of the task is what we are interested in here (though this is probably bad practice).\n",
+ " \"\"\"\n",
+ "\n",
+ " df = pd.read_pickle(path_to_data)\n",
+ " \n",
+ " _, ax = plt.subplots()\n",
+ " df.plot(x=\"x\", y=\"y\", ax=ax, kind=\"scatter\")\n",
+ "\n",
+ " plt.savefig(path_to_plot)\n",
+ " plt.close()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We now have a DAG of tasks that `pytask` can execute. To see the tasks, we can use the command line to create a pygraphviz graph of the tasks:\n",
+ "\n",
+ "```bash\n",
+ "pytask dag\n",
+ "```\n",
+ "\n",
+ "The DAG is saved as a pdf file, and you can view it using any viewer. Now, to run the pipeline, just invoke `pytask` at the command line:\n",
+ "\n",
+ "```bash\n",
+ "pytask\n",
+ "```\n",
+ "\n",
+ "In Jupyter or iPython, you can interact with the task outputs directly:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| eval: false\n",
+ "\n",
+ "# list all the files in the build directory\n",
+ "for file in os.listdir(BLD):\n",
+ " print(file)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can use these to build subsequent tasks later.\n",
+ "\n",
+ "## More Complex Tasks & The Data Catalog\n",
+ "\n",
+ "As we define more complex tasks, we can use the `pytask` data catalog to manage the inputs and outputs of our tasks. The data catalog allows us to imperatively name the data and their formats, making it easier to manage the data flow in our tasks. Importantly, we can define the data pythonically, which allows us to use the full power of Python to manipulate and transform our data. This is considerably more useful than snakemake's approach, which requires you to define the data in a more static way using paths and a separate pseudo-language.\n",
+ "\n",
+ "The content of the `era5_sandbox.config` module can be extended to include a data catalog:\n",
+ "\n",
+ "```python\n",
+ "from pathlib import Path\n",
+ "from pytask import DataCatalog, Product\n",
+ "\n",
+ "SRC = Path(__file__).parent.resolve()\n",
+ "BLD = SRC.joinpath(\"..\", \"..\", \"bld\").resolve()\n",
+ "\n",
+ "demo_catalog = DataCatalog()\n",
+ "```\n",
+ "\n",
+ "With just this definition, we're now able to refer directly to data by name in our tasks, and `pytask` will handle the paths and formats for us. This allows us to focus on the logic of our tasks rather than the details of data management.\n",
+ "\n",
+ ":::{.callout-note}\n",
+ "This is a major advantage of `pytask` over `snakemake`, as it allows you to define the data in a more flexible and Pythonic way, while still maintaining the benefits of a task management system. It is a similar approach to building pipelines in R with targets, which allows you to define the data in a more flexible way.\n",
+ ":::\n",
+ "\n",
+ "Let's create a task that modifies the data frame by adding a new column. This task will depend on the previous task's output, and we will use the data catalog to define the input and output data."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| exports:\n",
+ "#\n",
+ "\n",
+ "def task_add_one(\n",
+ " path_to_data: Annotated[Path, BLD / \"data.pkl\"], # Path to the data file created by the previous task\n",
+ " node: Annotated[PickleNode, Product] = demo_catalog[\"mydata\"]\n",
+ ") -> None:\n",
+ " \"\"\"\n",
+ " Add one to the 'y' column of the data frame and save it as a new pickle file.\n",
+ " \"\"\"\n",
+ " df = pd.read_pickle(path_to_data)\n",
+ " df['z'] = df['y'] + 1\n",
+ " \n",
+ " node.save(df)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "In this function, we've defined that the task relies on the output of the first task being there, the `data.pkl` file. But importantly, we've also defined our product as a `node` of type `PickleNode`. This will allow `pytask` to handle the serialization and deserialization of the data frame automatically, so we don't have to worry about the details of how the data is stored. We create the data catalog in our config file, and then tell this task to create a Node in that catalog called `mydata`. Whatever we save with the `node.save()` method will be saved in the build directory, but more importantly _will be indexed and hashed by `pytask`_. This means that if the data changes, `pytask` will know to rerun the task.\n",
+ "\n",
+ "To make this even more pythonic, we can modify the format of our task function so that the return type annotator is used as a node in the data catalog. This allows us to define the output of the task as a `PickleNode`, which will automatically handle the serialization and deserialization of the data frame.\n",
+ "\n",
+ ":::{.callout-note}\n",
+ "This is another trick I'm deriving from {targets}. By formatting tasks as pure functions where inputs are parameters and targets are return type annotations, we can define the output of the task as a `PickleNode`, which will automatically handle the serialization and deserialization of the data frame. This again allows us to focus on the logic of our tasks rather than the details of data management.\n",
+ ":::\n",
+ "\n",
+ "So below, we're directly accessing the `demo_catalog` to get the `mydata` node, and then modifying it by adding a new column. It _feels_ like we are doing this in place, such as in an iPython session, because we are allowing `pytask` to handle the serialization of the file on disk for us."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| exports:\n",
+ "#\n",
+ "\n",
+ "def task_add_another_column(\n",
+ " df: Annotated[pd.DataFrame, demo_catalog[\"mydata\"]] # which object in the catalog to fetch from the catalog with node.load()\n",
+ ") -> Annotated[pd.DataFrame, demo_catalog[\"mydata2\"]]: # which object in the catalog to save the return value to\n",
+ " \"\"\"\n",
+ " Add another column to the data frame stored in the PickleNode.\n",
+ " \"\"\"\n",
+ "\n",
+ " # use the datacatalog directly to access the node\n",
+ " # this is a bit like accessing the node in an iPython session, but pytask\n",
+ " # will handle the serialization and deserialization for us\n",
+ " df['w'] = df['z'] * df['y']\n",
+ " \n",
+ " return df"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "To test this interactively, we'd have to import the data catalog's object"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = demo_catalog[\"mydata\"].load() # load the data frame from the PickleNode\n",
+ "result = task_add_another_column(df) # call the task function with the loaded data frame"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "result"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now that we know it will work, we can invoke pytask:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| eval: false\n",
+ "%%sh\n",
+ "pytask"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Notice that the outputs are cached and not recomputed unless the inputs change. This is a key feature of `pytask` and other DAGs, allowing you to efficiently manage your data processing tasks without unnecessary recomputation.\n",
+ "\n",
+ "## Conclusion\n",
+ "\n",
+ "The takeaway here is that with `pytask`, you can define pure functions that take inputs and return outputs, and build a DAG of tasks that can be executed in a flexible and efficient way. This allows you to focus on the logic of your tasks rather than the details of data management, while still maintaining the benefits of a task management system. The key elements are:\n",
+ "\n",
+ "- **Task annotation**: You define your tasks by creating pure functions that take inputs and return outputs, and use decorators or naming conventions to mark them as \"tasks\" in a DAG.\n",
+ "- **Input and output annotation**: You define the inputs and outputs of your tasks using type hints, and allow `pytask` to automatically detect and handle the dependencies between tasks.\n",
+ "- **Data catalog**: You define your data in a Pythonic object in your config called `data_catalog`. As you iteratively develop your DAG, you add objects to the data catalog, which are called nodes. As long as a node is a pythonic object and has a pickle method, `pytask` will handle the serialization and deserialization of the data for you."
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "python3",
+ "language": "python",
+ "name": "python3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notes/20_pytask_config.ipynb b/notes/20_pytask_config.ipynb
new file mode 100644
index 0000000..2ea8116
--- /dev/null
+++ b/notes/20_pytask_config.ipynb
@@ -0,0 +1,530 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "---\n",
+ "title: \"`pytask` Config: Defining the Pipeline Internals in `pytask`\"\n",
+ "engine: jupyter\n",
+ "---\n",
+ "\n",
+ "## config\n",
+ "\n",
+ "> This is the config module for the `pytask` pipeline. \n",
+ "This module defines the data catalog(s) and any hard-coded parameters that are used throughout the pipeline."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| default_exp config:\n",
+ "#"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| hide:\n",
+ "#\n",
+ "from nbdev.showdoc import *"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| exports: \n",
+ "#\n",
+ "\n",
+ "import pandas as pd\n",
+ "\n",
+ "from pathlib import Path\n",
+ "from pyprojroot import here\n",
+ "from pytask import DataCatalog\n",
+ "\n",
+ "\n",
+ "SRC = here() / \"src\" / \"era5_sandbox\"\n",
+ "BLD = here() / \"bld\"\n",
+ "\n",
+ "demo_catalog = DataCatalog()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## `DEV_MODE`: A Quick Development Flag\n",
+ "\n",
+ "I'm adding a flag to the config that can be used for quick development. \n",
+ "If you import this boolean variable, it can be used to skip tasks,\n",
+ "setup samples, etc. on the fly by `marking` a task with the `pytask.mark.skipif`\n",
+ "decorator. Change this to `False` when you're ready to run the full pipeline."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| exports:\n",
+ "#\n",
+ "DEV_MODE=True"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## The Data Catalog\n",
+ "\n",
+ "To manage our pipeline, we're going to use a nested data catalog structure.\n",
+ "This way, we can easily return specific entries to specific tasks\n",
+ "without having to manage multiple different data catalogs. Specifically,\n",
+ "we'll have a data catalog for each stage of the pipeline, and each catalog\n",
+ "will have entries for the inputs, outputs, and any other parameters needed\n",
+ "for that stage. This is similar to how we used Hydra configs, but\n",
+ "using the `pytask` data catalog, we can more easily gather the data\n",
+ "for a specific task in a structured manner entirely in Python."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| exports:\n",
+ "#\n",
+ "\n",
+ "stages = [\"mydata\", 'mydata2', # from the demo, ignore\n",
+ " \"download\", # download task\n",
+ " \"aggregate\", # aggregation task\n",
+ " \"publish\", # publishing task\n",
+ " \"viz\"] # visualization task\n",
+ "\n",
+ "buckets = [\n",
+ " \"inputs\", # any specific inputs, eg for carrying over between tasks\n",
+ " \"outputs\", # specific output task returns\n",
+ " \"jobs\", # job parameters as a dataframe\n",
+ " \"params\" # any lingering hardcoded parameters\n",
+ " ]\n",
+ "\n",
+ "data_catalog = {\n",
+ "\n",
+ " stage: {bucket: DataCatalog(name=f\"{stage}_{bucket}\") for bucket in buckets}\n",
+ " for stage in stages\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## The Download Task\n",
+ "\n",
+ "A good strategy may be to set pipeline stage parameters in the config file, \n",
+ "and then use the `pytask` data catalog to manage the data. This way, we can\n",
+ "easily change the parameters without having to modify the code. This is especially \n",
+ "useful for the API query, where we need to be able to set the parameter grid for\n",
+ "the years and data types we want to download data for. So, let's create an entry in the data catalog specifically for the download task.\n",
+ "\n",
+ "A good strategy I thought about for grid parameter comprehension is to create a dataframe that expands all the combinations of\n",
+ "parameters, and then use each combination to create the tasks, which are then \n",
+ "easily added to the data catalog. This way, we can still easily inspect the \n",
+ "pipeline and see what tasks are being run, while also being able to easily \n",
+ "change the parameters in the config file without too much hassle.\n",
+ "\n",
+ "An important framework decision I'm making here is that each ROW of the dataframe corresponds to a single task, so that we can quickly understand at a glance what the task is doing, and also easily develop the code for the task itself. This is different from the hydra approach where a job is first specified by a default config, and then the parameters are swept over in multiple config files. This is a more flexible approach, IMO, because:\n",
+ "\n",
+ "1. each row defines a single task run, so it's easy to understand what the run is doing\n",
+ "2. it's easy to add or remove runs by simply expanding the list of parameters and using dataframe filters to remove irrelevant parameter combinations\n",
+ "3. we don't have to independently inspect and manage multiple different/overriding config files\n",
+ "4. it's all in Python, so we can use the full power of the language to define\n",
+ " the parameters and the tasks in a single sweep, rather than relying on a\n",
+ " multi-stage, multi-language hydra+snakemake config system\n",
+ "\n",
+ "So, to do this, we define one job as a query to the CDS API that must contain:\n",
+ "- The dataset (re-analysis)\n",
+ "- The year\n",
+ "- The month\n",
+ "- All days in the month\n",
+ "- All times of day (hour)\n",
+ "- The geography (region), which will need:\n",
+ " - The URL to the shapefile to calculate the bounding box\n",
+ "\n",
+ "Given one combination of all of these, a single SLURM job can complete the first \"task\" in parallel by having a run assigned to each row of the dataframe."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| exports:\n",
+ "\n",
+ "# a dataframe for the query parameters, with nested entries for days, times, and variables\n",
+ "# Dimensions\n",
+ "years = [str(x) for x in range(2009, 2025)] # 16 years\n",
+ "months = [str(x).zfill(2) for x in range(1, 13)] # 12 months\n",
+ "geographies = [\"madagascar\", \"nepal\"] # 2 geographies\n",
+ "\n",
+ "# nested values; we want ALL days, times, and variables for each job\n",
+ "days = [str(x).zfill(2) for x in range(1, 32)]\n",
+ "times = [f\"{x:02d}:00\" for x in range(24)]\n",
+ "variables = [\"2m_dewpoint_temperature\", \"2m_temperature\", \"total_precipitation\", \"volumetric_soil_water_layer_1\"]\n",
+ "\n",
+ "product_type = \"reanalysis\"\n",
+ "\n",
+ "# Map shapefiles to geography\n",
+ "shapefiles = {\n",
+ " \"madagascar\": \"https://data.humdata.org/dataset/26fa506b-0727-4d9d-a590-d2abee21ee22/resource/ed94d52e-349e-41be-80cb-62dc0435bd34/download/mdg_adm_bngrc_ocha_20181031_shp.zip\",\n",
+ " \"nepal\": \"https://data.humdata.org/dataset/07db728a-4f0f-4e98-8eb0-8fa9df61f01c/resource/2eb4c47f-fd6e-425d-b623-d35be1a7640e/download/npl_adm_nd_20240314_ab_shp.zip\"\n",
+ "}\n",
+ "\n",
+ "# Build row-wise combinations of (year, month, geography)\n",
+ "rows = []\n",
+ "for year in years:\n",
+ " for month in months:\n",
+ " for geo in geographies:\n",
+ " rows.append({\n",
+ " \"year\": year,\n",
+ " \"month\": month,\n",
+ " \"geography\": geo,\n",
+ " \"shapefile\": shapefiles[geo],\n",
+ " \"product_type\": product_type,\n",
+ " \"day\": days,\n",
+ " \"time\": times,\n",
+ " \"variables\": variables,\n",
+ " \"output\": f\"{year}_{month}_{geo}\"\n",
+ " })\n",
+ "\n",
+ "# Create dataframe\n",
+ "query_df = pd.DataFrame(rows)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "query_df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(f\"Number of estimated jobs: {query_df.shape[0]}. Examples...\")\n",
+ "\n",
+ "for i, row in query_df.sample(3).iterrows():\n",
+ " print(f\"Year: {row['year']}, Month: {row['month']}, Geography: {row['geography']}, Link: {row['shapefile']}, Variables: {row['variables']}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now add them to the catalog. We're going to use a dictionary to\n",
+ "nest data catalogs so that we can return specific task products to\n",
+ "named data catalog nodes."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| export:\n",
+ "# set up catalog\n",
+ "\n",
+ "data_catalog['download']['jobs'].add(\"queries_df\", query_df)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Our data catalog now has a `download|jobs` node with a `queries_df` entry that contains the dataframe of all the jobs to be run in this task."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data_catalog['download']['jobs']['queries_df'].load().head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## The Aggregation Task\n",
+ "\n",
+ "To carry out the aggregation, we will follow similar logic to the original pipeline and use xarray to aggregate data into spatial and temporal averages. The aggregation task will take the downloaded data and compute the mean over the specified time period and spatial region. However, in this case, we want to aggregate the data diurnally, so we will need to fetch the sundown and sunrise times for the region and use them to compute the diurnal averages.\n",
+ "\n",
+ "Once again, we will use a dataframe to define the parameters for the aggregation task.\n",
+ "\n",
+ "Here we will use a dataframe with the jobs as rows;\n",
+ "the first column is \"input\" which is the list of query names from\n",
+ "the download task, and the last column is the output object name. Columns\n",
+ "in between can be the parameters needed for the aggregation task, which\n",
+ "then get expanded to the full list of jobs with `itertools.product`, `explode` or similar,\n",
+ "and filtered as necessary.\n",
+ "\n",
+ "For explanations of the parameters, see the Aggregation Task notebook's final `task_aggregate_data_diurnal` function."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| exports:\n",
+ "\n",
+ "# aggregate task parameters\n",
+ "\n",
+ "inputs = query_df[\"output\"].tolist()\n",
+ "outputs = [f\"{i}_agg\" for i in inputs]\n",
+ "\n",
+ "variable_dict = {\n",
+ " \"2m_dewpoint_temperature\": \"d2m\",\n",
+ " \"2m_temperature\": \"t2m\",\n",
+ " \"total_precipitation\": \"tp\",\n",
+ " \"volumetric_soil_water_layer_1\": \"swvl1\"\n",
+ "}\n",
+ "\n",
+ "# list of params that get fed into the task functions\n",
+ "agg_params = {\n",
+ " \"time\": [\"day\", \"night\"],\n",
+ " \"solar_classification\": [\"before\"],\n",
+ " \"variables\": variables,\n",
+ " \"variables_short\": [variable_dict[x] for x in variables],\n",
+ " \"aggregation_name\": [\"mean\", \"sum\", \"max\", \"min\"]\n",
+ "}\n",
+ "\n",
+ "from itertools import product\n",
+ "import pandas as pd\n",
+ "\n",
+ "# expand all the params\n",
+ "agg_params = pd.DataFrame(list(product(*agg_params.values())), columns=agg_params.keys())"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Inspecting it:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "agg_params"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's keep only rows where the variables and variables_short match"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| exports:\n",
+ "# quick filter to keep only matching rows\n",
+ "\n",
+ "agg_params = agg_params[agg_params.apply(lambda x: variable_dict[x['variables']] == x['variables_short'], axis=1)]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "agg_params"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Great, and now keeping `sum` only for total precipitation (we don't need mean, max, min for that variable), and removing `sum` for all other variables (we don't need sum for temperature or soil moisture):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| exports:\n",
+ "# remove rows where tp aggregation is not sum\n",
+ "mask = (agg_params['variables_short'] == \"tp\") & (agg_params['aggregation_name'] != \"sum\")\n",
+ "agg_params = agg_params[~mask]\n",
+ "\n",
+ "# remove rows where non-tp aggregation is sum\n",
+ "mask = (agg_params['variables_short'] != \"tp\") & (agg_params['aggregation_name'] == \"sum\")\n",
+ "agg_params = agg_params[~mask]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "agg_params"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now we add the input and output columns by joining:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| exports:\n",
+ "# set up inputs and parameters\n",
+ "inputs = pd.DataFrame({\"input\": inputs})\n",
+ "aggregate_jobs = inputs.merge(agg_params, how=\"cross\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This result gives us the full list of jobs for the aggregation task. 20 rows for the parameters,\n",
+ "and 384 inputs/outputs, giving a total of 7680 jobs:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "assert aggregate_jobs.shape[0] == 20 * len(inputs)\n",
+ "aggregate_jobs"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "A few more configuration items need to be added, like\n",
+ "the local timezone for each geography, the healthshed filename,\n",
+ "the healthshed unique ID variable name in the shapefile,\n",
+ "and whether the variable is instantaneous or accumulated:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| exports:\n",
+ "# add a few more columns\n",
+ "aggregate_jobs['local_tz'] = aggregate_jobs['input'].apply(\n",
+ " lambda x: \"Asia/Kathmandu\" if \"nepal\" in x else \"Indian/Antananarivo\"\n",
+ ")\n",
+ "aggregate_jobs['shapefile'] = aggregate_jobs['input'].apply(\n",
+ " lambda x: \"Nepal_Healthsheds2024.zip\" if \"nepal\" in x else \"healthsheds2022.zip\"\n",
+ ")\n",
+ "\n",
+ "aggregate_jobs['hshd_unique_id'] = aggregate_jobs['input'].apply(\n",
+ " lambda x: \"fid\" if \"nepal\" in x else \"fs_uid\"\n",
+ ")\n",
+ "\n",
+ "aggregate_jobs['climate_handler_var'] = aggregate_jobs['variables_short'].apply(\n",
+ " lambda x: \"accum\" if x == \"tp\" else \"instant\"\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "aggregate_jobs"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now we add this to the data catalog:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| exports:\n",
+ "# update catalog\n",
+ "data_catalog['aggregate']['jobs'].add(\"jobs_df\", aggregate_jobs)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Our data catalog now has an `aggregate|jobs` node with a `jobs_df` entry that contains the dataframe of all the jobs to be run in this task."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data_catalog['aggregate']['jobs']['jobs_df'].load().head()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "python3",
+ "language": "python",
+ "name": "python3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notes/20_pytask_logger.ipynb b/notes/20_pytask_logger.ipynb
new file mode 100644
index 0000000..e128bc1
--- /dev/null
+++ b/notes/20_pytask_logger.ipynb
@@ -0,0 +1,95 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "---\n",
+ "title: \"Logging: A simple logger to inject into `pytask` jobs\"\n",
+ "engine: jupyter\n",
+ "---\n",
+ "\n",
+ "## logger\n",
+ "\n",
+ "> A simple logger module for the pytask tasks"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| default_exp pytask_logger:\n",
+ "#|"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| hide:\n",
+ "# showdoc\n",
+ "from nbdev.showdoc import *"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| exports:\n",
+ "# imports \n",
+ "\n",
+ "import logging\n",
+ "from pathlib import Path\n",
+ "from pyprojroot import here\n",
+ "from datetime import datetime\n",
+ "\n",
+ "LOG_DIR = here(\"logs\")\n",
+ "# get the date & time for the log file name\n",
+ "log_date = datetime.now().strftime(\"%Y-%m-%d\")\n",
+ "log_time = datetime.now().strftime(\"%H-%M-%S\")\n",
+ "LOG_DIR = here(\"logs\") / log_date / log_time"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| exports:\n",
+ "# main function to setup a logger\n",
+ "\n",
+ "\n",
+ "\n",
+ "def setup_logger(name: str, log_path: Path=LOG_DIR, level=logging.INFO) -> logging.Logger:\n",
+ " log_path.mkdir(parents=True, exist_ok=True)\n",
+ " formatter = logging.Formatter('%(asctime)s — %(name)s — %(levelname)s — %(message)s')\n",
+ "\n",
+ " handler = logging.FileHandler(log_path / f\"{name}.log\", mode='a')\n",
+ " handler.setFormatter(formatter)\n",
+ "\n",
+ " logger = logging.getLogger(name)\n",
+ " logger.setLevel(level)\n",
+ " logger.addHandler(handler)\n",
+ " logger.propagate = False\n",
+ "\n",
+ " return logger"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "python3",
+ "language": "python",
+ "name": "python3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notes/21_pytask_download.ipynb b/notes/21_pytask_download.ipynb
new file mode 100644
index 0000000..6fb2d46
--- /dev/null
+++ b/notes/21_pytask_download.ipynb
@@ -0,0 +1,257 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "---\n",
+ "title: \"Download: `download` Module as a `pytask` Task\"\n",
+ "engine: jupyter\n",
+ "---\n",
+ "\n",
+ "## task_download \n",
+ "\n",
+ "> This module downloads the raw era5 data from the CDS API. It is similar to the original script, refactored for `pytask`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| default_exp task_download:\n",
+ "#|"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| hide:\n",
+ "# showdoc\n",
+ "from nbdev.showdoc import *"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We're going to quickly refactor the pipeline to use pytask instead of hydra and snakemake. This will hopefully demonstrate a simpler and more flexible way to manage data pipelines in Python.\n",
+ "\n",
+ "To start off, we need to create a function that queries the CDS API with one job. This function will be used to download the data for each query in the range specified in the data catalog in the config file.\n",
+ "\n",
+ "Let's take a look at the data catalog we created in the config module:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| export:\n",
+ "# necessary imports\n",
+ "import cdsapi\n",
+ "import pytask\n",
+ "import os\n",
+ "from pytask import task, Product\n",
+ "from pathlib import Path\n",
+ "from typing import Annotated\n",
+ "from pandas import Series\n",
+ "\n",
+ "from era5_sandbox.config import data_catalog\n",
+ "from era5_sandbox.config import BLD\n",
+ "from era5_sandbox.config import DEV_MODE\n",
+ "from era5_sandbox.pytask_logger import setup_logger\n",
+ "from era5_sandbox.download import fetch_GADM, create_bounding_box"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "You can see the queries entry we created in the data catalog. Each query is a row of a dataframe that contains the parameters for the CDS API query."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "queries = data_catalog['download']['jobs']['queries_df'].load()\n",
+ "queries"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can test this query like we did in the original work:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "example_query = queries.iloc[0]\n",
+ "\n",
+ "create_bounding_box(example_query['shapefile'])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "In this way, we have a similar approach to Hydra configs, but, using the `pytask` data catalog, we can more easily gather the data for a specific task in a structured manner entirely in Python."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| eval: false\n",
+ "\n",
+ "client = cdsapi.Client()\n",
+ "\n",
+ "ex_bounding_box = create_bounding_box(example_query['shapefile'])\n",
+ "\n",
+ "request = {\n",
+ " \"product_type\": example_query['product_type'],\n",
+ " \"variable\": example_query['variables'], \n",
+ " \"year\": str(example_query['year']),\n",
+ " \"month\": str(example_query['month']),\n",
+ " \"day\": example_query['day'],\n",
+ " \"time\": example_query['time'],\n",
+ " \"data_format\": \"netcdf\",\n",
+ " \"download_format\": \"unarchived\",\n",
+ " \"area\": ex_bounding_box\n",
+ " }\n",
+ "\n",
+ "target = f\"{example_query['output']}.nc\"\n",
+ "\n",
+ "client.retrieve(\"reanalysis-era5-single-levels\", request).download(target)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This works! So now we just need to create a `task_` function that pytask will recognise, so that the downloads can be parallelised over the queries:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| export:\n",
+ "# define the download task\n",
+ "\n",
+ "queries = data_catalog['download']['jobs']['queries_df'].load()\n",
+ "\n",
+ "for i, job in queries.iterrows():\n",
+ "\n",
+ " @task(id=job['output'], name=f\"Download {job['output']}\")\n",
+ " def task_download_raw_data(\n",
+ " _query: Series = job # The query object from the data catalog\n",
+ " )-> Annotated[Path, data_catalog['download']['outputs'][job['output']]]:\n",
+ " \n",
+ " logger = setup_logger(_query['output'])\n",
+ " output_path = BLD / f\"{_query['output']}.nc\"\n",
+ " logger.info(f\"Starting download for {_query['output']} to {output_path}\")\n",
+ "\n",
+ " # check if string file path exists\n",
+ " if os.path.exists(output_path):\n",
+ " logger.info(f\"File {output_path} already exists. Skipping download.\")\n",
+ " return output_path\n",
+ "\n",
+ " client = cdsapi.Client()\n",
+ " bounding_box = create_bounding_box(_query['shapefile'])\n",
+ " \n",
+ " request = {\n",
+ " \"product_type\": _query['product_type'],\n",
+ " \"variable\": _query['variables'], \n",
+ " \"year\": _query['year'],\n",
+ " \"month\": _query['month'],\n",
+ " \"day\": _query['day'],\n",
+ " \"time\": _query['time'],\n",
+ " \"data_format\": \"netcdf\",\n",
+ " \"download_format\": \"unarchived\",\n",
+ " \"area\": bounding_box\n",
+ " }\n",
+ " \n",
+ " client.retrieve(\"reanalysis-era5-land\", request).download(output_path)\n",
+ " logger.info(f\"Downloaded data for {_query['output']} to {output_path}\")\n",
+ "\n",
+ " return output_path"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### How this works (with some help from GPT):\n",
+ "\n",
+ "#### 🧠 How pytask Discovers and Executes Tasks\n",
+ "\n",
+ "When you run pytask, it automatically scans your project for Python files named `task_*.py`. In these files, it looks for:\n",
+ "- Functions decorated with `@task`, or\n",
+ "- Functions prefixed with `task_`\n",
+ "\n",
+ "These functions are not executed immediately. Instead, `pytask`:\n",
+ "1.\tImports each task_*.py module (just like Python would)\n",
+ "2.\tRegisters any matching task functions as nodes in a directed acyclic graph (DAG)\n",
+ "3.\tResolves dependencies by analyzing:\n",
+ " - Input annotations (e.g., `Annotated[x, DependsOn]`)\n",
+ " - Output declarations (e.g., `return` values or `Product` annotations)\n",
+ "4.\tBuilds the DAG, where each task function is a node\n",
+ "5.\tExecutes the tasks, respecting dependency order and skipping up-to-date nodes\n",
+ "\n",
+ "So even though the task functions aren’t explicitly “run” in the Python code itself, pytask knows how and when to execute them — based on their position in the DAG.\n",
+ "\n",
+ "#### 🔄 How This Differs from Snakemake\n",
+ "\n",
+ "In `snakemake`, you’re expected to define a series of explicitly executable rules, often using shell commands or Python scripts. You “stitch together” rules using filenames and wildcard matching.\n",
+ "\n",
+ "In contrast:\n",
+ "- 🐍 pytask is Python-native — tasks are just regular Python functions\n",
+ "- ⚙️ It builds a DAG from those functions and tracks inputs/outputs automatically\n",
+ "- 🧱 You are declaring nodes, not scripting execution\n",
+ "\n",
+ "Think of your Python files not as scripts to run, but as a way to define and wire together declarative tasks that will be executed by the pytask engine.\n",
+ "\n",
+ "---\n",
+ "\n",
+ "Because we defined this task in a function and loop, we can easily debug a node in the DAG by simply calling it:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| eval: false\n",
+ "task_download_raw_data()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "python3",
+ "language": "python",
+ "name": "python3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notes/22_pytask_aggregate.ipynb b/notes/22_pytask_aggregate.ipynb
new file mode 100644
index 0000000..6f0bccc
--- /dev/null
+++ b/notes/22_pytask_aggregate.ipynb
@@ -0,0 +1,906 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "---\n",
+ "title: \"Aggregation: The `aggregation` Module as a `pytask` Task\"\n",
+ "format: html\n",
+ "engine: jupyter\n",
+ "---\n",
+ "\n",
+ "# task_aggregate\n",
+ "\n",
+ "> This task aggregates the downloaded data into spatial and temporal averages. It uses xarray to compute summary statistics over the specified time period and spatial region. The aggregation is done diurnally, so we will fetch the sundown and sunrise times for the region and use them to compute the diurnal averages."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| default_exp task_aggregate:\n",
+ "#"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| hide:\n",
+ "# showdoc\n",
+ "\n",
+ "from nbdev.showdoc import *"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| export:\n",
+ "#\n",
+ "\n",
+ "import os\n",
+ "import tempfile\n",
+ "import rasterio\n",
+ "import yaml\n",
+ "import xarray as xr\n",
+ "from pyprojroot import here\n",
+ "from typing import Literal\n",
+ "from pytask import task, Product\n",
+ "from pathlib import Path\n",
+ "from typing import Annotated\n",
+ "from rasterstats.io import Raster\n",
+ "\n",
+ "from era5_sandbox.config import BLD, data_catalog\n",
+ "from era5_sandbox.pytask_logger import setup_logger\n",
+ "\n",
+ "from era5_sandbox.core import GoogleDriver, _get_callable, describe, ClimateDataFileHandler, kelvin_to_celsius\n",
+ "\n",
+ "from era5_sandbox.aggregate import polygon_to_raster_cells, aggregate_to_healthsheds, RasterFile, netcdf_to_tiff"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Diurnal Classification Based on Sun Position\n",
+ "\n",
+ "To do diurnal classification, we will need to fetch the sundown and sunrise times for the region and use them to compute the diurnal averages. We will use the [astral library](https://astral.readthedocs.io/en/latest/) to get the sunrise and sunset times for the specified latitude and longitude. The aggregation will be done using xarray, which allows us to compute the mean over the specified time period and spatial region.\n",
+ "\n",
+ "Here's our example file:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "eg_file = data_catalog['download']['outputs']['2009_01_nepal'].load()\n",
+ "with ClimateDataFileHandler(eg_file) as handler:\n",
+ " \n",
+ " ds = xr.open_dataset(handler.get_dataset(\"instant\"))\n",
+ " #ds = xr.open_dataset(handler.get_dataset(\"accum\"))\n",
+ "\n",
+ "ds"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can see the astral library in action below:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| exports:\n",
+ "#\n",
+ "from astral import Observer, sun\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "from tqdm import tqdm\n",
+ "import random\n",
+ "import datetime\n",
+ "from pytz import UTC"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# get the location of a datapoint in the dataset\n",
+ "lat, long = ds.coords[\"latitude\"].values[0], ds.coords[\"longitude\"].values[0]\n",
+ "time = ds['valid_time'].values[0]\n",
+ "dt = pd.to_datetime(time, utc=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "observer = Observer(latitude=lat, longitude=long, elevation=0)\n",
+ "sun_info = sun.sun(observer, date=dt)\n",
+ "sun_info"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Astral is very fast:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%timeit\n",
+ "\n",
+ "#fetch a random time from valid_time\n",
+ "options = ds['valid_time'].values\n",
+ "\n",
+ "random_time = random.choice(options)\n",
+ "dt = pd.to_datetime(random_time, utc=True)\n",
+ "sun_info = sun.sun(observer, date=dt)\n",
+ "if dt < sun_info['sunrise']:\n",
+ " print(f\"Randomly selected time: {dt} is pre_dawn\")\n",
+ "elif dt >= sun_info['sunrise'] and dt < sun_info['sunset']:\n",
+ " print(f\"Randomly selected time: {dt} is day\")\n",
+ "else:\n",
+ " print(f\"Randomly selected time: {dt} is post_dusk\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This tells us that we can use the valid time for the specific location of each data point in the query and know based on the sun whether it was daytime or nighttime. The runtime will be limited only by the looping.\n",
+ "Let's put this in a function so that we can use the resampling in `xarray`.\n",
+ "\n",
+ "The resampling approach will be a single function that can resample in three ways:\n",
+ "\n",
+ "- By calendar date, default (1 value per calendar date)\n",
+ "- By diurnal class by calendar date (3 values, pre-dawn, day, post-dusk)\n",
+ "- By solar date (2 values per calendar date, with night classified as \"before\" or \"after\")\n",
+ "\n",
+ "Therefore, we'll need 2 internal functions; one to do diurnal, and one to do solar date bins.\n",
+ "\n",
+ "Essentially, we are going to create an array-shaped index/mask, (time, latitude, longitude). As a\n",
+ "demonstration, this loop goes through the first 24 time points in the dataset,\n",
+ "and calculates the sun info for each latitude and longitude, assigning the values to an array:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%time \n",
+ "times = ds['valid_time'].values[:24]\n",
+ "lats = ds.coords['latitude'].values\n",
+ "lons = ds.coords['longitude'].values\n",
+ "\n",
+ "result = np.full((len(times), len(lats), len(lons)), \"\", dtype=object)\n",
+ "\n",
+ "for i, dt in enumerate(times):\n",
+ "\n",
+ " for j, lat in enumerate(lats):\n",
+ "\n",
+ " for k, lon in enumerate(lons):\n",
+ " \n",
+ " # set the geographical position\n",
+ " observer = Observer(latitude=lat, longitude=lon, elevation=0)\n",
+ " \n",
+ " # use the time\n",
+ " dt = pd.to_datetime(dt, utc=True)\n",
+ "\n",
+ " # where/when is the sun at this time for this position\n",
+ " sun_info = sun.sun(observer, date=dt)\n",
+ " result[i, j, k] = sun_info"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "So we know that in the first hour, the sun goes up and comes down at slightly different\n",
+ "times based on latitude and longitude. Take the first hour, for example:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(result.shape)\n",
+ "hour_1 = 0 # 0th index of the results\n",
+ "\n",
+ "min_lat = 0\n",
+ "min_lon = 0\n",
+ "max_lat = 48\n",
+ "max_lon = 90\n",
+ "print(f\"Even though the reading came from the first HOUR of data UTC, the sun info at the minimum latitude/longitude is: {result[hour_1, min_lat, min_lon]}\")\n",
+ "\n",
+ "print(f\"this is different from the sun info at the maximum latitude/longitude is: {result[hour_1, max_lat, max_lon]}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| export:\n",
+ "# define the basic diurnal classification function\n",
+ "\n",
+ "def compute_diurnal_class_bins(\n",
+ " ds: xr.Dataset\n",
+ " )-> np.ndarray:\n",
+ " \"\"\"\n",
+ " Compute the diurnal value for each data point in the dataset.\n",
+ " This function iterates over each data point in the dataset,\n",
+ " calculates the sunrise and sunset times for the given time, latitude and longitude,\n",
+ " and returns whether or not that data point is before dawn, during the day, or after dusk.\n",
+ " \"\"\"\n",
+ "\n",
+ " times = ds['valid_time'].values\n",
+ " lats = ds.coords['latitude'].values\n",
+ " lons = ds.coords['longitude'].values\n",
+ "\n",
+ " result = np.full((len(times), len(lats), len(lons)), \"\", dtype=object)\n",
+ "\n",
+ " for i, dt in enumerate(tqdm(times, desc=\"Classifying data points by sun position\")):\n",
+ " # use the time\n",
+ " dt = pd.to_datetime(dt, utc=True)\n",
+ "\n",
+ " for j, lat in enumerate(lats):\n",
+ "\n",
+ " for k, lon in enumerate(lons):\n",
+ " \n",
+ " # set the geographical position\n",
+ " observer = Observer(latitude=lat, longitude=lon, elevation=0)\n",
+ " \n",
+ " # where/when is the sun at this time for this position\n",
+ " sun_info = sun.sun(observer, date=dt)\n",
+ " \n",
+ " if dt < sun_info['sunrise']:\n",
+ " result[i, j, k] = \"pre_dawn\"\n",
+ " elif dt >= sun_info['sunrise'] and dt < sun_info['sunset']:\n",
+ " result[i, j, k] = \"day\"\n",
+ " else:\n",
+ " result[i, j, k] = \"post_dusk\"\n",
+ "\n",
+ " return result"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ex=compute_diurnal_class_bins(ds)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "So, for our 720 time points, we should find that\n",
+ "if we take the `set()` of all the classifications within that slice,\n",
+ "there should be a few of them with 2 classes.\n",
+ "In other words, at any given hour, almost all of\n",
+ "the readings are \"day\", because it is daytime across all\n",
+ "of Nepal, _but_ at certain timepoints, the sun is rising\n",
+ "or setting in the northern part of the country and so some\n",
+ "portion of the slice is classified differently:\n",
+ "\n",
+ ""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "for x in range(720):\n",
+ " print(set(ex[x].flatten()))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This works! Now we can do a similar, but slightly more\n",
+ "complicated function to define \"night\" and \"day\",\n",
+ "where \"night\" includes all of the values after the sun goes down."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| exports:\n",
+ "# \n",
+ "\n",
+ "def compute_solar_day_night_class_bins(\n",
+ " ds: xr.Dataset,\n",
+ " night_direction: Literal[\"before\", \"after\"],\n",
+ " )-> list:\n",
+ " \"\"\"\n",
+ " Compute the diurnal value for each data point in the dataset.\n",
+ " This function iterates over each data point in the dataset,\n",
+ " calculates the sunrise and sunset times for the given time, latitude and longitude,\n",
+ " and returns whether or not that data point is daytime or nighttime.\n",
+ " The definition of \"nighttime\" can be set to be all the darkness before the sun\n",
+ " came up (before), or all the darkness after it went down (after).\n",
+ " \"\"\"\n",
+ "\n",
+ " times = ds['valid_time'].values\n",
+ " lats = ds.coords['latitude'].values\n",
+ " lons = ds.coords['longitude'].values\n",
+ "\n",
+ " result = np.full((len(times), len(lats), len(lons)), \"\", dtype=object)\n",
+ " datetimes = np.full((len(times), len(lats), len(lons)), \"\", dtype=object)\n",
+ "\n",
+ " for i, dt in enumerate(tqdm(times, desc=\"Classifying data points by sun position\")):\n",
+ " # use the time\n",
+ " dt = pd.to_datetime(dt, utc=True)\n",
+ "\n",
+ " for j, lat in enumerate(lats):\n",
+ "\n",
+ " for k, lon in enumerate(lons):\n",
+ " \n",
+ " # set the geographical position\n",
+ " observer = Observer(latitude=lat, longitude=lon, elevation=0)\n",
+ " if night_direction == \"before\":\n",
+ " # Night is from previous sunset to today's sunrise\n",
+ " sun_today = sun.sun(observer, date=dt.date())\n",
+ " sun_prev = sun.sun(observer, date=(dt - pd.Timedelta(days=1)).date())\n",
+ " night_start = sun_prev[\"sunset\"].astimezone(pd.Timestamp.utcnow().tz)\n",
+ " night_end = sun_today[\"sunrise\"].astimezone(pd.Timestamp.utcnow().tz)\n",
+ " \n",
+ " # the reading is from yesterday's nighttime\n",
+ " if night_start <= dt < night_end:\n",
+ " result[i, j, k] = \"night\"\n",
+ " # the date counts as today\n",
+ " datetimes[i, j, k] = dt.date()\n",
+ "\n",
+ " # the reading is from daytime\n",
+ " elif sun_today[\"sunrise\"] <= dt < sun_today[\"sunset\"]:\n",
+ " result[i, j, k] = \"day\"\n",
+ " # the date counts as today\n",
+ " datetimes[i, j, k] = dt.date()\n",
+ " \n",
+ " # the reading is from today's nighttime, but counts as tomorrow's night\n",
+ " else:\n",
+ " result[i, j, k] = \"night\"\n",
+ " # the date is tomorrow\n",
+ " datetimes[i, j, k] = (dt + pd.Timedelta(days=1)).date()\n",
+ "\n",
+ " elif night_direction == \"after\":\n",
+ " # Night is from today's sunset to next sunrise\n",
+ " sun_today = sun.sun(observer, date=dt.date())\n",
+ " sun_next = sun.sun(observer, date=(dt + pd.Timedelta(days=1)).date())\n",
+ " night_start = sun_today[\"sunset\"].astimezone(pd.Timestamp.utcnow().tz)\n",
+ " night_end = sun_next[\"sunrise\"].astimezone(pd.Timestamp.utcnow().tz)\n",
+ "\n",
+ " # the reading is from daytime\n",
+ " if sun_today[\"sunrise\"] <= dt < sun_today[\"sunset\"]:\n",
+ " result[i, j, k] = \"day\"\n",
+ " # the date counts as today\n",
+ " datetimes[i, j, k] = dt.date()\n",
+ " # the reading is from tonight\n",
+ " elif night_start <= dt < night_end:\n",
+ " result[i, j, k] = \"night\"\n",
+ " # the date counts as today\n",
+ " datetimes[i, j, k] = dt.date()\n",
+ "\n",
+ " # the reading is from yesterday night\n",
+ " else:\n",
+ " # the date counts as yesterday\n",
+ " result[i, j, k] = \"night\"\n",
+ " datetimes[i, j, k] = (dt - pd.Timedelta(days=1)).date()\n",
+ " else:\n",
+ " raise ValueError(f\"Invalid night_direction: {night_direction}\")\n",
+ "\n",
+ " return result, datetimes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%time\n",
+ "ex_class, ex_dt = compute_solar_day_night_class_bins(ds, \"before\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ex_class"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As before, we should see that most slices are homogeneous,\n",
+ "meaning most of the time, all the readings are from the day,\n",
+ "but some slices should have day and night values:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "for slice_ in range(720):\n",
+ " print(set(ex_class[slice_].flatten()))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The returned array can serve as new \"variable indexes\" for the dataset:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ds_masked = ds.copy()\n",
+ "ds_masked['solar_class'] = (('valid_time', 'latitude', 'longitude'), ex_class)\n",
+ "ds_masked[\"solar_date\"] = ((\"valid_time\", \"latitude\", \"longitude\"), ex_dt)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Diurnal Resampling\n",
+ "\n",
+ "Now let's see whether the data can be resampled by both solar day and diurnal class. We'll try masking the dataset and making copies with NaN in the masked-out values:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ds_day = ds_masked.where(ds_masked[\"solar_class\"] == \"day\").drop_vars([\"solar_class\", \"solar_date\"])\n",
+ "ds_night = ds_masked.where(ds_masked[\"solar_class\"] == \"night\").drop_vars([\"solar_class\", \"solar_date\"])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Next, we set the time zone for Nepal since, to resample by day and night,\n",
+ "we should observe the local time:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ds_day = ds_day.assign_coords(valid_time=pd.to_datetime(ds[\"valid_time\"].values).tz_localize(\"UTC\").tz_convert(\"Asia/Kathmandu\"))\n",
+ "ds_night = ds_night.assign_coords(valid_time=pd.to_datetime(ds[\"valid_time\"].values).tz_localize(\"UTC\").tz_convert(\"Asia/Kathmandu\"))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now let's see if we can resample by day..."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ds_day_rs = ds_day.resample(valid_time=\"1D\").reduce(np.nanmean)\n",
+ "ds_night_rs = ds_night.resample(valid_time=\"1D\").reduce(np.nanmean)\n",
+ "ds_day_rs"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Can we successfully convert this to a tiff?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from era5_sandbox.aggregate import netcdf_to_tiff"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "raster_day = netcdf_to_tiff(ds_day_rs, band=1, variable=\"d2m\")\n",
+ "raster_night = netcdf_to_tiff(ds_night_rs, band=1, variable=\"d2m\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Looks great! These two rasters represent one calendar day of daytime and nighttime values.\n",
+ "\n",
+ "### Testing Polygon to Raster Cells & Healthshed Aggregation\n",
+ "\n",
+ "The penultimate step of the aggregate pipeline in the original version is\n",
+ "assigning each datapoint to the respective healthshed. The `vectors` argument\n",
+ "comes from the healthshed, and represents each geographic polygon on the ground\n",
+ "that we want to aggregate data to."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from hydra import initialize, compose"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "try:\n",
+ " with initialize(version_base=None, config_path=\"../conf\"):\n",
+ " cfg = compose(config_name='config.yaml')\n",
+ "except Exception as e:\n",
+ " print(f\"Error initializing Hydra: {e}\")\n",
+ " with initialize(version_base=None, config_path=\"conf\"):\n",
+ " cfg = compose(config_name='config.yaml')\n",
+ "\n",
+ "driver = GoogleDriver(json_key_path=here() / cfg.GOOGLE_DRIVE_AUTH_JSON.path)\n",
+ "drive = driver.get_drive()\n",
+ "healthsheds = driver.read_healthsheds(\"Nepal_Healthsheds2024.zip\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "res_poly2cell=polygon_to_raster_cells(\n",
+ " vectors = healthsheds.geometry.values, # the geometries of the shapefile of the regions\n",
+ " raster=raster_day.data, # the raster data above\n",
+ " nodata=np.nan, # any intersections with no data, may have to be np.nan\n",
+ " affine=raster_day.transform, # some math thing need to revise\n",
+ " all_touched=True, \n",
+ " verbose=True\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This works fine. Finally, we aggregate to healthsheds:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from era5_sandbox.aggregate import aggregate_to_healthsheds"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "result_day = aggregate_to_healthsheds(\n",
+ " res_poly2cell=res_poly2cell,\n",
+ " raster=raster_day,\n",
+ " shapes=healthsheds,\n",
+ " names_column=\"fid\",\n",
+ " aggregation_func=np.nanmean,\n",
+ " aggregation_name=\"mean_dewpoint_day\"\n",
+ ")\n",
+ "\n",
+ "result_night = aggregate_to_healthsheds(\n",
+ " res_poly2cell=res_poly2cell,\n",
+ " raster=raster_night,\n",
+ " shapes=healthsheds,\n",
+ " names_column=\"fid\",\n",
+ " aggregation_func=np.nanmean,\n",
+ " aggregation_name=\"mean_dewpoint_night\"\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Below shows the result of aggregating the daytime dewpoint temperature to the healthshed level:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "result_day"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "result_night"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "So from one input, we will have two outputs, one for daytime and one for nighttime, and this will have to loop over the bands (i.e., each day in the month).\n",
+ "\n",
+ "# Putting it all together in a `pytask` task\n",
+ "\n",
+ "Below we define our `pytask` task to aggregate data to the healthshed level."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| exports:\n",
+ "#\n",
+ "\n",
+ "job_rows = data_catalog['aggregate']['jobs']['jobs_df'].load()\n",
+ "\n",
+ "aggregation_funcs = {\n",
+ " \"mean\": np.nanmean,\n",
+ " \"sum\": np.nansum,\n",
+ " \"max\": np.nanmax,\n",
+ " \"min\": np.nanmin\n",
+ "}\n",
+ "\n",
+ "for i, job in job_rows.iterrows():\n",
+ " #print(f\"Job {i+1}: variable={job['variables']}, time={job['time']}, aggregation={job['aggregation_name']}\")\n",
+ "\n",
+ " # parse the row into function parameters\n",
+ " input_file = data_catalog['download']['outputs'][job['input']]\n",
+ " solar_classification = job['solar_classification']\n",
+ " variable = job['variables_short']\n",
+ " time = job['time']\n",
+ " aggregation_func = aggregation_funcs[job['aggregation_name']]\n",
+ " aggregation_name = job['aggregation_name']\n",
+ "\n",
+ " climate_handler_var = job['climate_handler_var']\n",
+ " local_tz = job['local_tz']\n",
+ "\n",
+ " shapefile = job['shapefile']\n",
+ " hshd_unique_id = job['hshd_unique_id']\n",
+ "\n",
+ " output_file = job['input'] + \"_\" + job['time'] + \"_\" + job['variables_short'] + \"_\" + job['aggregation_name'] + \".parquet\"\n",
+ "\n",
+ " @task(id=output_file, name=f\"Aggregate {output_file}\", after=\"task_download_raw_data\")\n",
+ " def task_aggregate_data_diurnal(\n",
+ " input_file: Path = data_catalog['download']['outputs'][job['input']], # input data Path from the download task\n",
+ " aggregation_func: callable = aggregation_func, # the aggregation function\n",
+ " aggregation_name: str = aggregation_name, # the name of the aggregation function\n",
+ " time: Literal[\"day\", \"night\"] = time, # whether to aggregate by day or night\n",
+ " night_direction: Literal[\"before\", \"after\"] = solar_classification, # how to define night\n",
+ " variable: str = variable, # the variable to aggregate,\n",
+ " climate_handler_var: Literal[\"instant\", \"accum\"] = climate_handler_var, # whether the variable is instant or accum,\n",
+ " local_tz: str = local_tz, # the local timezone for resampling\n",
+ " shapefile: str = shapefile, # the shapefile for the healthsheds,\n",
+ " hshd_unique_id: str = hshd_unique_id, # the unique id column in the shapefile,\n",
+ " output_file: str = output_file # the output file name\n",
+ " ) -> Annotated[Path, data_catalog['aggregate']['outputs'][output_file]]:\n",
+ " \"\"\"\n",
+ " Task to aggregate data from a CDSAPI Query to the healthshed\n",
+ " level. Returns path to parquet file with aggregated data.\n",
+ " \"\"\"\n",
+ "\n",
+ " logger = setup_logger(output_file)\n",
+ "\n",
+ " logger.info(f\"Aggregating: {output_file}\")\n",
+ "\n",
+ " # check if the string path exists\n",
+ " # if os.path.exists(output_file):\n",
+ " # logger.info(f\"File {output_file} already exists. Skipping aggregation.\")\n",
+ " # return output_file\n",
+ "\n",
+ " # get input data\n",
+ " logger.info(\"Reading input data...\")\n",
+ " with ClimateDataFileHandler(input_file) as handler:\n",
+ " ds = xr.open_dataset(handler.get_dataset('instant'))\n",
+ "\n",
+ " #get the healthshed shapefile\n",
+ " logger.info(f\"Reading healthshed shapefile from yaml {here()}...\")\n",
+ " with open(here() / \"conf\" / \"config.yaml\") as f:\n",
+ " healthshed_config = yaml.safe_load(f)\n",
+ "\n",
+ " key_path = here() / healthshed_config['GOOGLE_DRIVE_AUTH_JSON']['path']\n",
+ "\n",
+ " driver = GoogleDriver(json_key_path=key_path)\n",
+ " drive = driver.get_drive()\n",
+ " healthsheds = driver.read_healthsheds(shapefile)\n",
+ "\n",
+ " # compute the diurnal classification bins\n",
+ " logger.info(\"Computing diurnal classification bins...\")\n",
+ " class_bins, class_dts = compute_solar_day_night_class_bins(ds, night_direction)\n",
+ "\n",
+ " ds_masked = ds.copy()\n",
+ "\n",
+ " # assign classifications\n",
+ " logger.info(\"Assigning classification bins to dataset...\")\n",
+ " ds['solar_class'] = (('valid_time', 'latitude', 'longitude'), class_bins)\n",
+ " ds[\"solar_date\"] = ((\"valid_time\", \"latitude\", \"longitude\"), class_dts)\n",
+ "\n",
+ " # mask the dataset to the requested time\n",
+ " mask = ds[\"solar_class\"] == time\n",
+ " ds_masked = ds_masked.where(mask)\n",
+ "\n",
+ " # set the local timezone\n",
+ " ds_masked = ds_masked.assign_coords(valid_time=pd.to_datetime(ds[\"valid_time\"].values).tz_localize(\"UTC\").tz_convert(local_tz))\n",
+ "\n",
+ " # resample by local date\n",
+ " logger.info(\"Resampling by local date...\")\n",
+ " ds_rs = ds_masked.resample(valid_time=\"1D\").reduce(aggregation_func)\n",
+ "\n",
+ " # convert to tiff\n",
+ " logger.info(\"Rasterizing resampled data...\")\n",
+ " n_bands = ds_rs.dims['valid_time']\n",
+ "\n",
+ " # polygon to raster cells for the first band\n",
+ " logger.info(\"Converting polygons to raster cells...\")\n",
+ " raster = netcdf_to_tiff(ds_rs, band=1, variable=variable)\n",
+ " res_poly2cell=polygon_to_raster_cells(\n",
+ " vectors = healthsheds.geometry.values, # the geometries of the shapefile of the regions\n",
+ " raster=raster.data, # the raster data above\n",
+ " nodata=np.nan, # any intersections with no data, may have to be np.nan\n",
+ " affine=raster.transform, # some math thing need to revise\n",
+ " all_touched=True, \n",
+ " verbose=True\n",
+ " )\n",
+ "\n",
+ " result_df = healthsheds[[hshd_unique_id, \"geometry\"]].copy()\n",
+ "\n",
+ " # loop over bands and aggregate to healthsheds\n",
+ " for band in tqdm(range(1, n_bands + 1)):\n",
+ " logger.info(f\"Processing band {band} of {n_bands}...\")\n",
+ " \n",
+ " day = band # band is 1-indexed\n",
+ "\n",
+ " day_col = f\"day_{day:02d}\"\n",
+ "\n",
+ " # calculate raster for this band\n",
+ " raster = netcdf_to_tiff(ds_rs, band=band, variable=variable)\n",
+ "\n",
+ " # aggregate to healthsheds\n",
+ " result = aggregate_to_healthsheds(\n",
+ " res_poly2cell=res_poly2cell,\n",
+ " raster=raster,\n",
+ " shapes=healthsheds,\n",
+ " names_column=hshd_unique_id,\n",
+ " aggregation_func=aggregation_func,\n",
+ " aggregation_name=variable\n",
+ " )\n",
+ " \n",
+ " # add band to result dataframe\n",
+ " result_df[day_col] = result[variable]\n",
+ "\n",
+ " # save to parquet\n",
+ " result_df.to_parquet(f\"{BLD}/{output_file}\")\n",
+ "\n",
+ " logger.info(\"Aggregation complete.\")\n",
+ " \n",
+ " return Path(f\"{BLD}/{output_file}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "That should wrap it up! To test, we can run a single job:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| eval: false\n",
+ "# runs the last defined job only\n",
+ "task_aggregate_data_diurnal()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Or we can run the task in `pytask`:\n",
+ "\n",
+ "```bash\n",
+ "pytask build -k \"nepal and 2009\" --dry-run\n",
+ "```"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "python3",
+ "language": "python",
+ "name": "python3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notes/IMG_740012467778-1.jpeg b/notes/IMG_740012467778-1.jpeg
new file mode 100644
index 0000000..52886eb
Binary files /dev/null and b/notes/IMG_740012467778-1.jpeg differ
diff --git a/notes/prototypes/aggregation_visualizer.Rmd b/notes/_prototypes/_aggregation_visualizer.Rmd
similarity index 100%
rename from notes/prototypes/aggregation_visualizer.Rmd
rename to notes/_prototypes/_aggregation_visualizer.Rmd
diff --git a/notes/_prototypes/_download_QA.qmd b/notes/_prototypes/_download_QA.qmd
new file mode 100644
index 0000000..d2d492b
--- /dev/null
+++ b/notes/_prototypes/_download_QA.qmd
@@ -0,0 +1,15 @@
+# Investigating The Download Results
+
+There are a couple of things we should do to QA our data downloads. Specifically, we want to come up with a way of ensuring our aggregations are valid and accurate. This will require some simple EDA.
+
+```{python}
+from pyprojroot import here
+import pandas as pd
+import os
+from hydra import initialize, compose
+from omegaconf import OmegaConf, DictConfig
+```
+
+```{python}
+eg_file = here() / "data/input/2010_1.nc"
+```
\ No newline at end of file
diff --git a/notes/_prototypes/_kenya_demo_01_intro.qmd b/notes/_prototypes/_kenya_demo_01_intro.qmd
new file mode 100644
index 0000000..6bbc193
--- /dev/null
+++ b/notes/_prototypes/_kenya_demo_01_intro.qmd
@@ -0,0 +1,247 @@
+---
+skip_showdoc: true
+---
+
+# Introduction to the ERA 5 Data
+
+The ERA5 dataset is the fifth iteration of the ECMWF ReAnalysis dataset, spanning from 1950 to the present. ECMWF is the "European Centre for Medium-Range Weather Forecasts".
+The dataset provides comprehensive and high-resolution historical weather and climate data. The source data is from the [Copernicus Climate Data Store (CDS)](https://cds.climate.copernicus.eu/#!/home). A comprehensive data documentation guide is available [here](https://confluence.ecmwf.int/display/CKB/ERA5%3A+data+documentation). In total, the entire CDS ERA data is over 10Petabytes.
+
+Fortunately for us, there are existing [Python](https://github.com/Climate-CAFE/era5-daily-heat-aggregation-python) and [R](https://github.com/Climate-CAFE/era5-daily-heat-aggregation) packages that have gone ahead and demonstrated extracting the data from the API for us, so we are going to use those to develop our workflow. Specifically, we're trying to understand the
+following characteristics of the data:
+
+* size,
+* how to download,
+* what are the key transformations to map things into the health sheds
+* two important variables:
+ * 2m air temp, and,
+ * 2m air dew point
+
+Let's get started
+
+Important: we need to install the CDS API first, so you'll need to grab an API key. First, you must register for an account and accept the T&Cs, after which the page [here](https://ecmwf-projects.github.io/copernicus-training-c3s/cds-tutorial.html#install-the-cds-api-key) will auto-populate an API key for you. The following code shows a test case to make sure your API key works.
+
+```{python}
+import cdsapi
+
+client = cdsapi.Client()
+
+dataset = 'reanalysis-era5-pressure-levels'
+request = {
+ 'product_type': ['reanalysis'],
+ 'variable': ['geopotential'],
+ 'year': ['2024'],
+ 'month': ['03'],
+ 'day': ['01'],
+ 'time': ['13:00'],
+ 'pressure_level': ['1000'],
+ 'data_format': 'grib',
+}
+target = 'download.grib'
+
+client.retrieve(dataset, request, target)
+```
+
+This demonstration is expected to amass 9GB of data for raw raster files (24 years, 12 files per year). The demonstration generates the 24 years of heat measures across Kenya administrative boundaries, in 1-month periods of ERA5-Land data across Kenya with three variables (2-m temp, dew point temp, skin temp)
+
+```{python}
+# imports as recommended by the github repo
+import cdsapi
+import geopandas as gpd
+import os
+```
+
+I'll use pyprojroot to specify a data path
+
+```{python}
+from pyprojroot.here import here
+
+ecmw_dir = here("data")
+```
+
+```{python}
+def create_dir(path):
+    """Create `path` (and any missing parents) if needed, then return it."""
+    # exist_ok=True replaces the separate exists() check: no check-then-create
+    # race, and re-running the notebook on an existing directory is a no-op.
+    os.makedirs(path, exist_ok=True)
+    return path
+```
+
+```{python}
+create_dir(ecmw_dir)
+```
+
+```{python}
+# create a directory for the kenya data
+create_dir(os.path.join(ecmw_dir, "Kenya_GADM"))
+```
+
+Next, we need to manually fetch this GADM file for Kenya from here: https://gadm.org/download_country.html
+
+This is a boundaries geopackage; GADM is a global database of administrative boundaries (e.g., countries, states, provinces, districts). Hence, this file provides the
+boundaries for Kenyan regions.
+
+```{python}
+kenya_shape = gpd.read_file(os.path.join(ecmw_dir, "Kenya_GADM/gadm41_KEN.gpkg"), layer = "ADM_ADM_0")
+```
+
+```{python}
+kenya_shape
+```
+
+The bounding box represents the coordinates of the shapefile, which is what we'll
+use to query Copernicus. Think of it like a mask provided in a file
+
+```{python}
+kenya_bbox = kenya_shape.total_bounds
+```
+
+```{python}
+kenya_bbox
+```
+
+Technical: Add a small buffer around the bounding box to ensure the whole region
+is queried, and round the parameters to a 0.1 resolution. A 0.1 resolution
+is applied because the ERA5-Land grid queried below is 0.1 x 0.1 degrees (ERA5 itself is 0.25 x 0.25):
+https://confluence.ecmwf.int/display/CKB/ERA5%3A+What+is+the+spatial+reference
+
+```{python}
+kenya_bbox[0] = round(kenya_bbox[0], 1) - 0.1
+kenya_bbox[1] = round(kenya_bbox[1], 1) - 0.1
+kenya_bbox[2] = round(kenya_bbox[2], 1) + 0.1
+kenya_bbox[3] = round(kenya_bbox[3], 1) + 0.1
+```
+
+```{python}
+# CDS expects the sub-region as [North, West, South, East], i.e. [ymax, xmin, ymin, xmax]
+query_area = [kenya_bbox[3], kenya_bbox[0], kenya_bbox[1], kenya_bbox[2]]
+```
+
+```{python}
+query_years = list(range(2000, 2024))
+query_years_str = [str(x) for x in query_years]
+
+query_months = list(range(1, 13))
+query_months_str = [str(x).zfill(2) for x in query_months]
+```
+
+```{python}
+output_dir = create_dir(os.path.join(ecmw_dir, "ERA5_out"))
+```
+
+```{python}
+# Build the client and dataset name once; neither changes between requests.
+client = cdsapi.Client()
+dataset = "reanalysis-era5-land"
+for year_str in query_years_str:
+    # Track progress
+    print("Now processing year ", year_str, "\n")
+
+    # For each year, the query is divided into each month sections.
+    # If a request is too large, it will not be accepted by the CDS servers,
+    # so this division of requests is required.
+    for month_str in query_months_str:
+        # Track progress
+        print("Now processing month ", month_str, "\n")
+
+        # The below is the formatted API request language. All of the inputs
+        # specified below in proper formatting can be identified by forming a
+        # request using the Copernicus CDS point-and-click interface for data
+        # requests. https://cds.climate.copernicus.eu/cdsapp#!/dataset/reanalysis-era5-land?tab=form
+        # Select the variables, timing, and netcdf as the output format, and then
+        # select "Show API Request" at the bottom of the screen.
+
+        # Note that the argument in the download() function is the file path and
+        # file name that data will be exported to and stored at. If using a loop,
+        # ensure that the unique features of each request are noted in the output.
+
+        # Note: need to create "ERA5_Out" subfolder on your path
+
+        request = {
+            "product_type": "reanalysis",
+            "variable": ["2m_dewpoint_temperature",
+                         "2m_temperature",
+                         "skin_temperature"],
+            "year": year_str,
+            "month": month_str,
+            "day": [
+                "01", "02", "03",
+                "04", "05", "06",
+                "07", "08", "09",
+                "10", "11", "12",
+                "13", "14", "15",
+                "16", "17", "18",
+                "19", "20", "21",
+                "22", "23", "24",
+                "25", "26", "27",
+                "28", "29", "30",
+                "31"],
+            "time": [
+                "00:00", "01:00", "02:00",
+                "03:00", "04:00", "05:00",
+                "06:00", "07:00", "08:00",
+                "09:00", "10:00", "11:00",
+                "12:00", "13:00", "14:00",
+                "15:00", "16:00", "17:00",
+                "18:00", "19:00", "20:00",
+                "21:00", "22:00", "23:00"],
+            "data_format": "netcdf",
+            "download_format": "unarchived",
+            "area": query_area
+        }
+
+        out_path = os.path.join(output_dir, "{}_{}.nc".format(year_str, month_str))
+        client.retrieve(dataset, request).download(out_path)
+```
+
+From the logs, it looks like one month of data takes approximately 10 minutes. In this query, we might end up with 48 hours of downloading for the full 24 years. Clearly this will need to be multithreaded/parallelized to be efficient.
+
+## Testing Soil Moisture Downloads
+
+```{python}
+import cdsapi
+
+dataset = "reanalysis-era5-land"
+request = {
+ "variable": ["volumetric_soil_water_layer_1"],
+ "year": "2009",
+ "month": "01",
+ "day": ["01"],
+ "time": ["01:00"],
+ "data_format": "netcdf",
+ "download_format": "unarchived"
+}
+
+client = cdsapi.Client()
+client.retrieve(dataset, request).download()
+```
+
+```{python}
+from pyprojroot.here import here
+
+ecmw_dir = here("notes/prototypes/")
+```
+
+```{python}
+ecmw_dir / "soil.nc"
+```
+
+```{python}
+import xarray
+import os
+
+temp_file = xarray.open_dataset(os.path.join(ecmw_dir / "soil.nc"), decode_coords="all")
+```
+
+```{python}
+temp_file
+```
+
+```{python}
+temp_file['swvl1'].plot()
+```
+
+```{python}
+
+```
\ No newline at end of file
diff --git a/notes/_prototypes/_kenya_demo_02_fishnet.qmd b/notes/_prototypes/_kenya_demo_02_fishnet.qmd
new file mode 100644
index 0000000..120d456
--- /dev/null
+++ b/notes/_prototypes/_kenya_demo_02_fishnet.qmd
@@ -0,0 +1,228 @@
+---
+skip_showdoc: true
+---
+
+# Part 2: Aggregation via Fishnet
+
+This script is the first in a two-step raster processing process. In this script a grid-based polygon will be derived from the raster grid of ERA5 data. The goal
+is to create a fishnet that can be used to extract ERA5 data from raster stack including ERA5 hourly data (this file). This will allow for extraction from raster stack
+without the large computational burden of a loop (as below)
+
+```{python}
+import geopandas as gpd
+import os
+import glob
+# xarray makes working with labelled multi-dimensional arrays in Python simple, efficient, and fun!
+import xarray
+# The rioxarray package is an extension of xarray designed
+# for working with raster (geospatial) data in Python.
+# It provides an easy way to read, write, and manipulate GeoTIFF and other raster formats while maintaining spatial metadata.
+import rioxarray
+# for geometric operations on vector data (points, lines, polygons). It allows users to create, manipulate, and analyze geometric shapes in 2D space
+import shapely
+from shapely.geometry import Polygon
+import numpy
+# you need to install gdal here, not osgeo
+# gdal is generally a translator library for raster and vector geospatial data formats
+from osgeo import gdal, ogr
+```
+
+```{python}
+# create a fishnet grid using GDAL
+def make_fishnet(outputGridfn,xmin,xmax,ymin,ymax,rows,cols):
+    """Write a shapefile containing a rows x cols grid of rectangular polygons.
+
+    Parameters
+    ----------
+    outputGridfn : path of the shapefile to create; an existing dataset at
+        that path is deleted first.
+    xmin, xmax, ymin, ymax : extent covered by the grid (converted to float).
+    rows, cols : number of cells along y and x (converted to int).
+
+    Cells are written column by column starting at xmin, and from ymax
+    downwards within each column. The layer is created without a CRS.
+
+    Returns None; the grid only exists as the file written to disk.
+    """
+    # Calculate grid parameters
+    xmin, xmax = float(xmin), float(xmax)
+    ymin, ymax = float(ymin), float(ymax)
+    rows, cols = int(rows), int(cols)
+    gridWidth = (xmax - xmin) / cols
+    gridHeight = (ymax - ymin) / rows
+
+    # Create the output shapefile
+    outDriver = ogr.GetDriverByName('ESRI Shapefile')
+    if os.path.exists(outputGridfn):
+        # Delete through the driver so the sidecar files (.shx, .dbf, .prj, ...)
+        # are removed too; os.remove() would only delete the .shp itself.
+        outDriver.DeleteDataSource(outputGridfn)
+    outDataSource = outDriver.CreateDataSource(outputGridfn)
+    outLayer = outDataSource.CreateLayer(outputGridfn, geom_type=ogr.wkbPolygon)
+    featureDefn = outLayer.GetLayerDefn()
+
+    # Create grid cells
+    for col in range(cols):
+        # Edges are derived from the index instead of repeatedly adding
+        # gridWidth, so floating-point error does not accumulate.
+        left = xmin + col * gridWidth
+        right = xmin + (col + 1) * gridWidth
+
+        for row in range(rows):
+            top = ymax - row * gridHeight
+            bottom = ymax - (row + 1) * gridHeight
+
+            # Closed ring: the first corner is repeated as the last point.
+            ring = ogr.Geometry(ogr.wkbLinearRing)
+            ring.AddPoint(left, top)
+            ring.AddPoint(right, top)
+            ring.AddPoint(right, bottom)
+            ring.AddPoint(left, bottom)
+            ring.AddPoint(left, top)
+            poly = ogr.Geometry(ogr.wkbPolygon)
+            poly.AddGeometry(ring)
+
+            # Add new geom to layer
+            outFeature = ogr.Feature(featureDefn)
+            outFeature.SetGeometry(poly)
+            outLayer.CreateFeature(outFeature)
+            outFeature = None
+
+    # Save and close DataSources
+    outDataSource = None
+```
+
+```{python}
+from pyprojroot import here
+```
+
+```{python}
+era_dir = here("data/ERA5_out")
+```
+
+```{python}
+temp_file = xarray.open_dataset(os.path.join(era_dir, "2000_01.nc"), decode_coords="all")
+```
+
+```{python}
+
+```
+
+```{python}
+t0='2000-01-01T00:00:00.000000000'
+temp_file['t2m']['valid_time'][0]
+```
+
+```{python}
+list(temp_file['t2m']['valid_time'].data)
+```
+
+```{python}
+temp_file['t2m'][0].shape
+```
+
+```{python}
+import matplotlib.pyplot as plt
+from matplotlib.pyplot import figure
+#from matplotlib.pyplot
+import cartopy.feature as cfeature
+import cartopy.crs as ccrs
+# Ensure lat/lon are the correct names in your dataset
+
+var=temp_file['t2m'][0]
+
+lon = temp_file.coords.get("longitude")
+lat = temp_file.coords.get("latitude")
+
+plt.figure(figsize=(12, 6))
+ax = plt.axes(projection=ccrs.PlateCarree()) # Set projection for geographic map
+
+# Add map features
+ax.add_feature(cfeature.BORDERS, linestyle=":")
+ax.add_feature(cfeature.COASTLINE)
+
+ax.set_extent([lon.min() - 3, lon.max() + 3, lat.min() - 3, lat.max() + 3], crs=ccrs.PlateCarree())
+
+# Plot raster using lat/lon
+im = ax.pcolormesh(lon, lat, var, transform=ccrs.PlateCarree())
+
+# Add colorbar
+plt.colorbar(im, label=var.name)
+plt.title(f"{var.name} Spatial Distribution")
+
+plt.show()
+```
+
+```{python}
+lat
+```
+
+```{python}
+lon
+```
+
+```{python}
+temp_file['t2m']
+```
+
+```{python}
+era_files = glob.glob(os.path.join(era_dir, '*.nc'))
+```
+
+We read in the netcdf files and stack them in the 4th dimension by year
+
+```{python}
+era_stack = xarray.open_mfdataset(era_files, decode_coords="all")
+```
+
+```{python}
+era_stack
+```
+
+The data above does not have a [coordinate reference system](https://en.wikipedia.org/wiki/Spatial_reference_system), needed to interpret, transform, or align datasets. Hence, we assign the WGS84 standard
+
+```{python}
+era_stack.rio.write_crs("WGS 84", inplace=True)
+```
+
+Here, we are making a shapefile that is a fishnet grid of the raster extent.
+It will essentially be a polygon of lines surrounding each ERA5 cell
+
+```{python}
+era_extent = era_stack.rio.bounds()
+```
+
+```{python}
+xmin = era_extent[0]
+xmax = era_extent[2]
+ymin = era_extent[1]
+ymax = era_extent[3]
+
+height = era_stack.rio.height
+width = era_stack.rio.width
+
+era_coords = [(xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin), (xmin, ymin)]
+era_polygon = Polygon(era_coords)
+```
+
+```{python}
+# implement the fishnet function
+make_fishnet(os.path.join(era_dir, 'era_fishnet.shp'), xmin,xmax,ymin,ymax,height,width)
+```
+
+```{python}
+ogr_shp = gpd.read_file(os.path.join(era_dir, 'era_fishnet.shp'))
+print(ogr_shp.crs) # this is none, so we have to set it
+
+# Set the CRS for the created fishnet shape file as the same one from the stacked raster files.
+new_crs = era_stack.rio.crs.data # Replace with the desired CRS
+ogr_shp = ogr_shp.set_crs(new_crs)
+```
+
+```{python}
+ogr_shp.to_file(os.path.join(era_dir, 'era_fishnet.shp'))
+```
+
+```{python}
+
+```
\ No newline at end of file
diff --git a/notes/_prototypes/_kenya_demo_03_aggregate.qmd b/notes/_prototypes/_kenya_demo_03_aggregate.qmd
new file mode 100644
index 0000000..2d90f2f
--- /dev/null
+++ b/notes/_prototypes/_kenya_demo_03_aggregate.qmd
@@ -0,0 +1,19 @@
+---
+skip_showdoc: true
+---
+
+# Part 3: Joining Fishnet to the Ward geometries
+
+In this file, we will join the fishnet, which is a polygon grid with lines
+surrounding the grid of the ERA5 raster with the Ward geometries that have
+been queried from the Database of Global Administrative Boundaries
+(gadm.org). In merging the polygon grid with the ward polygon data,
+we ensure that every ward will be aligned with the relevant
+ERA5 temperature metrics for the area.
+
+Next, we create extraction points from the union of the wards and fishnet. These
+are what we can use to extract values from the raster that overlaps
+with the points aligning to each ward (this file).
+
+Lastly, we estimate the ward-level exposure to ERA5, accounting for the availability
+of data within the wards (this file).
\ No newline at end of file
diff --git a/notes/_prototypes/_learning_aggregations_w_michelle_20250328.qmd b/notes/_prototypes/_learning_aggregations_w_michelle_20250328.qmd
new file mode 100644
index 0000000..0b81808
--- /dev/null
+++ b/notes/_prototypes/_learning_aggregations_w_michelle_20250328.qmd
@@ -0,0 +1,719 @@
+---
+skip_showdoc: true
+---
+
+## Prototyping Spatial Aggregations
+
+We're going to learn how to aggregate the exposure data into daily values. This is useful for analyzing the data over a longer period of time, such as a week or a month, and is part of the larger goal of this project to aggregate the ERA5 dataset for Madagascar.
+
+
+Doing an aggregation of a netcdf file is relatively simple. What we need to do is read in the data, and then use the `xarray` library to group the data by time using a resampler method. We can then use the `mean` function to calculate the average value for each day.
+
+```{python}
+import xarray as xr
+import matplotlib.pyplot as plt
+import cartopy.crs as ccrs
+import cartopy.feature as cfeature
+from pyprojroot import here
+from hydra import initialize, compose
+from omegaconf import OmegaConf
+```
+
+Let's look at the data that we've already downloaded in the pipeline. We'll use the xarray library to open it up and inspect it.
+
+```{python}
+# Load the NetCDF file
+fpath = here() / "data/input/2010_1.nc"
+ds = xr.open_dataset(fpath)
+```
+
+This is a netcdf file. It has the following dimensions representing time series, as well as the variables we downloaded at specific locations:
+
+```{python}
+ds
+```
+
+We can see that our variables are accessible with indices. The reason we have 744 time points is that it is hourly data for the entire month (31 days x 24 hours).
+
+Interestingly, we can simply use the `.resample()` method to create mathematically aggregated data.
+
+```{python}
+# Perform multiple aggregations
+daily_mean = ds.resample(valid_time="1D").mean() # Daily mean
+daily_max = ds.resample(valid_time="1D").max() # Daily max
+daily_min = ds.resample(valid_time="1D").min() # Daily min
+
+# Combine the results into a new dataset
+daily_aggregated = xr.Dataset({
+ "t2m_mean": daily_mean["t2m"],
+ "t2m_max": daily_max["t2m"],
+ "t2m_min": daily_min["t2m"],
+ "d2m_mean": daily_mean["d2m"],
+ "d2m_max": daily_max["d2m"],
+ "d2m_min": daily_min["d2m"]
+})
+
+daily_aggregated
+```
+
+Look at how this compares to the original data:
+
+```{python}
+ds
+```
+
+With data of this shape, we can plot the daily mean temperature over the month:
+
+```{python}
+# Select a specific grid point (e.g., latitude=-1, longitude=0)
+variable='mean'
+
+# note: we can use the isel method to select the grid point by position. In this case,
+# we are selecting the bottom-left grid point (latitude=-1, longitude=0) because we're selecting
+# the smallest value for latitude:
+# valid_time: not indexed here, so every daily time step is kept (giving a time series).
+# latitude=-1: Selects the last latitude (bottom-most, as latitude is usually ordered from north to south).
+# longitude=0: Selects the first longitude (left-most).
+t2m_mean_point = daily_aggregated["t2m_" + variable].isel(latitude=-1, longitude=0)
+
+# Plot the time series
+plt.figure(figsize=(10, 6))
+t2m_mean_point.plot(label="Daily Mean t2m")
+plt.title("Daily Aggregated ({}) Temperature at Bottom-Left Grid Point".format(variable))
+plt.xlabel("Time")
+plt.ylabel("Temperature (K)")
+plt.legend()
+plt.grid()
+plt.show()
+```
+
+How does this compare to the disaggregated data?
+
+```{python}
+t2m_point = ds["t2m"].isel(latitude=-1, longitude=0)
+
+# Plot the time series (the raw hourly values from `ds`, not the daily means)
+plt.figure(figsize=(10, 6))
+t2m_point.plot(label="Hourly t2m")
+plt.title("Daily Disaggregated Temperature at Bottom-Left Grid Point")
+plt.xlabel("Time")
+plt.ylabel("Temperature (K)")
+plt.legend()
+plt.grid()
+plt.show()
+```
+
+These temperature plots match beautifully! This means our aggregation over the 31 days works!
+
+Let's look at the aggregation over a map:
+
+```{python}
+# Select the first day of t2m_mean
+variable="mean"
+t2m_mean_day1 = daily_aggregated["t2m_" + variable].isel(valid_time=0)
+
+# Set the absolute min and max for the color bar
+vmin = 270 # Minimum value (e.g., 270 K)
+vmax = 310 # Maximum value (e.g., 310 K)
+
+# Create a plot with Cartopy
+plt.figure(figsize=(10, 6))
+ax = plt.axes(projection=ccrs.PlateCarree()) # Use PlateCarree projection for latitude/longitude data
+
+# Plot the data
+t2m_mean_day1.plot(ax=ax, cmap="coolwarm", transform=ccrs.PlateCarree(), vmin=vmin, vmax=vmax, cbar_kwargs={"label": "Temperature (K)"})
+
+# Add Madagascar's border using Cartopy's built-in features
+ax.add_feature(cfeature.BORDERS, edgecolor="black", linewidth=1) # Add country borders
+ax.add_feature(cfeature.COASTLINE, edgecolor="black", linewidth=0.8) # Add coastlines
+
+# Optionally, zoom in on Madagascar
+ax.set_extent([43, 51, -26, -11], crs=ccrs.PlateCarree()) # Longitude and latitude bounds for Madagascar
+
+# Add gridlines
+ax.gridlines(draw_labels=True, linewidth=0.5, color="gray", alpha=0.5, linestyle="--")
+
+# Add a title
+plt.title("Mean Daily {} Temperature (Day 1)".format(variable))
+plt.show()
+```
+
+Looks great. Now, we need to see if we can do a spatial aggregation:
+
+>A mathematical aggregation like mean() involves summarizing data values (e.g., averaging) across a specific dimension, such as time, without considering spatial relationships. For example, calculating the daily mean temperature from hourly data is purely numerical.
+In contrast, a spatial aggregation using rasters and polygons involves summarizing data based on spatial boundaries. For example, when aggregating raster data (e.g., temperature) over a polygon (e.g., a country's boundary), the process involves selecting raster cells that fall within the polygon and computing a summary statistic (e.g., mean, sum) for those spatially defined areas. This type of aggregation accounts for geographic context and spatial relationships.
+
+To do this, we'll need to read in the shapefile that defines the shape of the polygon (ie the physical ground) and find the pixels of data that fall within the polygon. We can then use the `xarray` library to group the data by time using a resampler method. We can then use the `mean` function to calculate the average value for each day.
+
+```{python}
+import geopandas as gpd
+
+# we learned how to read in shapefiles in the kenya demo notebook
+zip_url_or_path = here() / "data/testing/gadm41_MDG.gpkg"
+
+shape = gpd.read_file(zip_url_or_path, layer = "ADM_ADM_1")
+```
+
+We are using the layer 1 of this shapefile from GADM.org. This refers to the states in red:
+
+
+
+When we read in the shapefile, the data in the `geometry` column is a specification of the polygons that represent geographic boundaries.
+
+```{python}
+shape
+```
+
+In a vector image such as a shapefile, the steps between each value are not guaranteed to be equal (unlike on a cartesian plane), so we need to think about how those values "project" onto a known Coordinate Reference System (CRS) that has equal steps.
+
+A quick note about CRS:
+
+> The WGS 84 (World Geodetic System 1984) is a widely used global Coordinate Reference System (CRS). It is the standard CRS for GPS (Global Positioning System) and is commonly used in geospatial applications. WGS 84 defines a geographic coordinate system based on a specific ellipsoid model of the Earth.
+
+> Key Features of WGS 84
+Type: Geographic Coordinate System (GCS).
+Coordinates are represented in latitude, longitude, and optionally altitude.
+Units: Degrees (for latitude and longitude).
+Ellipsoid: WGS 84 uses a reference ellipsoid with:
+Semi-major axis: 6,378,137 meters.
+Flattening: 1 / 298.257223563.
+Datum: The WGS 84 datum defines the origin and orientation of the coordinate system.
+EPSG Code: The EPSG code for WGS 84 is 4326.
+
+Spatial geometry is complicated and silly, hence [all maps are wrong](https://youtu.be/kIID5FDi2JQ?si=OZASX3i6Aglqwa4u).
+
+Nevertheless, we can see that the shapefile has a CRS of EPSG:4326, which is what we want:
+
+```{python}
+shape.crs
+```
+
+Were this different, we'd have to find some way to adjust these projections. For our netCDF file, however, we don't need to worry about this because the data themselves are created using a rasterized netCDF file, which is a standard format for storing gridded data. The data is already in a grid format, and the pixel values are already aligned with the geographic coordinates of the raster. In spatial geometry, we use degrees to represent the latitude and longitude of the corners of each pixel. This means that the data is already in a format that can be easily manipulated and analyzed using xarray and geopandas, because we refer to where the pixel is located in the world using degrees. It is essentially an absolute reference system.
+
+In the ERA5 dataset, the resolution is said to be 0.25 degrees, which means that each pixel represents a square area of approximately 28 km x 28 km at the equator (one degree is roughly 111 km). So at every unit of 0.25 degrees north-south or east-west, we have a new pixel of data, with a value for temperature or dewpoint or whatever. You can physically see each of these on the plot.
+
+Learn more about ERA5's resolution [here](https://confluence.ecmwf.int/display/CKB/ERA5%3A+What+is+the+spatial+reference).
+
+Now, in order to aggregate data spatially, we're pasting in a utility here for finding the intersecting values between our netcdf data and the polygons represented in our shapefile (ie the states, regions, etc.).
+
+Source: https://github.com/NSAPH-Data-Processing/air_pollution__aqdh/blob/main/utils/faster_zonal_stats.py
+
+```{python}
+import numpy as np
+from tqdm import tqdm
+from math import ceil, floor
+
+from rasterstats.io import Raster
+from rasterstats.utils import boxify_points, rasterize_geom
+```
+
+This function indexes each pixel and maps it to the polygon it falls within. A few notes about this function:
+
+- It uses the `rasterstats.io` library to read in a raster tiff file
+- It uses affine transformations to convert the pixel coordinates to geographic coordinates
+- It needs to know where there is no data in the raster file, so we need to set a `nodata` value
+- `all_touched` is a boolean that determines whether to include all pixels that touch the polygon or just the ones that are fully contained within it; this is a domain specific choice
+
+```{python}
+def polygon_to_raster_cells(
+    vectors,
+    raster,
+    band=1,
+    nodata=None,
+    affine=None,
+    all_touched=False,
+    verbose=False,
+    **kwargs,
+):
+    """Return, for each vector geometry, the indices of the raster cells it covers.
+
+    Parameters
+    ----------
+    vectors: iterable of geometries
+        Must be in the same lon/lat coordinate system as the raster.
+    raster: ndarray or raster path, as accepted by `rasterstats.io.Raster`
+    band: int, optional (passed through to `Raster`)
+    nodata: float, optional
+    affine: Affine instance
+        Needed for ndarray input; if omitted, the opened raster's transform is used.
+    all_touched: bool, optional
+        Whether to include every raster cell touched by a geometry, or only
+        those having a center point within the polygon.
+        defaults to `False`
+    verbose: bool, optional (show a tqdm progress bar over the geometries)
+
+    Returns
+    -------
+    list
+        One `(row_indices, col_indices)` tuple of arrays per geometry, in
+        input order; both arrays are empty when no cell matches.
+    """
+    cell_map = []
+
+    with Raster(raster, affine, nodata, band) as rast:
+        # used later to crop raster and find start row and col
+        transform = rast.affine if affine is None else affine
+        min_lon, dlon = transform.c, transform.a
+        max_lat, dlat = transform.f, -transform.e
+        H, W = rast.shape
+
+        for geom in tqdm(vectors, disable=(not verbose)):
+            if "Point" in geom.geom_type:
+                geom = boxify_points(geom, rast)
+
+            # find geometry bounds to crop raster, clamped to the raster extent
+            left, bottom, right, top = geom.bounds
+            start_row = max(0, min(H - 1, floor((max_lat - top) / dlat)))
+            start_col = min(W - 1, max(0, floor((left - min_lon) / dlon)))
+            end_col = max(0, min(W - 1, ceil((right - min_lon) / dlon)))
+            end_row = min(H - 1, max(0, ceil((max_lat - bottom) / dlat)))
+            geom_bounds = (
+                min_lon + dlon * start_col,  # left
+                max_lat - dlat * end_row - 1e-12,  # bottom
+                min_lon + dlon * end_col + 1e-12,  # right
+                max_lat - dlat * start_row,  # top
+            )
+
+            # crop raster to area of interest and rasterize
+            fsrc = rast.read(bounds=geom_bounds)
+            rv_array = rasterize_geom(geom, like=fsrc, all_touched=all_touched)
+            rows, cols = np.nonzero(rv_array)
+
+            if rows.size > 0:
+                # shift window-relative indices to full-raster ones; both ends must be in range
+                rows, cols = rows + start_row, cols + start_col
+                assert 0 <= rows.min() and rows.max() < H
+                assert 0 <= cols.min() and cols.max() < W
+
+            cell_map.append((rows, cols))
+
+    return cell_map
+```
+
+So to implement this we need to first convert the netcdf to a tiff so that we can rasterize it to each of the polygons in the shapefile. We do this with `rioxarray`
+
+```{python}
+import rioxarray as rxr
+```
+
+First, we pick our variable of interest, then we set the spatial properties to make sure it conforms to the CRS we wanted
+
+```{python}
+temperature = daily_aggregated['t2m_mean']
+```
+
+```{python}
+temp_set = temperature.rio.set_spatial_dims(x_dim="longitude", y_dim="latitude")
+temp_set = temp_set.rio.write_crs("EPSG:4326")
+```
+
+Write it out to tiff and read it back in (there's no way to do this in memory)
+
+```{python}
+temp_set.rio.to_raster("temp.tif")
+```
+
+Now we can investigate the tiff and see that it has all the properties necessary for the function
+
+```{python}
+import rasterio
+
+src = rasterio.open("temp.tif")
+raster = src.read(1) # Numpy array
+profile = src.profile # Metadata
+transform = src.transform
+```
+
+```{python}
+# the number of bands in the raster (one per day of the aggregated data)
+src.count
+```
+
+```{python}
+# the affine transformation matrix:
+# Pixel size (resolution in x and y).
+# Origin (top-left corner in spatial coordinates).
+# Rotation (if the raster is not north-up).
+src.transform
+```
+
+```{python}
+# any missing data locations
+src.nodata
+```
+
+```{python}
+# the number of columns (width) and rows (height)
+print(src.width, src.height)
+```
+
+Fetch the array of data
+
+```{python}
+raster_array = src.read(1)
+```
+
+Function go brrrr
+
+```{python}
+res_poly2cell=polygon_to_raster_cells(
+ vectors = shape.geometry.values, # the geometries of the shapefile of the regions
+ raster=raster_array, # the raster data above
+ band=1, # the value of the day that we're using
+ nodata=src.nodata, # any intersections with no data, may have to be np.nan
+ affine=src.transform, # some math thing need to revise
+ all_touched=True,
+ verbose=True
+)
+```
+
+The data below maps which grid entries fall into each of the regions in the shapefile (e.g. which pixel is in which state)
+
+```{python}
+res_poly2cell
+```
+
+```{python}
+len(res_poly2cell)
+```
+
+Look familiar?
+
+These are the 6 states in the shapefile. The values in the array are the indexes of the pixels in the netcdf file that fall within the polygon.
+Now, within each of these we can aggregate mathematically eg min max mean etc.
+
+```{python}
+# the values themselves
+raster_array
+```
+
+```{python}
+# One summary value per polygon, in the same order as res_poly2cell.
+stats = []
+
+for indices in res_poly2cell:
+    # Index the raster with this polygon's (rows, cols) arrays; an empty
+    # index pair simply yields an empty array.
+    cells = raster[indices]
+    if cells.size == 0 or np.isnan(cells).all():
+        # no cells found for this polygon, or no valid (non-NaN) cells;
+        # checked in a single vectorised pass over the cells
+        stats.append(np.nan)
+    else:
+        # compute MEAN of valid cells
+        # but this stat can be ANYTHING
+        stats.append(np.nanmean(cells))
+```
+
+```{python}
+stats
+```
+
+Looks like it worked!
+
+```{python}
+import pandas as pd
+# `raster` is band 1 of temp.tif, i.e. the first day's mean, not a 31-day mean
+pd.DataFrame({"l1_region": shape.NAME_1, "mean_temp_day_1": stats})
+```
+
+### Let's try it with Level 3 data
+
+```{python}
+# first get the shape of the polygons
+
+shape = gpd.read_file(zip_url_or_path, layer = "ADM_ADM_3")
+
+# get the new mapping of the pixels to the shapes in the region
+
+res_poly2cell = polygon_to_raster_cells(
+ vectors = shape.geometry.values, # the geometries of the shapefile of the regions
+ raster=raster_array, # the raster data above
+ band=1, # the value of the day that we're using
+ nodata=src.nodata, # any intersections with no data, may have to be np.nan
+ affine=src.transform, # some math thing need to revise
+ all_touched=True,
+ verbose=True
+)
+```
+
+```{python}
+len(res_poly2cell)
+```
+
+```{python}
+# demonstrate that because this is a "denser" set of polygons
+# this will take longer
+# One summary value per polygon, in the same order as res_poly2cell.
+stats = []
+
+for indices in res_poly2cell:
+    # Index the raster with this polygon's (rows, cols) arrays; an empty
+    # index pair simply yields an empty array.
+    cells = raster[indices]
+    if cells.size == 0 or np.isnan(cells).all():
+        # no cells found for this polygon, or no valid (non-NaN) cells;
+        # checked in a single vectorised pass over the cells
+        stats.append(np.nan)
+    else:
+        # compute mean of valid cells
+        # (nanmean ignores any NaN cells inside the polygon)
+        stats.append(np.nanmean(cells))
+```
+
+```{python}
+stats
+```
+
+Now we have 110 mean temperatures, one for each of the shapefile's regions.
+
+```{python}
+
+df = pd.DataFrame(
+ {"l3_territory": shape.NAME_3, "dummy_date_in_future": 1, "temp_vals": stats}
+ )
+```
+
+```{python}
+df
+```
+
+```{python}
+# now we plot it using the shape.geometry to get the shapefile's location for each region
+gdf = gpd.GeoDataFrame(df, geometry=shape.geometry.values, crs=shape.crs)
+gdf.plot(column="temp_vals", legend=True)
+plt.show()
+```
+
+We can test this out with our healthsheds file
+
+```{python}
+healthsheds = gpd.read_file(here() / "data/testing/mdg_healthsheds2022")
+```
+
+```{python}
+healthsheds
+```
+
+```{python}
+# there are NAs to remove
+healthsheds.dropna(subset = ['geometry'], inplace=True)
+```
+
+```{python}
+len(set(healthsheds.fs_uid))
+```
+
+```{python}
+# get the new mapping of the pixels to the shapes in the region
+
+res_poly2cell = polygon_to_raster_cells(
+ vectors = healthsheds.geometry.values, # the geometries of the shapefile of the regions
+ raster=raster_array, # the raster data above
+ band=1, # the value of the day that we're using
+ nodata=src.nodata, # any intersections with no data, may have to be np.nan
+ affine=src.transform, # some math thing need to revise
+ all_touched=True,
+ verbose=True
+)
+
+stats = []
+
+for indices in res_poly2cell:
+ if len(indices[0]) == 0:
+ # no cells found for this polygon
+ stats.append(np.nan)
+ else:
+ cells = raster[indices]
+ if sum(~np.isnan(cells)) == 0:
+ # no valid cells found for this polygon
+ stats.append(np.nan)
+ continue
+ else:
+ # compute mean of valid cells
+ stats.append(np.nanmean(cells))
+```
+
+```{python}
+df = pd.DataFrame(
+ {"healthshed": healthsheds.fs_uid, "dummy_date_in_future": 1, "temp_vals": stats}
+ )
+```
+
+```{python}
+# now we plot it using the shape.geometry to get the shapefile's location for each region
+gdf = gpd.GeoDataFrame(df, geometry=healthsheds.geometry.values, crs=shape.crs)
+gdf.plot(column="temp_vals", legend=True)
+plt.show()
+```
+
+Now that we've demonstrated how this could work, we can substitute the GADM shapefiles for our healthsheds, and put it in a pipeline!!!
+
+## Nepal
+
+We've modified the pipeline to now download Nepal as well. We'll test out an aggregation using the aggregation shapefiles we were provided by Dimeji. We probably want to decide on where to centralize data storage for files like this
+
+```{python}
+try: from era5_sandbox.core import GoogleDriver, _get_callable, describe
+except: from core import GoogleDriver, _get_callable, describe
+
+try: from era5_sandbox.download import download_raw_era5
+except: from download import download_raw_era5
+
+try: from era5_sandbox.aggregate import resample_netcdf, netcdf_to_tiff, polygon_to_raster_cells, aggregate_to_healthsheds
+except: from aggregate import resample_netcdf, netcdf_to_tiff, polygon_to_raster_cells, aggregate_to_healthsheds
+```
+
+```{python}
+from hydra import initialize, compose
+from omegaconf import OmegaConf
+
+# unfortunately, we have to use the initialize function to load the config file
+# this is because the @hydra decorator does not work with Notebooks very well
+# this is a known issue with Hydra: https://gist.github.com/bdsaglam/586704a98336a0cf0a65a6e7c247d248
+#
+# just use the relative path from the notebook to the config dir
+with initialize(version_base=None, config_path="../../conf"):
+ cfg = compose(config_name='config.yaml')
+
+cfg.development_mode = False
+cfg.query['year'] = 2023
+cfg.query['month'] = 10
+cfg.query['day'] = 1
+cfg.query['time'] = "00:00"
+cfg.query['geography'] = "nepal"
+download_raw_era5(cfg)
+```
+
+Now let's read it in and run the aggregation:
+
+```{python}
+# Load the NetCDF file
+fpath = here() / "data/input/nepal_2023_10.nc"
+ds = xr.open_dataset(fpath)
+```
+
+```{python}
+ds
+```
+
+```{python}
+# Perform multiple aggregations
+daily_mean = ds.resample(valid_time="1D").mean() # Daily mean
+daily_max = ds.resample(valid_time="1D").max() # Daily max
+daily_min = ds.resample(valid_time="1D").min() # Daily min
+
+# Combine the results into a new dataset
+daily_aggregated = xr.Dataset({
+ "t2m_mean": daily_mean["t2m"],
+ "t2m_max": daily_max["t2m"],
+ "t2m_min": daily_min["t2m"],
+ "d2m_mean": daily_mean["d2m"],
+ "d2m_max": daily_max["d2m"],
+ "d2m_min": daily_min["d2m"]
+})
+
+daily_aggregated
+```
+
+```{python}
+# Select the first day of t2m_mean
+variable="mean"
+t2m_mean_day1 = daily_aggregated["t2m_" + variable].isel(valid_time=0)
+
+# Set the absolute min and max for the color bar
+vmin = 270 # Minimum value (e.g., 270 K)
+vmax = 310 # Maximum value (e.g., 310 K)
+
+# Create a plot with Cartopy
+plt.figure(figsize=(10, 6))
+ax = plt.axes(projection=ccrs.PlateCarree()) # Use PlateCarree projection for latitude/longitude data
+
+# Plot the data
+t2m_mean_day1.plot(ax=ax, cmap="coolwarm", transform=ccrs.PlateCarree(), vmin=vmin, vmax=vmax, cbar_kwargs={"label": "Temperature (K)"})
+
+# Add country borders using Cartopy's built-in features
+ax.add_feature(cfeature.BORDERS, edgecolor="black", linewidth=1) # Add country borders
+ax.add_feature(cfeature.COASTLINE, edgecolor="black", linewidth=0.8) # Add coastlines
+
+# Optionally, zoom in (NOTE: the extent below is Madagascar's; replace it with Nepal's bounds before enabling)
+#ax.set_extent([43, 51, -26, -11], crs=ccrs.PlateCarree()) # Longitude and latitude bounds for Madagascar
+
+# Add gridlines
+ax.gridlines(draw_labels=True, linewidth=0.5, color="gray", alpha=0.5, linestyle="--")
+
+# Add a title
+plt.title("Mean Daily {} Temperature (Day 1)".format(variable))
+plt.show()
+```
+
+We're going to create the aggregations using the function we defined in the aggregate module
+
+```{python}
+resampled_nc = resample_netcdf(fpath)
+
+resampled_tiff = netcdf_to_tiff(
+ ds=resampled_nc,
+ variable="t2m",
+ crs="EPSG:4326"
+)
+```
+
+Now we fetch the shapefile for administrative aggregations using our googledriver class:
+
+```{python}
+driver = GoogleDriver(json_key_path=here() / cfg.GOOGLE_DRIVE_AUTH_JSON.path)
+drive = driver.get_drive()
+```
+
+```{python}
+shape = "Nepal_Healthsheds2024.zip"
+```
+
+```{python}
+healthsheds = driver.read_healthsheds(shape)
+```
+
+```{python}
+healthsheds.columns
+```
+
+```{python}
+healthsheds.describe()
+```
+
+```{python}
+len(set(healthsheds['fid'].values))
+```
+
+```{python}
+res_poly2cell=polygon_to_raster_cells(
+ vectors = healthsheds.geometry.values, # the geometries of the shapefile of the regions
+ raster=resampled_tiff.data, # the raster data above
+ band=1, # the value of the day that we're using
+ nodata=resampled_tiff.nodata, # any intersections with no data, may have to be np.nan
+ affine=resampled_tiff.transform, # some math thing need to revise
+ all_touched=True,
+ verbose=True
+)
+```
+
+```{python}
+result = aggregate_to_healthsheds(
+ res_poly2cell=res_poly2cell,
+ raster=resampled_tiff,
+ shapes=healthsheds,
+ names_column="fid",
+ aggregation_func=np.nanmean,
+ aggregation_name="mean_temperature"
+)
+result.head()
+```
+
+```{python}
+result.plot(column="mean_temperature", legend=True)
+plt.title("Mean Temperature (K) by Health Shed October 2023")
+plt.show()
+```
+
+This should work by slotting right into the pipeline, only changing the function for the names column
\ No newline at end of file
diff --git a/notes/prototypes/image.png b/notes/_prototypes/image.png
similarity index 100%
rename from notes/prototypes/image.png
rename to notes/_prototypes/image.png
diff --git a/notes/index.ipynb b/notes/index.ipynb
index 58de015..83e40d9 100644
--- a/notes/index.ipynb
+++ b/notes/index.ipynb
@@ -1,29 +1,34 @@
{
"cells": [
{
- "cell_type": "code",
- "execution_count": null,
+ "cell_type": "markdown",
"metadata": {},
- "outputs": [],
"source": [
- "#| hide\n",
- "from era5_sandbox.core import *"
+ "---\n",
+ "title: \"The ERA5 Spatial Aggregation Pipeline\"\n",
+ "exec_all: true\n",
+ "---"
]
},
{
- "cell_type": "markdown",
+ "cell_type": "code",
+ "execution_count": null,
"metadata": {},
+ "outputs": [],
"source": [
- "# era5_sandbox\n",
- "\n",
- "> Sandbox environment for era5 development"
+ "#| hide: null\n",
+ "from era5_sandbox.core import *"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Here we are developing functions and code for the Madagascar ERA5 dataset project. The goal is for exposure data to be made available at the daily resolution when possible. Finer resolutions shouldn’t ever be needed for our purposes, and it should then be relatively easy to aggregate at coarser resolutions, such as weekly or monthly.\n",
+ "## era5_sandbox\n",
+ "\n",
+ "> Sandbox environment for era5 development\n",
+ "\n",
+ "This package documents the development and implementation of functions and code for the Madagascar ERA5 dataset project. The goal is for exposure data to be made available at the daily resolution when possible. Finer resolutions shouldn’t ever be needed for our purposes, and it should then be relatively easy to aggregate at coarser resolutions, such as weekly or monthly. Additionally, we've extended this work to Nepal as well.\n",
"\n",
"Variables should generally be made available from 2010 onward, as that’s where our clinic data starts.\n",
"\n",
@@ -31,11 +36,15 @@
"\n",
"Preliminary list of environmental variables\n",
"\n",
- "- [ ] 2-m air temperature from ERA5: daily min, max, mean\n",
+ "- [x] 2-m air temperature from ERA5: daily min, max, mean\n",
" \n",
- "- [ ] 2-m air dew point temperature from ERA5: daily min, max, mean\n",
+ "- [x] 2-m air dew point temperature from ERA5: daily min, max, mean\n",
+ "\n",
+ "- [x] Precipitation: daily total (ERA5)\n",
"\n",
- "- [ ] Precipitation: daily total (ERA5)\n",
+ "- [x] Soil moisture: daily average (ERA5)\n",
+ "\n",
+ "Variables from other sources:\n",
"\n",
"- [ ] Sea surface temperature: daily average and maximum in the nearest neighbor for each healthshed.\n",
"\n",
@@ -55,132 +64,102 @@
"\n",
"- [ ] Linking/segmenting healthsheds into climate zones and other \n",
"\n",
- "- [ ] Relative humidity: daily average (lower priority)\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Developer Guide"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "If you are new to using `nbdev` here are some useful pointers to get you started."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Install era5_sandbox in Development mode"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
+ "- [ ] Relative humidity: daily average (lower priority)\n",
+ "\n",
+ "Those from the ERA5 dataset will be housed here, but we may likely develop a separate repository for the other datasets.\n",
+ "\n",
+ "## Developer Guide\n",
+ "\n",
+ "This package is built and maintained with `nbdev`. If you are new to using `nbdev` here are some useful pointers to get you started.\n",
+ "\n",
+ "### Install era5_sandbox in Development mode\n",
+ "\n",
"```sh\n",
"# make sure era5_sandbox package is installed in development mode\n",
"$ pip install -e .\n",
+ "```\n",
"\n",
- "# To make changes, go to the \"notes\" directory and edit the notebooks as necessary.\n",
- "# Each notebook refers to a module in the era5_sandbox package. Cells are exported to the module\n",
- "# when the notebook is saved and you run the following command:\n",
+ "To make changes, go to the \"notes\" directory and edit the notebooks as necessary.\n",
+ "Each notebook refers to a module in the era5_sandbox package. Cells are exported to the module\n",
+ "when the notebook is saved and you run the following command:\n",
"\n",
+ "```sh\n",
"$ nbdev_export\n",
"```\n",
"\n",
- "For e.g., to change functionality of the `testAPI()` function in the testAPI Hydra rule, you would edit the `testAPI` notebook in the `notes` directory `notes/testAPI.ipynb`, and then save that notebook and run `nbdev_export` to update the `core` module in the package."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Usage"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Installation"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Install latest from the GitHub [repository][repo]:\n",
+ "For e.g., to change functionality of the `testAPI()` function in the testAPI Hydra rule, you would edit the `testAPI` notebook in the `notes` directory `notes/testAPI.ipynb`, and then save that notebook and run `nbdev_export` to update the `core` module in the package.\n",
+ "\n",
+ "### How to Run the Pipeline\n",
+ "\n",
+ "The pipeline downloads ERA5 variables for a given date range and geographical bounding box. You can learn how each of these steps was developed by following the notebooks in `notes` in numerical order.\n",
+ "\n",
+ "::: {.callout-important}\n",
+ "The pipeline has two implementations: one using `snakemake` and `hydra`, and another using `pytask`. The `pytask` implementation is the more recent one, and is recommended for future use. The `snakemake` implementation is left here for reference to legacy code.\n",
+ ":::\n",
+ "\n",
+ "#### Using `pytask`\n",
+ "\n",
+ "To run the pipeline, the `pytask` config at `notes/20_pytask_config.qmd` should be reviewed\n",
+ "and updated if necessary. The pipeline can then be run with the following command:\n",
"\n",
"```sh\n",
- "$ pip install git+https://github.com/NSAPH-Data-Processing/era5_sandbox\n",
+ "$ sbatch pytask.sbatch\n",
"```\n",
"\n",
- "or clone and install in development mode:\n",
+ "#### Using `snakemake` and `hydra`\n",
+ "\n",
+ "To run the pipeline, the config at `conf/config.yaml` should be updated with the desired date range and geographical bounding box. The pipeline can then be run with the following command:\n",
"\n",
"```sh\n",
- "$ git clone https://github.com/NSAPH-Data-Processing/era5_sandbox\n",
- "$ pip install -e .\n",
+ "sbatch snakemake.sbatch\n",
"```\n",
"\n",
+ "### What Does the Pipeline Produce?\n",
"\n",
- "[repo]: https://github.com/NSAPH-Data-Processing/era5_sandbox"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Documentation"
+ "Using `pytask`'s data catalog, you can investigate the downloaded raw data with python, eg.:"
]
},
{
- "cell_type": "markdown",
+ "cell_type": "code",
+ "execution_count": null,
"metadata": {},
+ "outputs": [],
"source": [
- "🚧Documentation is in development 🚧"
+ "#| exec_doc:\n",
+ "#\n",
+ "import xarray as xr\n",
+ "from era5_sandbox.config import data_catalog\n",
+ "from era5_sandbox.core import ClimateDataFileHandler\n",
+ "\n",
+ "ex_nc = list(data_catalog['download']['outputs']._entries).pop()\n",
+ "ex_nc_path = data_catalog['download']['outputs'][ex_nc].load()\n",
+ "\n",
+ "with ClimateDataFileHandler(ex_nc_path) as handler:\n",
+ " ds = xr.open_dataset(handler.get_dataset(\"instant\"))\n",
+ "\n",
+ "ds"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "## How to use"
+ "And plot it with cartopy, eg.:"
]
},
{
- "cell_type": "markdown",
+ "cell_type": "code",
+ "execution_count": null,
"metadata": {},
+ "outputs": [],
"source": [
- "The pipeline currently downloads ERA5 temperature and dew point temperature data for a given date range and geographical bounding box. You can learn each of these steps by following the notebooks in `notes` in numerical order.\n",
- "\n",
- "To run the pipeline, the config at `config/config.yaml` should be updated with the desired date range and geographical bounding box. The pipeline can then be run with the following command:\n",
- "\n",
- "```sh\n",
- "sbatch snakemake.sbatch\n",
- "```\n",
- "\n",
- "You can investigate the downloaded raw data with python, eg.:\n",
- "\n",
- "```python\n",
- "import xarray as xr\n",
+ "#| exec_doc:\n",
+ "#\n",
"import matplotlib.pyplot as plt\n",
"import cartopy.crs as ccrs\n",
"import cartopy.feature as cfeature\n",
"\n",
- "### the path to any of the downloaded files\n",
- "file_path = \"/n/dominici_lab/lab/data_processing/csph-era5_sandbox/data/input/2010_01.nc\"\n",
- "data = xr.open_dataset(file_path)\n",
- "\n",
- "\n",
- "temperature = data[\"t2m\"]\n",
- "\n",
- "\n",
+ "temperature = ds[\"t2m\"]\n",
"\n",
"# Select a specific time step\n",
"temperature_at_time = temperature.isel(valid_time=0)\n",
@@ -192,28 +171,14 @@
"ax.coastlines()\n",
"ax.add_feature(cfeature.BORDERS, linestyle=\":\")\n",
"ax.set_title(\"Temperature at Time Step 0\")\n",
- "plt.show()\n",
- "```"
+ "plt.show()"
]
},
{
- "cell_type": "code",
- "execution_count": null,
+ "cell_type": "markdown",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "2"
- ]
- },
- "execution_count": null,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
"source": [
- "1+1"
+ "You can also load the aggregated data:"
]
},
{
@@ -221,7 +186,17 @@
"execution_count": null,
"metadata": {},
"outputs": [],
- "source": []
+ "source": [
+ "#| exec_doc:\n",
+ "#\n",
+ "import pandas as pd\n",
+ "import geopandas as gpd\n",
+ "from era5_sandbox.config import data_catalog\n",
+ "\n",
+ "ex_agg_path = data_catalog['aggregate']['outputs']['2019_08_madagascar_night_d2m_max.parquet'].load()\n",
+ "\n",
+ "gpd.read_parquet(ex_agg_path).describe()"
+ ]
}
],
"metadata": {
@@ -232,5 +207,5 @@
}
},
"nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
}
diff --git a/notes/logs/2025-03-17/12-59-36/.hydra/config.yaml b/notes/logs/2025-03-17/12-59-36/.hydra/config.yaml
deleted file mode 100644
index e675fff..0000000
--- a/notes/logs/2025-03-17/12-59-36/.hydra/config.yaml
+++ /dev/null
@@ -1,34 +0,0 @@
-query:
- product_type: reanalysis
- variable:
- - 2m_dewpoint_temperature
- - 2m_temperature
- - skin_temperature
- year:
- - 2010
- - 2011
- month:
- - 1
- - 2
- - 3
- day:
- - 1
- - 2
- - 3
- - 4
- - 5
- time:
- - 0
- - 6
- - 12
- - 18
- area:
- - 0
- - 360
- - -90
- - 90
- data_format: netcdf
- download_format: unarchived
-datapaths:
- input: null
- output: null
diff --git a/notes/logs/2025-03-17/12-59-36/.hydra/hydra.yaml b/notes/logs/2025-03-17/12-59-36/.hydra/hydra.yaml
deleted file mode 100644
index b04b55c..0000000
--- a/notes/logs/2025-03-17/12-59-36/.hydra/hydra.yaml
+++ /dev/null
@@ -1,155 +0,0 @@
-hydra:
- run:
- dir: logs/${now:%Y-%m-%d}/${now:%H-%M-%S}
- sweep:
- dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S}
- subdir: ${hydra.job.num}
- launcher:
- _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
- sweeper:
- _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
- max_batch_size: null
- params: null
- help:
- app_name: ${hydra.job.name}
- header: '${hydra.help.app_name} is powered by Hydra.
-
- '
- footer: 'Powered by Hydra (https://hydra.cc)
-
- Use --hydra-help to view Hydra specific help
-
- '
- template: '${hydra.help.header}
-
- == Configuration groups ==
-
- Compose your configuration from those groups (group=option)
-
-
- $APP_CONFIG_GROUPS
-
-
- == Config ==
-
- Override anything in the config (foo.bar=value)
-
-
- $CONFIG
-
-
- ${hydra.help.footer}
-
- '
- hydra_help:
- template: 'Hydra (${hydra.runtime.version})
-
- See https://hydra.cc for more info.
-
-
- == Flags ==
-
- $FLAGS_HELP
-
-
- == Configuration groups ==
-
- Compose your configuration from those groups (For example, append hydra/job_logging=disabled
- to command line)
-
-
- $HYDRA_CONFIG_GROUPS
-
-
- Use ''--cfg hydra'' to Show the Hydra config.
-
- '
- hydra_help: ???
- hydra_logging:
- version: 1
- formatters:
- simple:
- format: '[%(asctime)s][HYDRA] %(message)s'
- handlers:
- console:
- class: logging.StreamHandler
- formatter: simple
- stream: ext://sys.stdout
- root:
- level: INFO
- handlers:
- - console
- loggers:
- logging_example:
- level: DEBUG
- disable_existing_loggers: false
- job_logging:
- version: 1
- formatters:
- simple:
- format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
- handlers:
- console:
- class: logging.StreamHandler
- formatter: simple
- stream: ext://sys.stdout
- file:
- class: logging.FileHandler
- formatter: simple
- filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
- root:
- level: INFO
- handlers:
- - console
- - file
- disable_existing_loggers: false
- env: {}
- mode: RUN
- searchpath: []
- callbacks: {}
- output_subdir: .hydra
- overrides:
- hydra:
- - hydra.mode=RUN
- task: []
- job:
- name: ipython-input-1-3f35d1394572
- chdir: null
- override_dirname: ''
- id: ???
- num: ???
- config_name: config
- env_set: {}
- env_copy: []
- config:
- override_dirname:
- kv_sep: '='
- item_sep: ','
- exclude_keys: []
- runtime:
- version: 1.3.2
- version_base: '1.3'
- cwd: /Users/tit420/projects/era5_sandbox/notes
- config_sources:
- - path: hydra.conf
- schema: pkg
- provider: hydra
- - path: /Users/tit420/projects/era5_sandbox/conf
- schema: file
- provider: main
- - path: ''
- schema: structured
- provider: schema
- output_dir: /Users/tit420/projects/era5_sandbox/notes/logs/2025-03-17/12-59-36
- choices:
- datapaths: datapaths
- hydra/env: default
- hydra/callbacks: null
- hydra/job_logging: default
- hydra/hydra_logging: default
- hydra/hydra_help: default
- hydra/help: default
- hydra/sweeper: basic
- hydra/launcher: basic
- hydra/output: default
- verbose: false
diff --git a/notes/logs/2025-03-17/12-59-36/.hydra/overrides.yaml b/notes/logs/2025-03-17/12-59-36/.hydra/overrides.yaml
deleted file mode 100644
index fe51488..0000000
--- a/notes/logs/2025-03-17/12-59-36/.hydra/overrides.yaml
+++ /dev/null
@@ -1 +0,0 @@
-[]
diff --git a/notes/logs/2025-03-17/12-59-36/ipython-input-1-3f35d1394572.log b/notes/logs/2025-03-17/12-59-36/ipython-input-1-3f35d1394572.log
deleted file mode 100644
index 10854a9..0000000
--- a/notes/logs/2025-03-17/12-59-36/ipython-input-1-3f35d1394572.log
+++ /dev/null
@@ -1,4 +0,0 @@
-[2025-03-17 12:59:37,230][datapi.legacy_api_client][INFO] - [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.
-[2025-03-17 12:59:37,232][datapi.legacy_api_client][WARNING] - [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the "Show API request code" tool on the dataset Download Form to check you are using the correct syntax for your API request.
-[2025-03-17 12:59:37,541][datapi.legacy_api_client][INFO] - Request ID is 94401c1f-cc22-4d58-acea-0cca463df9ab
-[2025-03-17 12:59:37,676][datapi.legacy_api_client][INFO] - status has been updated to accepted
diff --git a/notes/prototypes/download_QA.ipynb b/notes/prototypes/download_QA.ipynb
deleted file mode 100644
index 96e6b5a..0000000
--- a/notes/prototypes/download_QA.ipynb
+++ /dev/null
@@ -1,47 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "id": "036be852",
- "metadata": {},
- "source": [
- "# Investigating The Download Results\n",
- "\n",
- "There are a couple of things we should do to QA our data downloads. Specifically, we want to come up with a way of ensuring our aggregations are valid and accurate. This will require some simple EDA."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "08094a12",
- "metadata": {},
- "outputs": [],
- "source": [
- "from pyprojroot import here\n",
- "import pandas as pd\n",
- "import os\n",
- "from hydra import initialize, compose\n",
- "from omegaconf import OmegaConf, DictConfig"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "c3e18bb3",
- "metadata": {},
- "outputs": [],
- "source": [
- "eg_file = here() / \"data/input/2010_1.nc\""
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "python3",
- "language": "python",
- "name": "python3"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/notes/prototypes/kenya_demo_01_intro.ipynb b/notes/prototypes/kenya_demo_01_intro.ipynb
deleted file mode 100644
index 8fdb30b..0000000
--- a/notes/prototypes/kenya_demo_01_intro.ipynb
+++ /dev/null
@@ -1,6142 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "---\n",
- "skip_showdoc: true\n",
- "---"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Introduction to the ERA 5 Data\n",
- "\n",
- "The ERA5 dataset is the fifth iteration of the ECMWF ReAnalysis dataset, spanning from 1950 to the present. ECMWF is the \"European Centre for Medium-Range Weather Forecasts\".\n",
- "The dataset provides comprehensive and high-resolution historical weather and climate data. The source data is from the [Copernicus Climate Data Store (CDS)](https://cds.climate.copernicus.eu/#!/home). A comprehensive data documentation guide is available [here](https://confluence.ecmwf.int/display/CKB/ERA5%3A+data+documentation). In total, the entire CDS ERA data is over 10Petabytes.\n",
- "\n",
- "Fortunately for us, there are existing [Python](https://github.com/Climate-CAFE/era5-daily-heat-aggregation-python) and [R](https://github.com/Climate-CAFE/era5-daily-heat-aggregation) packages that have gone ahead and demonstrated extracting the data from the API for us, so we are going to use those to develop our workflow. Specifically, we're trying to understand the\n",
- "following characteristics of the data:\n",
- "\n",
- "* size, \n",
- "* how to download, \n",
- "* what are the key transformations to map things into the health sheds\n",
- "* two important variables: \n",
- " * 2m air temp, and, \n",
- " * 2m air dew point\n",
- "\n",
- "Let's get started\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Important: we need to install the CDS API first, so you'll need to grab an API key. First, you must register for an account and accept the T&Cs, afterwhich the page [here](https://ecmwf-projects.github.io/copernicus-training-c3s/cds-tutorial.html#install-the-cds-api-key) will autopopulate an API key for you. The following code shows a test case to make sure your API key works"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-03 13:31:07,682 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-03 13:31:07,683 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-03 13:31:07,959 INFO Request ID is 07de689d-b7df-439b-b303-2214b8f3eec0\n",
- "2025-03-03 13:31:08,091 INFO status has been updated to accepted\n",
- "2025-03-03 13:34:00,781 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "fb379a3afd064123b72fc016bd7ea267",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "1fd5a2b7ad40b8c614c78061a75d30d0.grib: 0%| | 0.00/1.98M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/plain": [
- "'download.grib'"
- ]
- },
- "execution_count": null,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "import cdsapi\n",
- "\n",
- "client = cdsapi.Client()\n",
- "\n",
- "dataset = 'reanalysis-era5-pressure-levels'\n",
- "request = {\n",
- " 'product_type': ['reanalysis'],\n",
- " 'variable': ['geopotential'],\n",
- " 'year': ['2024'],\n",
- " 'month': ['03'],\n",
- " 'day': ['01'],\n",
- " 'time': ['13:00'],\n",
- " 'pressure_level': ['1000'],\n",
- " 'data_format': 'grib',\n",
- "}\n",
- "target = 'download.grib'\n",
- "\n",
- "client.retrieve(dataset, request, target)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "This demonstration is expected to amass 9GB of data for raw raster files (24 years, 12 files per year). The demonstration generates the 24 years of heat measures across Kenya administrative boundaries, in 1-month periods of ERA5-Land data across Kenya with three variables (2-m temp, dew point temp, skin temp)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# imports as recommended by the github repo\n",
- "import cdsapi\n",
- "import geopandas as gpd\n",
- "import os\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "I'll use pyprojroot to specify a data path"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "from pyprojroot.here import here\n",
- "\n",
- "ecmw_dir = here(\"data\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "def create_dir(path):\n",
- "\n",
- " if not os.path.exists(path):\n",
- " os.makedirs(path)\n",
- "\n",
- " return path"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "create_dir(ecmw_dir)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# create a directory for the kenya data\n",
- "create_dir(os.path.join(ecmw_dir, \"Kenya_GADM\"))"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Next, we need to manually fetch this GADM file for Kenya from here: https://gadm.org/download_country.html\n",
- "\n",
- "This is a boundaries geopackage; GeoBoundaries is a global database of administrative boundaries (e.g., countries, states, provinces, districts). Hence, this file provides the\n",
- "boundaries for Kenyan regions"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "kenya_shape = gpd.read_file(os.path.join(ecmw_dir, \"Kenya_GADM/gadm41_KEN.gpkg\"), layer = \"ADM_ADM_0\")\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- "
\n",
- "
\n",
- "
GID_0
\n",
- "
COUNTRY
\n",
- "
geometry
\n",
- "
\n",
- " \n",
- " \n",
- "
\n",
- "
0
\n",
- "
KEN
\n",
- "
Kenya
\n",
- "
MULTIPOLYGON (((39.38014 -4.71792, 39.37986 -4...
\n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " GID_0 COUNTRY geometry\n",
- "0 KEN Kenya MULTIPOLYGON (((39.38014 -4.71792, 39.37986 -4..."
- ]
- },
- "execution_count": null,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "kenya_shape"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "The bounding box represents the coordinates of the shapefile, which is what we'll\n",
- "use to query Copernicus. Think of it like a mask provided in a file"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "kenya_bbox = kenya_shape.total_bounds"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "array([33.909588 , -4.720417 , 41.92621613, 5.06116581])"
- ]
- },
- "execution_count": null,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "kenya_bbox"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Technical: Add a small buffer around the bounding box to ensure the whole region \n",
- "is queried, and round the parameters to a 0.1 resolution. A 0.1 resolution\n",
- "is applied because the resolution of netCDF ERA5 data is .25x.25\n",
- "https://confluence.ecmwf.int/display/CKB/ERA5%3A+What+is+the+spatial+reference\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "kenya_bbox[0] = round(kenya_bbox[0], 1) - 0.1\n",
- "kenya_bbox[1] = round(kenya_bbox[1], 1) - 0.1\n",
- "kenya_bbox[2] = round(kenya_bbox[2], 1) + 0.1\n",
- "kenya_bbox[3] = round(kenya_bbox[3], 1) + 0.1"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# to build a query, specify [xmin, ymin, xmax, ymax]\n",
- "query_area = [kenya_bbox[0], kenya_bbox[1], kenya_bbox[2], kenya_bbox[3]]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "query_years = list(range(2000, 2024))\n",
- "query_years_str = [str(x) for x in query_years]\n",
- "\n",
- "query_months = list(range(1, 13))\n",
- "query_months_str = [str(x).zfill(2) for x in query_months]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "output_dir = create_dir(os.path.join(ecmw_dir, \"ERA5_out\"))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing year 2000 \n",
- "\n",
- "Now processing month 01 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-03 13:52:06,896 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-03 13:52:06,897 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-03 13:52:07,181 INFO Request ID is 56d97887-22e9-441c-b33c-2236e5feaa87\n",
- "2025-03-03 13:52:07,308 INFO status has been updated to accepted\n",
- "2025-03-03 13:52:15,943 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "5e8426ff1c4d4f7c9c4bf7fe34369109",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "3f0a8829f1720f8fa1289e11eedada58.nc: 0%| | 0.00/23.4M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 02 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-03 13:52:26,335 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-03 13:52:26,336 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-03 13:52:26,730 INFO Request ID is 02e31b6b-e047-4f4b-bf56-4eda01d5d08a\n",
- "2025-03-03 13:52:27,070 INFO status has been updated to accepted\n",
- "2025-03-03 13:52:36,181 INFO status has been updated to running\n",
- "2025-03-03 14:02:50,214 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "3818ec1261d94244826414c00c40038d",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "bdfda68cb5ba4affec5b1f592ec6ed5f.nc: 0%| | 0.00/20.3M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 03 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-03 14:02:56,179 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-03 14:02:56,180 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-03 14:02:56,555 INFO Request ID is 3e4ebcbc-7038-43b4-927b-b4ce291fd60f\n",
- "2025-03-03 14:02:56,703 INFO status has been updated to accepted\n",
- "2025-03-03 14:03:18,825 INFO status has been updated to running\n",
- "2025-03-03 14:11:18,070 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "da26cd78e6ce4e0391eb081bd3d1ed10",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "eadfd863f7f6a6f82d1f077b6f137cdf.nc: 0%| | 0.00/24.4M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 04 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-03 14:11:27,837 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-03 14:11:27,838 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-03 14:11:28,199 INFO Request ID is f2a01b32-4853-4d2d-a873-9986d4c668fa\n",
- "2025-03-03 14:11:28,358 INFO status has been updated to accepted\n",
- "2025-03-03 14:11:33,798 INFO status has been updated to running\n",
- "2025-03-03 14:17:48,663 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "ea59bb3364f648b1a990fa22b58bc379",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "7790b627e1ed17c07398e4943ddc66f8.nc: 0%| | 0.00/20.2M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 05 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-03 14:17:54,131 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-03 14:17:54,132 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-03 14:17:54,453 INFO Request ID is 1cf8b1bc-ba8b-4865-a895-1634b22b0cb0\n",
- "2025-03-03 14:17:54,572 INFO status has been updated to accepted\n",
- "2025-03-03 14:18:03,340 INFO status has been updated to running\n",
- "2025-03-03 14:24:14,619 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "fd851f61eb294e7c94eeecad7903a30e",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "af8da5f83b8f4edfcfdc9e9b01e85167.nc: 0%| | 0.00/22.7M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 06 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-03 14:24:18,644 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-03 14:24:18,645 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-03 14:24:18,893 INFO Request ID is 8d9297fa-d2d6-4cf8-8473-669175f205a6\n",
- "2025-03-03 14:24:19,004 INFO status has been updated to accepted\n",
- "2025-03-03 14:24:27,645 INFO status has been updated to running\n",
- "2025-03-03 14:30:40,248 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "3b2a46c558c242578f2c7e5602495436",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "f222a9f45eec1093306d1f8bbf9bbdd2.nc: 0%| | 0.00/21.5M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 07 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-03 14:30:45,830 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-03 14:30:45,832 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-03 14:30:46,161 INFO Request ID is 13f963a5-df07-42b0-9535-40f5dd511890\n",
- "2025-03-03 14:30:46,284 INFO status has been updated to accepted\n",
- "2025-03-03 14:30:54,964 INFO status has been updated to running\n",
- "2025-03-03 14:37:06,377 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "6a9a32f3b00044ef9dfd2b15db9a892e",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "a56370ec686f31437a5c9328b74e48da.nc: 0%| | 0.00/22.8M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 08 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-03 14:37:13,309 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-03 14:37:13,310 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-03 14:37:13,713 INFO Request ID is 33d25bd3-d6a3-4faa-8b21-7e9d93faf9f2\n",
- "2025-03-03 14:37:13,869 INFO status has been updated to accepted\n",
- "2025-03-03 14:37:22,628 INFO status has been updated to running\n",
- "2025-03-03 14:41:33,768 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "5877f53e1c974f13bda779dd1b2db544",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "a393f6cb418b557a044ef0b4e0fb3b68.nc: 0%| | 0.00/22.7M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 09 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-03 14:41:37,733 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-03 14:41:37,734 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-03 14:41:38,015 INFO Request ID is bc09a8e9-669b-4d05-85c4-e4cf6f0cc897\n",
- "2025-03-03 14:41:38,137 INFO status has been updated to accepted\n",
- "2025-03-03 14:41:46,811 INFO status has been updated to running\n",
- "2025-03-03 14:51:58,738 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "3d87c52f8b2842bfa6745f921025fb64",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "53d56522c26bfa010f95b2a567bfba6b.nc: 0%| | 0.00/21.7M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 10 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-03 14:52:02,100 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-03 14:52:02,100 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-03 14:52:02,388 INFO Request ID is 241ee8b5-f821-40e3-85a2-8409879e70f9\n",
- "2025-03-03 14:52:02,514 INFO status has been updated to accepted\n",
- "2025-03-03 14:52:16,437 INFO status has been updated to running\n",
- "2025-03-03 15:00:13,552 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "679b476a4f144069996a0f2ac17a54a2",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "f7c843ccd2d84ceb1b96e518a6aa2700.nc: 0%| | 0.00/23.8M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 11 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-03 15:00:27,786 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-03 15:00:27,788 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-03 15:00:28,099 INFO Request ID is 8105daba-e0c3-4645-99cb-5cfe48933195\n",
- "2025-03-03 15:00:28,306 INFO status has been updated to accepted\n",
- "2025-03-03 15:00:42,459 INFO status has been updated to running\n",
- "2025-03-03 15:04:48,209 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "4c6742fa7b4b46288fc033cdbcd69d93",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "48275b1e8bd062eebd8362a189661294.nc: 0%| | 0.00/20.2M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 12 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-03 15:04:52,699 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-03 15:04:52,700 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-03 15:04:53,116 INFO Request ID is dd89e23a-cada-48d1-8c5f-4792a907f6c0\n",
- "2025-03-03 15:04:53,231 INFO status has been updated to accepted\n",
- "2025-03-03 15:05:07,118 INFO status has been updated to running\n",
- "2025-03-03 15:11:13,173 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "abb09e98a258494789bbf255afb60293",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "63a09f7c91b0eee81c2abc276659ff0.nc: 0%| | 0.00/22.8M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing year 2001 \n",
- "\n",
- "Now processing month 01 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-03 15:11:17,671 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-03 15:11:17,672 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-03 15:11:18,119 INFO Request ID is d39ace79-6b2b-46f1-9dfc-97a95c08be81\n",
- "2025-03-03 15:11:18,254 INFO status has been updated to accepted\n",
- "2025-03-03 15:11:27,207 INFO status has been updated to running\n",
- "2025-03-03 15:17:38,525 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "1addb50452054720a0c593ea70d5bbc8",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "a6f5c95b97ba2a5ea63d74168362d060.nc: 0%| | 0.00/23.0M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 02 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-03 15:17:43,072 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-03 15:17:43,073 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-03 15:17:43,421 INFO Request ID is 24a2df52-8ea9-47b6-bd22-8e3bdb4abc41\n",
- "2025-03-03 15:17:43,550 INFO status has been updated to accepted\n",
- "2025-03-03 15:17:52,171 INFO status has been updated to running\n",
- "2025-03-03 15:22:03,113 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "8d7958687cfc4c11a2413e52a8f7053b",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "7353a6681620a6ac4e57207c4c34cf7e.nc: 0%| | 0.00/19.2M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 03 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-03 15:22:06,759 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-03 15:22:06,759 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-03 15:22:07,172 INFO Request ID is 83ad483b-c8ad-4626-9ed2-49df53bca636\n",
- "2025-03-03 15:22:07,283 INFO status has been updated to accepted\n",
- "2025-03-03 15:22:15,995 INFO status has been updated to running\n",
- "2025-03-03 15:26:27,325 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "9d2d23c5b6894b20a0157f39177359e1",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "b49dba0c41e7c7174ed705b77edab42.nc: 0%| | 0.00/23.4M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 04 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-03 15:26:33,108 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-03 15:26:33,109 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-03 15:26:33,462 INFO Request ID is c07a211a-40ef-4b0a-b63e-605127309fa3\n",
- "2025-03-03 15:26:33,603 INFO status has been updated to accepted\n",
- "2025-03-03 15:26:42,257 INFO status has been updated to running\n",
- "2025-03-03 15:30:53,083 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "7fd7f4006a004d04ba330ed4e7c51b0f",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "53bf46ff597b3d357606e8cae56c470a.nc: 0%| | 0.00/20.4M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 05 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-03 15:30:56,589 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-03 15:30:56,591 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-03 15:30:56,905 INFO Request ID is ca147122-6e94-4fd4-a20b-ee88ec92653f\n",
- "2025-03-03 15:30:57,038 INFO status has been updated to accepted\n",
- "2025-03-03 15:31:05,718 INFO status has been updated to running\n",
- "2025-03-03 15:35:16,989 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "c6c62232f90a42eb92e80811d4dac82a",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "c155f6e8757b8648c1ce728eaa1f0866.nc: 0%| | 0.00/23.0M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 06 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-03 15:35:21,024 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-03 15:35:21,025 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-03 15:35:21,366 INFO Request ID is d335b374-1775-4614-97a4-5a3821e02309\n",
- "2025-03-03 15:35:21,490 INFO status has been updated to accepted\n",
- "2025-03-03 15:35:30,131 INFO status has been updated to running\n",
- "2025-03-03 15:41:41,797 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "a21c8103d97b4fbcb3d7d626855dbe39",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "6146d7b9b2da8097fbe3021e1682cfa0.nc: 0%| | 0.00/21.6M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 07 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-03 15:41:45,590 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-03 15:41:45,591 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-03 15:41:45,881 INFO Request ID is 9a7bd182-69d5-462c-a4ab-b9785f1378da\n",
- "2025-03-03 15:41:46,002 INFO status has been updated to accepted\n",
- "2025-03-03 15:41:54,620 INFO status has been updated to running\n",
- "2025-03-03 15:50:06,189 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "5d80258ccb0e4be69d1f2af58b985ecc",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "e2b5d31f1e484f56cbcc8ef7eaa61941.nc: 0%| | 0.00/22.8M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 08 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-03 15:50:09,722 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-03 15:50:09,723 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-03 15:50:10,010 INFO Request ID is 12d44164-5b53-44d1-b59e-8014fc094cf1\n",
- "2025-03-03 15:50:10,148 INFO status has been updated to accepted\n",
- "2025-03-03 15:50:15,289 INFO status has been updated to running\n",
- "2025-03-03 15:58:30,547 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "f46d6d7fda4841389190afe276996c42",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "edd45e6f0ef2cca4d8eacbffd0965bbb.nc: 0%| | 0.00/23.8M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 09 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-03 15:58:34,164 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-03 15:58:34,165 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-03 15:58:34,507 INFO Request ID is 47938f42-99b6-4950-b016-9d26a52a1c58\n",
- "2025-03-03 15:58:34,631 INFO status has been updated to accepted\n",
- "2025-03-03 15:58:43,316 INFO status has been updated to running\n",
- "2025-03-03 16:04:54,992 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "a9c243a238ee4513a13098952a929a0b",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "57793eca83ae1a510eed0c349ee3ec86.nc: 0%| | 0.00/21.7M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 10 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-03 16:04:59,087 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-03 16:04:59,087 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-03 16:04:59,386 INFO Request ID is 181de390-2ee5-4f22-9e30-eee697fa6cfb\n",
- "2025-03-03 16:04:59,600 INFO status has been updated to accepted\n",
- "2025-03-03 16:05:08,251 INFO status has been updated to running\n",
- "2025-03-03 16:09:19,270 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "0152bc860eb4459586d1de04f8f1ab8a",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "df861e01490f4bb9f2b017dddbeb4c9a.nc: 0%| | 0.00/23.1M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 11 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-03 16:09:24,093 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-03 16:09:24,093 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-03 16:09:24,402 INFO Request ID is 05046ebf-a981-4608-a637-69fbf0be52c6\n",
- "2025-03-03 16:09:24,530 INFO status has been updated to accepted\n",
- "2025-03-03 16:09:33,211 INFO status has been updated to running\n",
- "2025-03-03 16:15:45,215 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "45c8338e59464e0d8f48ee97526b6d45",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "cc92abb473270b684e4311a89ea223b8.nc: 0%| | 0.00/21.7M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 12 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-03 16:15:48,551 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-03 16:15:48,552 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-03 16:15:48,865 INFO Request ID is 78c4cfb8-5e49-409b-af59-5aaea12c7443\n",
- "2025-03-03 16:15:49,009 INFO status has been updated to accepted\n",
- "2025-03-03 16:15:57,626 INFO status has been updated to running\n",
- "2025-03-03 16:22:09,151 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "cb4613bbd8ba4e21a711df600b63772a",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "4030d07d1ca90f940aaf0bcdbbfc9a69.nc: 0%| | 0.00/23.7M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing year 2002 \n",
- "\n",
- "Now processing month 01 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-03 16:22:14,128 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-03 16:22:14,128 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-03 16:22:14,432 INFO Request ID is 8ee4bafd-9b6e-4884-948a-eb86a903be4b\n",
- "2025-03-03 16:22:14,563 INFO status has been updated to accepted\n",
- "2025-03-03 16:22:23,215 INFO status has been updated to running\n",
- "2025-03-03 16:28:34,713 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "0bfde1af72f5484da0066f93dc20559b",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "4952a6018fe09831b536f02cc68cfdaa.nc: 0%| | 0.00/22.7M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 02 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-03 16:28:38,466 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-03 16:28:38,468 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-03 16:28:38,748 INFO Request ID is 7e7c46c6-7e33-4456-bb84-ab6abd905e73\n",
- "2025-03-03 16:28:38,899 INFO status has been updated to accepted\n",
- "2025-03-03 16:28:47,557 INFO status has been updated to running\n",
- "2025-03-03 16:34:58,844 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "4a31e5296f324d15b80634dc6bac0675",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "746bfeac4672f082014b9bfc8629e0fc.nc: 0%| | 0.00/19.5M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 03 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-03 16:35:02,056 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-03 16:35:02,057 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-03 16:35:02,383 INFO Request ID is a1dbd535-ef6a-4ff8-843a-dc380bbfd9fc\n",
- "2025-03-03 16:35:02,490 INFO status has been updated to accepted\n",
- "2025-03-03 16:35:12,020 INFO status has been updated to running\n",
- "2025-03-03 16:41:23,295 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "33aa9295a7a04fd0a7b99504c7e7f575",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "f0aa24727636a12faf9e051c128a6dbf.nc: 0%| | 0.00/23.8M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 04 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-03 16:41:26,983 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-03 16:41:26,983 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-03 16:41:27,339 INFO Request ID is 169d5b0c-53e0-422a-a99b-753cb770b553\n",
- "2025-03-03 16:41:27,473 INFO status has been updated to accepted\n",
- "2025-03-03 16:41:32,657 INFO status has been updated to running\n",
- "2025-03-03 16:47:47,676 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "24c4463134554e47a9cac840f9637504",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "1139875c53f8ba5a2279763e094e82dd.nc: 0%| | 0.00/20.1M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 05 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-03 16:47:53,627 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-03 16:47:53,629 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-03 16:47:53,949 INFO Request ID is e057a82f-26c1-49c2-86d2-e803f4e609b9\n",
- "2025-03-03 16:47:54,149 INFO status has been updated to accepted\n",
- "2025-03-03 16:47:59,351 INFO status has been updated to running\n",
- "2025-03-03 16:54:14,219 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "6341e9f0f14d4777b0cd0cda7d0267d0",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "e6cd334a734633bf2677a41b64ef0503.nc: 0%| | 0.00/22.8M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 06 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-03 16:54:17,476 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-03 16:54:17,477 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-03 16:54:17,808 INFO Request ID is 57f52ced-a466-46f7-8e89-60962b0b1e71\n",
- "2025-03-03 16:54:17,962 INFO status has been updated to accepted\n",
- "2025-03-03 16:54:26,612 INFO status has been updated to running\n",
- "2025-03-03 17:04:38,619 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "31579ab608ef461ba017145618f066ea",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "c549e11cbe2ac4ea745dfee071d5477c.nc: 0%| | 0.00/21.5M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 07 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-03 17:04:41,945 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-03 17:04:41,947 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-03 17:04:42,262 INFO Request ID is 0c4a12a8-9e3f-4479-903c-12218d4995cc\n",
- "2025-03-03 17:04:42,395 INFO status has been updated to accepted\n",
- "2025-03-03 17:04:56,207 INFO status has been updated to running\n",
- "2025-03-03 17:11:02,228 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "3aba2d3e8e25401782014b3569373713",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "4910d00a419d7e9af6772646a37c5ef2.nc: 0%| | 0.00/22.8M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 08 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-03 17:11:05,552 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-03 17:11:05,553 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-03 17:11:05,923 INFO Request ID is 2a14a8a9-72bb-4ad5-be7b-66ea35cc5f78\n",
- "2025-03-03 17:11:06,092 INFO status has been updated to accepted\n",
- "2025-03-03 17:11:19,956 INFO status has been updated to running\n",
- "2025-03-03 17:17:26,215 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "8a72c7a0e6ef4ffe945d7883adcfd157",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "df18b33085c197ddb5ead7aad67e32ea.nc: 0%| | 0.00/23.5M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 09 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-03 17:17:40,447 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-03 17:17:40,448 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-03 17:17:40,911 INFO Request ID is ab2a6b15-0383-4bda-877a-38c400783092\n",
- "2025-03-03 17:17:41,098 INFO status has been updated to accepted\n",
- "2025-03-03 17:17:49,765 INFO status has been updated to running\n",
- "2025-03-03 17:24:01,146 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "4fa890dc303d49e585bfc82f8e8854fc",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "24747fb2834f85bc2bc161a526047576.nc: 0%| | 0.00/21.3M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 10 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-03 17:24:05,314 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-03 17:24:05,315 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-03 17:24:05,613 INFO Request ID is 2222bf20-78ae-480b-8866-d04fb0ebd22b\n",
- "2025-03-03 17:24:05,723 INFO status has been updated to accepted\n",
- "2025-03-03 17:24:14,410 INFO status has been updated to running\n",
- "2025-03-03 17:28:25,275 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "3ac9d234b7794c09a5a177a2afb01906",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "4a0047e55e40631d4257bc999430f369.nc: 0%| | 0.00/22.8M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 11 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-03 17:28:28,643 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-03 17:28:28,644 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-03 17:28:28,930 INFO Request ID is 4247b9db-0eb8-4dd4-bea7-71a188cd7191\n",
- "2025-03-03 17:28:29,062 INFO status has been updated to accepted\n",
- "2025-03-03 17:28:37,733 INFO status has been updated to running\n",
- "2025-03-03 17:34:48,991 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "989127f5cdaa41c89febcfc02a57eddf",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "83f38bf86f197534418c3f793d00df51.nc: 0%| | 0.00/20.2M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 12 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-03 17:34:52,520 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-03 17:34:52,520 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-03 17:34:52,853 INFO Request ID is 54b41ffd-5411-438b-9209-5a1a48e726f2\n",
- "2025-03-03 17:34:52,983 INFO status has been updated to accepted\n",
- "2025-03-03 17:35:01,605 INFO status has been updated to running\n",
- "2025-03-03 17:41:12,968 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "bb06602689914d90b4f0992b600da0e0",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "c548caeddfbc3b57a873757af31bb3f2.nc: 0%| | 0.00/22.6M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing year 2003 \n",
- "\n",
- "Now processing month 01 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-03 17:41:16,083 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-03 17:41:16,084 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-03 17:41:16,395 INFO Request ID is da7f6ab3-f9a4-4cf4-a336-8d4c6069eab6\n",
- "2025-03-03 17:41:16,530 INFO status has been updated to accepted\n",
- "2025-03-03 17:41:25,165 INFO status has been updated to running\n",
- "Recovering from connection error [HTTPSConnectionPool(host='cds.climate.copernicus.eu', port=443): Max retries exceeded with url: /api/retrieve/v1/jobs/da7f6ab3-f9a4-4cf4-a336-8d4c6069eab6?log=True&request=True (Caused by NameResolutionError(\": Failed to resolve 'cds.climate.copernicus.eu' ([Errno 8] nodename nor servname provided, or not known)\"))], attemps 1 of 500\n",
- "Retrying in 120 seconds\n",
- "2025-03-03 18:40:35,507 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "aeccd1069d2d4f80a3b569d25da240ff",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "9cbcabcbc8259ee36e1d9c27c7bcd2e5.nc: 0%| | 0.00/22.9M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 02 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-03 18:41:15,737 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-03 18:41:15,738 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-03 18:41:16,175 INFO Request ID is 4516d2f1-5b65-4107-b5b8-4ce4d95e348a\n",
- "2025-03-03 18:41:16,336 INFO status has been updated to accepted\n",
- "2025-03-03 18:41:25,142 INFO status has been updated to running\n",
- "2025-03-03 18:45:36,530 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "f32cb18bfec9432695ce7b6dc6dcb890",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "18bd057dcbf50d7d98976cc045d62028.nc: 0%| | 0.00/19.3M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 03 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-03 18:45:39,813 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-03 18:45:39,813 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-03 18:45:40,206 INFO Request ID is 34d46db0-4d01-40bd-b373-8162efa32401\n",
- "2025-03-03 18:45:40,364 INFO status has been updated to accepted\n",
- "2025-03-03 18:45:49,388 INFO status has been updated to running\n",
- "2025-03-03 18:52:02,740 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "260c24823bad44a9b4d6edd3745e2cf8",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "322964381fe45a66af5c5413fbf5644a.nc: 0%| | 0.00/23.3M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 04 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-03 18:52:11,330 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-03 18:52:11,331 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-03 18:52:11,722 INFO Request ID is 4d879f72-cb55-4254-aa16-5a72ef43d23c\n",
- "2025-03-03 18:52:11,883 INFO status has been updated to accepted\n",
- "2025-03-03 18:52:20,741 INFO status has been updated to running\n",
- "2025-03-03 18:58:32,697 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "698cd55310a549a59ac746617016f236",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "9ae86c78223efe1a0ac750a2a221fc6d.nc: 0%| | 0.00/21.7M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 05 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-03 18:58:55,948 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-03 18:58:55,949 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-03 18:58:56,566 INFO Request ID is 6e70abd1-da04-4fb4-b969-e4048d40ef03\n",
- "2025-03-03 18:58:56,721 INFO status has been updated to accepted\n",
- "2025-03-03 18:59:10,952 INFO status has been updated to running\n",
- "Recovering from connection error [HTTPSConnectionPool(host='cds.climate.copernicus.eu', port=443): Read timed out. (read timeout=60)], attemps 1 of 500\n",
- "Retrying in 120 seconds\n",
- "2025-03-03 20:16:37,571 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "d12050ce8620478eb7420de671999c85",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "77351793564a16788bd9ff4ff003f15e.nc: 0%| | 0.00/24.0M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 06 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-03 20:16:41,181 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-03 20:16:41,182 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-03 20:16:41,558 INFO Request ID is c5b24a83-481b-4e13-8662-3174a3cc8038\n",
- "2025-03-03 20:16:41,704 INFO status has been updated to accepted\n",
- "2025-03-03 20:16:55,758 INFO status has been updated to running\n",
- "Recovering from connection error [HTTPSConnectionPool(host='cds.climate.copernicus.eu', port=443): Read timed out. (read timeout=60)], attemps 1 of 500\n",
- "Retrying in 120 seconds\n",
- "2025-03-03 21:54:10,191 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "0b315e9b63c7460b83f625e8d568fe5d",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "7f99b74ab25ed4e8b5c181262f90162b.nc: 0%| | 0.00/21.1M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 07 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-03 21:54:13,686 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-03 21:54:13,687 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-03 21:54:14,053 INFO Request ID is aa63909b-dd55-4b50-9109-d97101407bb4\n",
- "2025-03-03 21:54:14,221 INFO status has been updated to accepted\n",
- "2025-03-03 21:54:23,021 INFO status has been updated to running\n",
- "Recovering from connection error [HTTPSConnectionPool(host='cds.climate.copernicus.eu', port=443): Read timed out. (read timeout=60)], attemps 1 of 500\n",
- "Retrying in 120 seconds\n",
- "2025-03-03 22:46:05,448 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "934ff6b4de3f400087592ed675baab83",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "b5f4942e64b0f955a2a693d1e5bdbc64.nc: 0%| | 0.00/22.9M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 08 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-03 22:46:15,545 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-03 22:46:15,547 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-03 22:46:15,927 INFO Request ID is 20adab3a-13d3-4718-9b95-1545745ac571\n",
- "2025-03-03 22:46:16,445 INFO status has been updated to accepted\n",
- "2025-03-03 22:46:21,698 INFO status has been updated to running\n",
- "Recovering from connection error [HTTPSConnectionPool(host='cds.climate.copernicus.eu', port=443): Read timed out. (read timeout=60)], attemps 1 of 500\n",
- "Retrying in 120 seconds\n",
- "2025-03-04 00:21:52,696 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "e6bc2de255b24d44b82a688b1ac10deb",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "d4ff59b4ec3fda00d4074896dbcb7bc.nc: 0%| | 0.00/23.8M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 09 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-04 00:22:07,044 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-04 00:22:07,059 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-04 00:22:07,433 INFO Request ID is 9e1dad3f-786e-4f36-8f48-a3d8b0e3d291\n",
- "2025-03-04 00:22:07,590 INFO status has been updated to accepted\n",
- "2025-03-04 00:22:16,404 INFO status has been updated to running\n",
- "Recovering from connection error [HTTPSConnectionPool(host='cds.climate.copernicus.eu', port=443): Read timed out. (read timeout=60)], attemps 1 of 500\n",
- "Retrying in 120 seconds\n",
- "2025-03-04 03:10:29,610 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "3d1ca201585140c6b40252677adf643f",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "132b6164c0ca36daaf324f4207628ff0.nc: 0%| | 0.00/21.7M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 10 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-04 03:10:33,344 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-04 03:10:33,344 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-04 03:10:33,702 INFO Request ID is 59d7b1b3-5a6c-4305-bec5-84d8a54588d2\n",
- "2025-03-04 03:10:33,859 INFO status has been updated to accepted\n",
- "2025-03-04 03:10:39,118 INFO status has been updated to running\n",
- "Recovering from connection error [HTTPSConnectionPool(host='cds.climate.copernicus.eu', port=443): Read timed out. (read timeout=60)], attemps 1 of 500\n",
- "Retrying in 120 seconds\n",
- "2025-03-04 04:48:49,216 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "a9b4fd82010141a8b465911b7149fb6f",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "a555c239db00bc554d8244e8ad695672.nc: 0%| | 0.00/23.1M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 11 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-04 04:48:53,323 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-04 04:48:53,324 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-04 04:48:53,679 INFO Request ID is 16f644ad-e849-4a58-8214-d2e620a096e4\n",
- "2025-03-04 04:48:53,848 INFO status has been updated to accepted\n",
- "2025-03-04 04:48:59,101 INFO status has been updated to running\n",
- "Recovering from connection error [HTTPSConnectionPool(host='cds.climate.copernicus.eu', port=443): Read timed out. (read timeout=60)], attemps 1 of 500\n",
- "Retrying in 120 seconds\n",
- "2025-03-04 06:27:39,088 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "ea0bc53ce4b14fc6931c6e4017473dc2",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "2f4822e1cbe36b79a3992ad7f01f4214.nc: 0%| | 0.00/21.3M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 12 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-04 06:27:49,122 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-04 06:27:49,123 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-04 06:27:49,516 INFO Request ID is 45635b7d-1128-402c-99ec-8a0ad7814c83\n",
- "2025-03-04 06:27:49,722 INFO status has been updated to accepted\n",
- "2025-03-04 06:27:55,019 INFO status has been updated to running\n",
- "2025-03-04 06:28:04,209 INFO status has been updated to accepted\n",
- "2025-03-04 06:28:12,005 INFO status has been updated to running\n",
- "Recovering from connection error [HTTPSConnectionPool(host='cds.climate.copernicus.eu', port=443): Read timed out. (read timeout=60)], attemps 1 of 500\n",
- "Retrying in 120 seconds\n",
- "2025-03-04 08:01:27,064 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "cdb69efbd04542039c06314d43d8d306",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "4bd60f841fa47583f79fc66a8401d0ad.nc: 0%| | 0.00/22.7M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing year 2004 \n",
- "\n",
- "Now processing month 01 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-04 08:01:32,319 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-04 08:01:32,320 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-04 08:01:33,516 INFO Request ID is bb759952-18b4-48d9-8d0e-954362686a6a\n",
- "2025-03-04 08:01:33,677 INFO status has been updated to accepted\n",
- "2025-03-04 08:01:38,967 INFO status has been updated to running\n",
- "Recovering from connection error [HTTPSConnectionPool(host='cds.climate.copernicus.eu', port=443): Read timed out. (read timeout=60)], attemps 1 of 500\n",
- "Retrying in 120 seconds\n",
- "2025-03-04 09:38:12,083 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "533c3b3733754da480da0f570e6c9a0d",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "d4f4e4aeb1a9248692853a5c3efbf89.nc: 0%| | 0.00/23.5M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Recovering from connection error [HTTPSConnectionPool(host='object-store.os-api.cci2.ecmwf.int', port=443): Read timed out.], attemps 1 of 500\n",
- "Retrying in 120 seconds\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "df70a07129dc4c81ac58a29561de8df5",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "d4f4e4aeb1a9248692853a5c3efbf89.nc: 0%| | 0.00/23.5M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 02 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-04 09:47:43,625 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-04 09:47:43,626 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-04 09:47:43,900 INFO Request ID is e3420e1c-4f8d-4251-964c-2e001a220819\n",
- "2025-03-04 09:47:44,024 INFO status has been updated to accepted\n",
- "2025-03-04 09:47:49,179 INFO status has been updated to running\n",
- "2025-03-04 09:54:04,081 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "fa56954c3f7647279dd7914b28b0a6bc",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "6e51aaba6e9e394aec2c472dba256e72.nc: 0%| | 0.00/20.3M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 03 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-04 09:54:10,509 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-04 09:54:10,510 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-04 09:54:10,838 INFO Request ID is beb2cc9b-50ba-4385-885a-74f82cd85646\n",
- "2025-03-04 09:54:11,012 INFO status has been updated to accepted\n",
- "2025-03-04 09:54:16,257 INFO status has been updated to running\n",
- "2025-03-04 10:02:32,275 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "c2b9d4e5be4149cea9197ed0d9720794",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "577113643e225204be90c7d41e3255c1.nc: 0%| | 0.00/22.9M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 04 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-04 10:02:36,182 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-04 10:02:36,183 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-04 10:02:36,590 INFO Request ID is 0485dce7-9b91-4e4d-b46a-57c9dcb2acca\n",
- "2025-03-04 10:02:36,721 INFO status has been updated to accepted\n",
- "2025-03-04 10:02:45,458 INFO status has been updated to running\n",
- "2025-03-04 10:08:57,057 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "e2ff17238ba0422d98e9c9e1be546d8d",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "b15d5bde5ff8d58b65a653eb30e811bc.nc: 0%| | 0.00/20.4M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 05 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-04 10:09:02,921 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-04 10:09:02,922 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-04 10:09:03,198 INFO Request ID is 6ee8d338-0f4e-4f05-a394-18607c0edb9d\n",
- "2025-03-04 10:09:03,336 INFO status has been updated to accepted\n",
- "2025-03-04 10:09:18,215 INFO status has been updated to running\n",
- "2025-03-04 10:15:24,462 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "9b4cacabfd754e16bb3ce53862f473b0",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "e9d63b8d3e4766add5c9adda544ffb77.nc: 0%| | 0.00/22.9M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 06 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-04 10:15:29,851 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-04 10:15:29,852 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-04 10:15:30,148 INFO Request ID is ad2bfc81-e9d2-4a1b-bb03-eee097b8ebff\n",
- "2025-03-04 10:15:30,286 INFO status has been updated to accepted\n",
- "2025-03-04 10:15:35,440 INFO status has been updated to running\n",
- "2025-03-04 10:21:50,350 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "2292fc2f3e9548ac93d018112cc55464",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "d4e93dc9a851bdcd8ae7c4f4fa7b61d9.nc: 0%| | 0.00/20.3M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 07 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-04 10:21:56,973 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-04 10:21:56,974 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-04 10:21:57,348 INFO Request ID is 93a6680d-baa9-49e7-9a29-4224f5e24de2\n",
- "2025-03-04 10:21:57,495 INFO status has been updated to accepted\n",
- "2025-03-04 10:22:02,865 INFO status has been updated to running\n",
- "2025-03-04 10:22:11,652 INFO status has been updated to accepted\n",
- "2025-03-04 10:22:19,406 INFO status has been updated to running\n",
- "2025-03-04 10:28:17,758 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "c72b83f0e0184bdbae2c7aa07950d9d2",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "75ff3e1ccd6f92b7d8c8dbdd7d4ae92d.nc: 0%| | 0.00/23.5M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 08 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-04 10:28:24,118 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-04 10:28:24,119 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-04 10:28:24,440 INFO Request ID is a70f92e0-ec80-493a-8b6a-41fa00b00119\n",
- "2025-03-04 10:28:24,574 INFO status has been updated to accepted\n",
- "2025-03-04 10:28:33,356 INFO status has been updated to running\n",
- "2025-03-04 10:34:44,894 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "36db07a5c15b40c1bfca3f9ad39fbd72",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "4a6b2d7d1cb2482b423c99751d84a34d.nc: 0%| | 0.00/23.7M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 09 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-04 10:34:48,913 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-04 10:34:48,913 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-04 10:34:49,247 INFO Request ID is 8462b2cd-cd64-4afb-a331-929d6c36b6d3\n",
- "2025-03-04 10:34:49,385 INFO status has been updated to accepted\n",
- "2025-03-04 10:35:03,181 INFO status has been updated to running\n",
- "2025-03-04 10:41:09,230 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "3b3d2a0de5f6442aae1c18c71a992953",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "6b2313ffa46bdca1e1b8a0272157602c.nc: 0%| | 0.00/20.4M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 10 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-04 10:41:18,443 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-04 10:41:18,443 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-04 10:41:18,803 INFO Request ID is 9c4e4872-cc6d-4c69-b5e5-5e9611545f92\n",
- "2025-03-04 10:41:18,955 INFO status has been updated to accepted\n",
- "2025-03-04 10:41:27,708 INFO status has been updated to running\n",
- "2025-03-04 10:47:40,234 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "34ea4423541646c48f00affdebd94030",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "e69b8907411d58169a4757de7beb788c.nc: 0%| | 0.00/22.9M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 11 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-04 10:47:46,398 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-04 10:47:46,399 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-04 10:47:46,722 INFO Request ID is f9da7b12-259e-4213-ad8f-b09b805176a3\n",
- "2025-03-04 10:47:46,844 INFO status has been updated to accepted\n",
- "2025-03-04 10:48:08,549 INFO status has been updated to running\n",
- "2025-03-04 10:54:06,935 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "55f246a97b1f4d618acf2cc93904ec42",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "fba17f50666c361d8f4774e5e8c3d6aa.nc: 0%| | 0.00/21.1M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 12 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-04 10:54:12,500 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-04 10:54:12,501 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-04 10:54:12,796 INFO Request ID is 5d529887-5804-4e3e-8bfb-2a1a936f531e\n",
- "2025-03-04 10:54:13,007 INFO status has been updated to accepted\n",
- "2025-03-04 10:54:18,251 INFO status has been updated to running\n",
- "2025-03-04 11:00:33,100 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "9e54feb461444e8db65691c61e98104b",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "3e0cb42efea037099b66d46828f8d149.nc: 0%| | 0.00/23.3M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing year 2005 \n",
- "\n",
- "Now processing month 01 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-04 11:00:37,145 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-04 11:00:37,146 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-04 11:00:37,503 INFO Request ID is a40e8775-d8b4-499e-baa7-f564cd05123c\n",
- "2025-03-04 11:00:37,635 INFO status has been updated to accepted\n",
- "2025-03-04 11:00:46,318 INFO status has been updated to running\n",
- "2025-03-04 11:06:57,519 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "2d344154229a474088606f9c4cf1f4f1",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "100dc6b3e4ef04c0b33734cde7d11ca5.nc: 0%| | 0.00/23.5M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 02 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-04 11:07:01,610 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-04 11:07:01,611 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-04 11:07:02,019 INFO Request ID is 03fdd916-29b5-4d4e-923f-19e3bd66bca1\n",
- "2025-03-04 11:07:02,130 INFO status has been updated to accepted\n",
- "2025-03-04 11:07:23,700 INFO status has been updated to running\n",
- "2025-03-04 11:13:22,011 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "2c200dfce4eb4b198bc9702243b38ebe",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "82f6b0ec92e3083461af35420a4356b9.nc: 0%| | 0.00/19.6M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 03 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-04 11:13:26,175 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-04 11:13:26,176 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-04 11:13:26,462 INFO Request ID is dcf86bd8-5951-47eb-833b-f9971bf3f587\n",
- "2025-03-04 11:13:26,587 INFO status has been updated to accepted\n",
- "2025-03-04 11:13:31,737 INFO status has been updated to running\n",
- "2025-03-04 11:19:46,761 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "1e2e1d634e974b338c61a56e7110f1b0",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "5ca6b0c8145b049c15ae44413de55d25.nc: 0%| | 0.00/22.8M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 04 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-04 11:19:53,689 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-04 11:19:53,690 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-04 11:19:54,001 INFO Request ID is 005fbc44-a1d1-4b23-ad05-0cd81f88ffea\n",
- "2025-03-04 11:19:54,132 INFO status has been updated to accepted\n",
- "2025-03-04 11:20:27,373 INFO status has been updated to running\n",
- "2025-03-04 11:32:15,333 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "f89ee9a22fc045df89f9831224d0fc6a",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "ffc02554880287503940142e8a510d4a.nc: 0%| | 0.00/21.0M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 05 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-04 11:32:19,220 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-04 11:32:19,222 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-04 11:32:19,570 INFO Request ID is 6bb5e9c7-9b29-41ea-a109-389b8d991ac0\n",
- "2025-03-04 11:32:19,744 INFO status has been updated to accepted\n",
- "2025-03-04 11:42:40,683 INFO status has been updated to running\n",
- "2025-03-04 11:48:41,817 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "ec52c053bfae4146b2017cd88a36c584",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "8f252a394e01bee76c3963e6dd84b52e.nc: 0%| | 0.00/21.9M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 06 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-04 11:48:47,592 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-04 11:48:47,592 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-04 11:48:47,914 INFO Request ID is 724fe9e0-d76b-42ba-8c5a-f38589b1efeb\n",
- "2025-03-04 11:48:48,047 INFO status has been updated to accepted\n",
- "2025-03-04 12:09:12,266 INFO status has been updated to running\n",
- "2025-03-04 12:15:13,455 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "b249ff9061424656a32e48f2c035e6cf",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "17d0d4b440635e196965c85a437e0f98.nc: 0%| | 0.00/20.3M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 07 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-04 12:15:19,513 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-04 12:15:19,514 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-04 12:15:19,816 INFO Request ID is 3a4657cf-d697-48da-83ea-d110a50fd5e3\n",
- "2025-03-04 12:15:19,949 INFO status has been updated to accepted\n",
- "2025-03-04 12:27:41,331 INFO status has been updated to running\n",
- "2025-03-04 12:33:43,172 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "c073ea87e29942e29837603c9b4f5fa5",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "3462d7892fbf021e487d38ff00ea729f.nc: 0%| | 0.00/22.2M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 08 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-04 12:33:53,005 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-04 12:33:53,006 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-04 12:33:53,308 INFO Request ID is d92c960f-bf32-43f7-becb-1a8c2023442a\n",
- "2025-03-04 12:33:53,430 INFO status has been updated to accepted\n",
- "2025-03-04 12:40:13,337 INFO status has been updated to running\n",
- "2025-03-04 12:46:14,388 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "0e6ba7333efe4c998c402113e72275ac",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "6f115164f9f9eb7585681eea3410cc27.nc: 0%| | 0.00/23.8M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 09 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-04 12:47:32,769 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-04 12:47:32,770 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-04 12:47:33,094 INFO Request ID is a128253d-b7b4-4b64-8a56-de57091c5433\n",
- "2025-03-04 12:47:33,525 INFO status has been updated to accepted\n",
- "2025-03-04 12:47:42,430 INFO status has been updated to running\n",
- "2025-03-04 12:53:54,370 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "3c50aa5591ea400fa4f134699660d760",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "a4366d50b0870de70d4c225f8b69a97.nc: 0%| | 0.00/21.5M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 10 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-04 12:54:02,725 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-04 12:54:02,725 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-04 12:54:03,069 INFO Request ID is 6587609e-a8d7-4dde-a82d-5869f63c94a0\n",
- "2025-03-04 12:54:03,186 INFO status has been updated to accepted\n",
- "2025-03-04 12:54:24,878 INFO status has been updated to running\n",
- "2025-03-04 13:02:24,112 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "ba6b1efbea5849a6a61ec6fab07ba109",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "5bec83b48bd3580b8b513169027b6201.nc: 0%| | 0.00/23.3M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 11 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-04 13:02:33,128 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-04 13:02:33,129 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-04 13:02:33,548 INFO Request ID is 4158b3e1-5c5b-4695-b4c6-ca3e14184cc7\n",
- "2025-03-04 13:02:33,721 INFO status has been updated to accepted\n",
- "2025-03-04 13:05:26,404 INFO status has been updated to running\n",
- "2025-03-04 13:10:53,924 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "0478f7cd80a74621877898d6bdae7bd7",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "24b332ae08734d20c69fd626bf6f87d0.nc: 0%| | 0.00/20.3M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 12 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-04 13:10:59,388 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-04 13:10:59,388 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-04 13:10:59,743 INFO Request ID is a4119521-697c-468c-8b0f-b3ac9dbc880d\n",
- "2025-03-04 13:10:59,854 INFO status has been updated to accepted\n",
- "2025-03-04 13:15:19,448 INFO status has been updated to running\n",
- "2025-03-04 13:19:20,182 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "d3fc34ef118d450bbea3c0024fa6fd46",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "36c1d0e91f1d06cc96e4ee770c352d78.nc: 0%| | 0.00/22.7M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing year 2006 \n",
- "\n",
- "Now processing month 01 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-04 13:19:24,935 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-04 13:19:24,935 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-04 13:19:25,230 INFO Request ID is a2dc4d2a-68ee-4869-b1ef-72a164ee55d2\n",
- "2025-03-04 13:19:25,365 INFO status has been updated to accepted\n",
- "2025-03-04 13:43:36,177 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "8f43e95b0567426695f1742eaeb076a6",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "c4d21666dcb9762756b906901bb87ce3.nc: 0%| | 0.00/23.3M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 02 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-04 13:43:55,145 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-04 13:43:55,146 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-04 13:43:55,439 INFO Request ID is 77c4a306-c706-4bc9-9812-3975e2b8e41e\n",
- "2025-03-04 13:43:55,552 INFO status has been updated to accepted\n",
- "2025-03-04 14:03:34,910 INFO status has been updated to running\n",
- "2025-03-04 14:07:35,576 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "553f597557ec4258bd105f550e2862c5",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "7fb6e3d31203a337a177b1affc94248a.nc: 0%| | 0.00/19.7M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 03 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-04 14:07:39,516 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-04 14:07:39,517 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-04 14:07:39,825 INFO Request ID is 23a1ad68-259b-46d4-b059-38a5861c05a0\n",
- "2025-03-04 14:07:39,955 INFO status has been updated to accepted\n",
- "2025-03-04 14:13:59,789 INFO status has been updated to running\n",
- "2025-03-04 14:22:01,316 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "c3e6effa4081440d9ff5c4821c177313",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "91c8464e6133709207cc8f626fdacaa6.nc: 0%| | 0.00/23.7M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 04 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-04 14:22:08,354 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-04 14:22:08,355 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-04 14:22:08,630 INFO Request ID is bba66118-5631-4065-b679-e6478985fd2b\n",
- "2025-03-04 14:22:08,773 INFO status has been updated to accepted\n",
- "2025-03-04 14:32:29,918 INFO status has been updated to running\n",
- "2025-03-04 14:38:31,238 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "143edc1991c447b68cea880422eea505",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "5af1280a2e6c071c496ea4c2f3e31610.nc: 0%| | 0.00/21.2M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 05 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-04 14:38:36,698 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-04 14:38:36,699 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-04 14:38:37,015 INFO Request ID is b6296593-c255-4e69-9e96-6365d1f022a3\n",
- "2025-03-04 14:38:37,151 INFO status has been updated to accepted\n",
- "2025-03-04 14:44:57,211 INFO status has been updated to running\n",
- "2025-03-04 14:48:57,902 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "1ecdc6ef39b24e33b879916f8837db5e",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "717197501229ff5cf3a9c7135eb35efd.nc: 0%| | 0.00/23.2M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 06 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-04 14:49:02,332 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-04 14:49:02,333 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-04 14:49:02,660 INFO Request ID is 80e8f5ab-4412-42c8-92df-a73aa7f67004\n",
- "2025-03-04 14:49:02,857 INFO status has been updated to accepted\n",
- "2025-03-04 14:51:55,841 INFO status has been updated to running\n",
- "2025-03-04 14:57:23,648 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "a2559e1f334f4a70aa26023e59cbecff",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "6045a7a92a5d56f1b6ab92ef0f92315a.nc: 0%| | 0.00/20.3M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 07 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-04 14:57:28,130 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-04 14:57:28,131 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-04 14:57:28,489 INFO Request ID is b61e8e4c-81d4-49c7-b4c6-fc501fcb0f13\n",
- "2025-03-04 14:57:28,655 INFO status has been updated to accepted\n",
- "2025-03-04 14:57:42,476 INFO status has been updated to running\n",
- "2025-03-04 15:05:49,261 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "13645182fdff46fcb84ff47672fad211",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "1b705fb48a6cddaebf381faa0524dd60.nc: 0%| | 0.00/22.9M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 08 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-04 15:05:52,832 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-04 15:05:52,832 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-04 15:05:53,178 INFO Request ID is 7f5f3573-8fc3-42eb-a2ed-3477c4ea04da\n",
- "2025-03-04 15:05:53,308 INFO status has been updated to accepted\n",
- "2025-03-04 15:06:07,216 INFO status has been updated to running\n",
- "2025-03-04 15:12:13,342 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "4523e077de7244459a67c0ace1f6c41b",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "173b48c7fd50609852dbcd6c2f153b89.nc: 0%| | 0.00/24.5M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 09 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-04 15:12:18,662 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-04 15:12:18,663 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-04 15:12:18,957 INFO Request ID is f4aff46e-2ba1-4942-8246-d88740abec0f\n",
- "2025-03-04 15:12:19,105 INFO status has been updated to accepted\n",
- "2025-03-04 15:12:27,876 INFO status has been updated to running\n",
- "2025-03-04 15:16:39,164 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "f734601d66ff48e381f0d6bbc0498e27",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "182c849cd1af687aee9c38bd14da741f.nc: 0%| | 0.00/20.2M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 10 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-04 15:16:43,159 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-04 15:16:43,160 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-04 15:16:43,472 INFO Request ID is 70945f07-94e9-47cc-bef0-644bd1c86328\n",
- "2025-03-04 15:16:43,601 INFO status has been updated to accepted\n",
- "2025-03-04 15:16:52,326 INFO status has been updated to running\n",
- "2025-03-04 15:23:04,014 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "860b5e49035e4c8885f12a828ae61ea1",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "1dc116db561fe8dbb14db6782a9ef596.nc: 0%| | 0.00/22.8M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 11 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-04 15:23:09,745 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-04 15:23:09,745 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-04 15:23:10,056 INFO Request ID is efe1500e-f60b-4691-965b-177fff6272f9\n",
- "2025-03-04 15:23:10,254 INFO status has been updated to accepted\n",
- "2025-03-04 15:23:19,170 INFO status has been updated to running\n",
- "2025-03-04 15:29:30,651 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "cac1be2657e643e7b1c7ac4767ae8444",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "6a50f62d51e8045da1538c5fabcf8309.nc: 0%| | 0.00/22.3M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 12 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-04 15:29:36,745 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-04 15:29:36,746 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-04 15:29:37,269 INFO Request ID is a0ad1303-28b8-46a1-b413-5255290b5cbe\n",
- "2025-03-04 15:29:37,465 INFO status has been updated to accepted\n",
- "2025-03-04 15:29:46,379 INFO status has been updated to running\n",
- "2025-03-04 15:35:57,932 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "550d502bb3834a5bb5658f81e621dc7a",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "7c67de64aea1dfe0794d5a3008cc9624.nc: 0%| | 0.00/23.3M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing year 2007 \n",
- "\n",
- "Now processing month 01 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-04 15:36:01,967 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-04 15:36:01,968 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-04 15:36:02,360 INFO Request ID is e0d7d0a9-ab37-4f6c-9542-562c085792a7\n",
- "2025-03-04 15:36:02,481 INFO status has been updated to accepted\n",
- "2025-03-04 15:36:11,104 INFO status has been updated to running\n",
- "2025-03-04 15:42:22,339 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "5f9313cb1c7146cba42bd220edc4c0b4",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "11e831d9caecec7e1fb12f5da2becf73.nc: 0%| | 0.00/22.8M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 02 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-04 15:42:26,635 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-04 15:42:26,636 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-04 15:42:26,962 INFO Request ID is 77ae239a-d853-4366-b5ab-ecbec90ba32c\n",
- "2025-03-04 15:42:27,070 INFO status has been updated to accepted\n",
- "2025-03-04 15:42:35,718 INFO status has been updated to running\n",
- "2025-03-04 15:46:46,707 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "9390479c31134c29a8fc7f3280bec2c5",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "fbe37db7e9e90292a0bb04f046113bcc.nc: 0%| | 0.00/19.4M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 03 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-04 15:46:51,132 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-04 15:46:51,133 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-04 15:46:51,418 INFO Request ID is 11dac302-36ed-485c-acdb-663ecdfb3b2e\n",
- "2025-03-04 15:46:51,524 INFO status has been updated to accepted\n",
- "2025-03-04 15:47:00,159 INFO status has been updated to running\n",
- "Recovering from connection error [HTTPSConnectionPool(host='cds.climate.copernicus.eu', port=443): Max retries exceeded with url: /api/retrieve/v1/jobs/11dac302-36ed-485c-acdb-663ecdfb3b2e?log=True&request=True (Caused by NameResolutionError(\": Failed to resolve 'cds.climate.copernicus.eu' ([Errno 8] nodename nor servname provided, or not known)\"))], attemps 1 of 500\n",
- "Retrying in 120 seconds\n",
- "Recovering from connection error [HTTPSConnectionPool(host='cds.climate.copernicus.eu', port=443): Max retries exceeded with url: /api/retrieve/v1/jobs/11dac302-36ed-485c-acdb-663ecdfb3b2e?log=True&request=True (Caused by NameResolutionError(\": Failed to resolve 'cds.climate.copernicus.eu' ([Errno 8] nodename nor servname provided, or not known)\"))], attemps 2 of 500\n",
- "Retrying in 120 seconds\n",
- "Recovering from connection error [HTTPSConnectionPool(host='cds.climate.copernicus.eu', port=443): Max retries exceeded with url: /api/retrieve/v1/jobs/11dac302-36ed-485c-acdb-663ecdfb3b2e?log=True&request=True (Caused by NameResolutionError(\": Failed to resolve 'cds.climate.copernicus.eu' ([Errno 8] nodename nor servname provided, or not known)\"))], attemps 3 of 500\n",
- "Retrying in 120 seconds\n",
- "Recovering from connection error [HTTPSConnectionPool(host='cds.climate.copernicus.eu', port=443): Max retries exceeded with url: /api/retrieve/v1/jobs/11dac302-36ed-485c-acdb-663ecdfb3b2e?log=True&request=True (Caused by NameResolutionError(\": Failed to resolve 'cds.climate.copernicus.eu' ([Errno 8] nodename nor servname provided, or not known)\"))], attemps 4 of 500\n",
- "Retrying in 120 seconds\n",
- "2025-03-04 22:18:35,613 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "d2701bf43b734520bd134beaba73eed8",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "bf5ea2b1c1907fc99fe424f97a71a8c1.nc: 0%| | 0.00/23.5M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Recovering from connection error [HTTPSConnectionPool(host='object-store.os-api.cci2.ecmwf.int', port=443): Read timed out.], attemps 1 of 500\n",
- "Retrying in 120 seconds\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "cfe996ba40fc4ea3985a3248f10a4ac5",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "bf5ea2b1c1907fc99fe424f97a71a8c1.nc: 72%|#######2 | 17.0M/23.5M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 04 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-04 22:58:42,093 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-04 22:58:42,094 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-04 22:58:42,469 INFO Request ID is 136468ea-fa06-46a2-b99c-03a925f1425f\n",
- "2025-03-04 22:58:42,645 INFO status has been updated to accepted\n",
- "2025-03-04 22:58:51,556 INFO status has been updated to running\n",
- "2025-03-04 23:16:37,990 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "911072bec1b14a26b1a98a5efbabebdf",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "dd07d89f067b8781572528ac2b5bb18c.nc: 0%| | 0.00/21.3M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 05 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-04 23:16:41,964 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-04 23:16:41,965 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-04 23:16:42,339 INFO Request ID is d625108a-9f82-474b-b46f-20d35ffa72f2\n",
- "2025-03-04 23:16:42,495 INFO status has been updated to accepted\n",
- "2025-03-04 23:16:48,275 INFO status has been updated to running\n",
- "Recovering from connection error [HTTPSConnectionPool(host='cds.climate.copernicus.eu', port=443): Read timed out. (read timeout=60)], attemps 1 of 500\n",
- "Retrying in 120 seconds\n",
- "2025-03-05 00:52:09,706 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "51efb5038d984f36ae50dead378614ae",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "4c0f98667c1d4390de9052c89cfe7a69.nc: 0%| | 0.00/23.6M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 06 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-05 00:52:13,673 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-05 00:52:13,674 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-05 00:52:14,051 INFO Request ID is a6f3b1ae-e374-4a33-ad09-6f4e47710096\n",
- "2025-03-05 00:52:14,213 INFO status has been updated to accepted\n",
- "2025-03-05 00:52:19,481 INFO status has been updated to running\n",
- "Recovering from connection error [HTTPSConnectionPool(host='cds.climate.copernicus.eu', port=443): Read timed out. (read timeout=60)], attemps 1 of 500\n",
- "Retrying in 120 seconds\n",
- "2025-03-05 02:50:17,290 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "a735255237ab4c4695b0f362bbc07dd2",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "13e378d4e913dd77cd34f6e5ab1aec5f.nc: 0%| | 0.00/22.2M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 07 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-05 02:50:23,031 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-05 02:50:23,031 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-05 02:50:23,379 INFO Request ID is 6b93ef2e-60d8-4ce4-87cb-0739b8dad79f\n",
- "2025-03-05 02:50:23,522 INFO status has been updated to accepted\n",
- "2025-03-05 02:50:28,781 INFO status has been updated to running\n",
- "Recovering from connection error [HTTPSConnectionPool(host='cds.climate.copernicus.eu', port=443): Read timed out. (read timeout=60)], attemps 1 of 500\n",
- "Retrying in 120 seconds\n",
- "2025-03-05 04:57:59,083 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "2541377e647d4fe4932557b5de4178df",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "13e7b2f75be1040dc90e501baf0fe901.nc: 0%| | 0.00/22.3M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 08 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-05 04:58:16,028 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-05 04:58:16,029 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-05 04:58:16,368 INFO Request ID is a3135c9c-e265-4cad-bae3-a9b07ecfe550\n",
- "2025-03-05 04:58:16,537 INFO status has been updated to accepted\n",
- "2025-03-05 04:58:25,390 INFO status has been updated to running\n",
- "Recovering from connection error [HTTPSConnectionPool(host='cds.climate.copernicus.eu', port=443): Read timed out. (read timeout=60)], attemps 1 of 500\n",
- "Retrying in 120 seconds\n",
- "2025-03-05 07:37:53,120 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "edd34e27d5034ba79f7fb15abe29e098",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "3664413d6f266e22e4141928d2c8fd13.nc: 0%| | 0.00/23.5M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 09 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-05 07:37:56,894 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-05 07:37:56,896 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-05 07:37:57,298 INFO Request ID is 988df20f-32f1-4e79-af39-f2929513bd0f\n",
- "2025-03-05 07:37:57,477 INFO status has been updated to accepted\n",
- "2025-03-05 07:38:02,718 INFO status has been updated to running\n",
- "Recovering from connection error [HTTPSConnectionPool(host='cds.climate.copernicus.eu', port=443): Read timed out. (read timeout=60)], attemps 1 of 500\n",
- "Retrying in 120 seconds\n",
- "Recovering from connection error [HTTPSConnectionPool(host='cds.climate.copernicus.eu', port=443): Max retries exceeded with url: /api/retrieve/v1/jobs/988df20f-32f1-4e79-af39-f2929513bd0f?log=True&request=True (Caused by NameResolutionError(\": Failed to resolve 'cds.climate.copernicus.eu' ([Errno 8] nodename nor servname provided, or not known)\"))], attemps 2 of 500\n",
- "Retrying in 120 seconds\n",
- "2025-03-05 11:17:00,960 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "6f3645a9aec942aca89ca91059a818b9",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "6305750f2c651fc9ef6aac03b3db5eef.nc: 0%| | 0.00/20.1M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 10 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-05 11:17:07,077 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-05 11:17:07,078 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-05 11:17:07,438 INFO Request ID is 66b5de59-1f90-44d6-8966-3b6114007b64\n",
- "2025-03-05 11:17:07,574 INFO status has been updated to accepted\n",
- "2025-03-05 11:17:13,020 INFO status has been updated to running\n",
- "2025-03-05 11:23:27,799 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "d7edd5713277467b9809e0b3064e9366",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "168a1447d22a5befc8daa3ef3ee5b2f4.nc: 0%| | 0.00/23.3M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 11 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-05 11:23:33,353 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-05 11:23:33,354 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-05 11:23:33,656 INFO Request ID is d1be8e5c-b3e9-4ae5-9294-e8052262b781\n",
- "2025-03-05 11:23:33,797 INFO status has been updated to accepted\n",
- "2025-03-05 11:23:39,024 INFO status has been updated to running\n",
- "2025-03-05 11:39:55,822 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "4a806d78a9504ffb8977e6c35d92abf0",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "843997243436bef7669cba0f19e4a7e6.nc: 0%| | 0.00/20.3M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 12 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-05 11:39:59,368 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-05 11:39:59,369 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-05 11:39:59,665 INFO Request ID is d6464ec5-94fc-468e-b993-0feee0f68cfa\n",
- "2025-03-05 11:39:59,785 INFO status has been updated to accepted\n",
- "2025-03-05 11:40:08,406 INFO status has been updated to running\n",
- "2025-03-05 11:46:19,717 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "4d3918e51afe436d94c07fb6966173dc",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "b7b397f90f54f2061e6be9757bacb5c6.nc: 0%| | 0.00/22.3M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing year 2008 \n",
- "\n",
- "Now processing month 01 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-05 11:46:23,596 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-05 11:46:23,597 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-05 11:46:23,925 INFO Request ID is 068bc61f-efe4-499a-a7ee-c66faea78077\n",
- "2025-03-05 11:46:24,050 INFO status has been updated to accepted\n",
- "2025-03-05 11:46:32,726 INFO status has been updated to running\n",
- "2025-03-05 11:52:44,043 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "3e5204e03d13487eaa64f1dcfcb1b6ea",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "1e34586d8edd7e943717e61930bcb3c1.nc: 0%| | 0.00/22.7M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 02 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-05 11:52:48,118 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-05 11:52:48,118 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-05 11:52:48,406 INFO Request ID is c997e23c-f539-494d-b6eb-2e96a946643b\n",
- "2025-03-05 11:52:48,538 INFO status has been updated to accepted\n",
- "2025-03-05 11:53:02,358 INFO status has been updated to running\n",
- "2025-03-05 11:53:10,077 INFO status has been updated to accepted\n",
- "2025-03-05 11:53:21,598 INFO status has been updated to running\n",
- "2025-03-05 12:07:09,782 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "66fa743a285f478e8c989a99f3d5e8c9",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "7e5825f226628cc6c730e2f8748875ed.nc: 0%| | 0.00/20.3M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 03 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-05 12:07:13,233 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-05 12:07:13,234 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-05 12:07:13,515 INFO Request ID is 0f1a5bbc-6e12-48be-93e7-9fcf700ee9a1\n",
- "2025-03-05 12:07:13,647 INFO status has been updated to accepted\n",
- "2025-03-05 12:07:22,414 INFO status has been updated to running\n",
- "2025-03-05 12:13:33,899 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "636b6c0f843245a88627c2a2caba0ad4",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "bb00f6992d38fcb26d8dcbb7f444ba86.nc: 0%| | 0.00/23.0M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 04 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-05 12:13:38,567 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-05 12:13:38,568 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-05 12:13:38,844 INFO Request ID is c9d491f7-d9c8-46df-8879-bb260c1e2b2e\n",
- "2025-03-05 12:13:38,968 INFO status has been updated to accepted\n",
- "2025-03-05 12:13:52,891 INFO status has been updated to running\n",
- "2025-03-05 12:21:57,680 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "92f31a1e528a4f4896e9c68df0f2319b",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "9315fa25477b1198aa064b3f756f300c.nc: 0%| | 0.00/20.4M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 05 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-05 12:22:01,146 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-05 12:22:01,147 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-05 12:22:01,437 INFO Request ID is 576292a5-567c-4777-b6cb-7824da3a60ad\n",
- "2025-03-05 12:22:01,584 INFO status has been updated to accepted\n",
- "Recovering from connection error [('Connection aborted.', ConnectionResetError(54, 'Connection reset by peer'))], attemps 1 of 500\n",
- "Retrying in 120 seconds\n",
- "2025-03-05 12:32:41,612 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "fdb98e79170649fbb993149e015d1d09",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "15424c9a685be2bca1c2d0ae07a8d13d.nc: 0%| | 0.00/23.5M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 06 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-05 12:32:47,798 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-05 12:32:47,799 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-05 12:32:48,140 INFO Request ID is 8b63ff28-efa7-4682-a59c-921c01d8474a\n",
- "2025-03-05 12:32:49,350 INFO status has been updated to accepted\n",
- "2025-03-05 12:32:58,123 INFO status has been updated to running\n",
- "2025-03-05 12:39:09,438 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "c20cf06de95a4e80acd4b6c83158466a",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "ed9641587361892e6999d3bc2a922a53.nc: 0%| | 0.00/21.7M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 07 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-05 12:39:14,687 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-05 12:39:14,688 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-05 12:39:14,999 INFO Request ID is 3014599c-4862-458a-b74f-512270b35d57\n",
- "2025-03-05 12:39:15,107 INFO status has been updated to accepted\n",
- "2025-03-05 12:39:24,017 INFO status has been updated to running\n",
- "2025-03-05 12:45:35,571 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "01e9757e5e2f4c9ab41db699df0ba12d",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "40af3114d25a8649d467c2483f969f22.nc: 0%| | 0.00/24.6M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 08 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-05 12:45:41,725 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-05 12:45:41,726 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-05 12:45:41,990 INFO Request ID is 5904eb2a-3fac-4989-a757-b60f25479405\n",
- "2025-03-05 12:45:42,105 INFO status has been updated to accepted\n",
- "2025-03-05 12:45:47,367 INFO status has been updated to running\n",
- "2025-03-05 12:52:02,362 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "05dc57567ea34217b689ffadd4e6c7ef",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "ae20755ce1d1a3deed046dc5bf3acf32.nc: 0%| | 0.00/24.7M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 09 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-05 12:52:11,083 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-05 12:52:11,084 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-05 12:52:11,555 INFO Request ID is 20ba67b8-f807-4ae1-8d0f-12c23f6b581c\n",
- "2025-03-05 12:52:11,662 INFO status has been updated to accepted\n",
- "2025-03-05 12:52:33,414 INFO status has been updated to running\n",
- "2025-03-05 13:02:40,959 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "4618cfdf1d534baa8abc459e2ccf43ed",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "6e5f1b489fbdece77d6be7f47c2b51e1.nc: 0%| | 0.00/21.1M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 10 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-05 13:02:47,251 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-05 13:02:47,252 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-05 13:02:47,617 INFO Request ID is 58c13e75-441e-49d7-8171-18a2e543822f\n",
- "2025-03-05 13:02:47,772 INFO status has been updated to accepted\n",
- "Recovering from connection error [('Connection aborted.', ConnectionResetError(54, 'Connection reset by peer'))], attemps 1 of 500\n",
- "Retrying in 120 seconds\n",
- "2025-03-05 13:10:23,177 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "97ca272e5245440eb0decc4c84aa8d5b",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "5c3797eb7376b2651ac156c26dc61b96.nc: 0%| | 0.00/21.7M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 11 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-05 13:10:27,079 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-05 13:10:27,081 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-05 13:10:27,404 INFO Request ID is 1bf29d08-b15a-4fa9-accd-3440dcb00971\n",
- "2025-03-05 13:10:27,529 INFO status has been updated to accepted\n",
- "2025-03-05 13:10:36,343 INFO status has been updated to running\n",
- "Recovering from connection error [HTTPSConnectionPool(host='cds.climate.copernicus.eu', port=443): Read timed out. (read timeout=60)], attemps 1 of 500\n",
- "Retrying in 120 seconds\n",
- "2025-03-05 13:23:42,141 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "be144c32decc44d0b516b7b1a5b95ad6",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "f904596a37761289db388e370fb0b1f1.nc: 0%| | 0.00/20.2M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 12 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-05 13:23:46,075 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-05 13:23:46,076 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-05 13:23:46,392 INFO Request ID is 53953d53-4e03-4a0b-b491-334b26ba406b\n",
- "2025-03-05 13:23:46,525 INFO status has been updated to accepted\n",
- "2025-03-05 13:23:51,635 INFO status has been updated to running\n",
- "Recovering from connection error [HTTPSConnectionPool(host='cds.climate.copernicus.eu', port=443): Read timed out. (read timeout=60)], attemps 1 of 500\n",
- "Retrying in 120 seconds\n",
- "2025-03-05 13:42:24,578 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "5c500efa026c46538d57148a53c0a09c",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "7d12dfca50361a747aba82fcdf4c5089.nc: 0%| | 0.00/22.7M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing year 2009 \n",
- "\n",
- "Now processing month 01 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-05 13:42:28,769 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-05 13:42:28,771 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-05 13:42:29,064 INFO Request ID is 40cf7143-f966-454f-b120-974e6ce15883\n",
- "2025-03-05 13:42:29,190 INFO status has been updated to accepted\n",
- "2025-03-05 13:42:34,362 INFO status has been updated to running\n",
- "Recovering from connection error [HTTPSConnectionPool(host='cds.climate.copernicus.eu', port=443): Read timed out. (read timeout=60)], attemps 1 of 500\n",
- "Retrying in 120 seconds\n",
- "2025-03-05 13:50:48,121 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "6d5cfe2be62e440392a8f9d09c308ba2",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "5a1e2972ab682c0a998e708a8df457e9.nc: 0%| | 0.00/23.3M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 02 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-05 13:50:53,595 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-05 13:50:53,596 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-05 13:50:53,876 INFO Request ID is f8167f7c-8213-4ba9-9931-2ba98d325c28\n",
- "2025-03-05 13:50:54,012 INFO status has been updated to accepted\n",
- "2025-03-05 13:50:59,192 INFO status has been updated to running\n",
- "2025-03-05 13:57:13,963 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "6bedd2c60af048c89c1c6f8b308a18d1",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "49c39eac5e37ab9b001f21eff34ca219.nc: 0%| | 0.00/19.9M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 03 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-05 13:57:18,019 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-05 13:57:18,020 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-05 13:57:18,282 INFO Request ID is f47cc7e5-0e6a-4cdb-a640-ea92fb67292d\n",
- "2025-03-05 13:57:18,394 INFO status has been updated to accepted\n",
- "2025-03-05 13:57:23,544 INFO status has been updated to running\n",
- "2025-03-05 14:03:38,474 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "e0c27d2722f347659d757d54c229f413",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "8ab922a2e1b685282e9e4b33aa5cf5f2.nc: 0%| | 0.00/23.4M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 04 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-05 14:03:45,816 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-05 14:03:45,817 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-05 14:03:46,162 INFO Request ID is b018a4c8-6a92-4986-be84-30af66e5737a\n",
- "2025-03-05 14:03:46,303 INFO status has been updated to accepted\n",
- "2025-03-05 14:03:55,006 INFO status has been updated to running\n",
- "Recovering from connection error [('Connection aborted.', ConnectionResetError(54, 'Connection reset by peer'))], attemps 1 of 500\n",
- "Retrying in 120 seconds\n",
- "Recovering from connection error [HTTPSConnectionPool(host='cds.climate.copernicus.eu', port=443): Max retries exceeded with url: /api/retrieve/v1/jobs/b018a4c8-6a92-4986-be84-30af66e5737a?log=True&request=True (Caused by NameResolutionError(\": Failed to resolve 'cds.climate.copernicus.eu' ([Errno 8] nodename nor servname provided, or not known)\"))], attemps 2 of 500\n",
- "Retrying in 120 seconds\n",
- "2025-03-05 14:21:52,638 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "a76dbe6c671f400a83e785e9d2bde4dc",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "a47e7af5675feb4a154920e3d062d2cb.nc: 0%| | 0.00/20.1M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Recovering from connection error [HTTPSConnectionPool(host='object-store.os-api.cci2.ecmwf.int', port=443): Read timed out.], attemps 1 of 500\n",
- "Retrying in 120 seconds\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "fd005bfb4f604d37a4ca8ec8532430c1",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "a47e7af5675feb4a154920e3d062d2cb.nc: 40%|###9 | 8.00M/20.1M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 05 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-05 14:46:58,948 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-05 14:46:58,950 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-05 14:46:59,283 INFO Request ID is 1fb44581-a851-4b47-a087-8356b5adf38f\n",
- "2025-03-05 14:46:59,414 INFO status has been updated to accepted\n",
- "2025-03-05 14:47:04,576 INFO status has been updated to running\n",
- "Recovering from connection error [HTTPSConnectionPool(host='cds.climate.copernicus.eu', port=443): Read timed out. (read timeout=60)], attemps 1 of 500\n",
- "Retrying in 120 seconds\n",
- "2025-03-05 15:49:41,591 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "88e0b2278c8748eba001ba1e16bc08b2",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "13855fba0c6c849e7ee62d023b882ac0.nc: 0%| | 0.00/23.4M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 06 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-05 15:49:47,548 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-05 15:49:47,549 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-05 15:49:47,861 INFO Request ID is 2b5ccf1a-ebb0-4578-b51d-6fca8be55173\n",
- "2025-03-05 15:49:47,970 INFO status has been updated to accepted\n",
- "2025-03-05 15:49:57,329 INFO status has been updated to running\n",
- "2025-03-05 15:56:08,797 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "fc7110d17a734a77b27aba929c13a6b9",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "c9d81889505ffcd992d8c10facb325a5.nc: 0%| | 0.00/22.1M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 07 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-05 15:56:16,252 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-05 15:56:16,253 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-05 15:56:16,582 INFO Request ID is db0ab97e-f3a2-43dd-b105-88f64e79465b\n",
- "2025-03-05 15:56:16,689 INFO status has been updated to accepted\n",
- "2025-03-05 15:56:38,851 INFO status has been updated to running\n",
- "2025-03-05 16:02:37,204 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "dcc01ce8069d411fb4621a35eb15e660",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "f80579f2d5a443b7a606a6e2577c41ae.nc: 0%| | 0.00/22.9M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 08 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-05 16:02:41,428 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-05 16:02:41,429 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-05 16:02:41,707 INFO Request ID is b805f8f7-dc50-4e0e-8fcc-8ec1e2a0095b\n",
- "2025-03-05 16:02:41,846 INFO status has been updated to accepted\n",
- "2025-03-05 16:02:47,025 INFO status has been updated to running\n",
- "2025-03-05 16:09:02,579 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "ae4a32abe8044a97bebea4eeb23b7f53",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "8d809ed725c708c1010957c33a5d4f3.nc: 0%| | 0.00/22.7M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 09 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-05 16:09:07,792 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-05 16:09:07,793 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-05 16:09:08,098 INFO Request ID is 08853daf-b5b7-4a85-90a4-c58bc452691f\n",
- "2025-03-05 16:09:08,222 INFO status has been updated to accepted\n",
- "2025-03-05 16:09:16,868 INFO status has been updated to running\n",
- "2025-03-05 16:15:28,587 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "8555eec07d4a40e7a48e90d118cb41ae",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "8515e5575b4b14ae277ddf48a2a2e33a.nc: 0%| | 0.00/21.1M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 10 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-05 16:15:34,073 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-05 16:15:34,074 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-05 16:15:34,416 INFO Request ID is eec4c50d-6659-43da-9da8-9e3698748a77\n",
- "2025-03-05 16:15:34,546 INFO status has been updated to accepted\n",
- "2025-03-05 16:15:48,485 INFO status has been updated to running\n",
- "2025-03-05 16:21:55,347 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "d87fb11fb96948f28668f64d21072116",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "e2f4f58439cf6dcbe2ad9bdfae67fe11.nc: 0%| | 0.00/23.3M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 11 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-05 16:22:06,604 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-05 16:22:06,605 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-05 16:22:07,014 INFO Request ID is 79592a28-024c-42fa-8086-6326002dce86\n",
- "2025-03-05 16:22:07,142 INFO status has been updated to accepted\n",
- "2025-03-05 16:22:15,945 INFO status has been updated to running\n",
- "2025-03-05 16:28:27,261 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "51c6f77f369c41659cbfca6cb2d39f11",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "454c17344ed7995f132f3603830024a3.nc: 0%| | 0.00/20.3M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 12 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-05 16:28:31,637 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-05 16:28:31,638 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-05 16:28:31,950 INFO Request ID is 7f7279b9-7a8c-46af-898a-71a4af709cbe\n",
- "2025-03-05 16:28:32,090 INFO status has been updated to accepted\n",
- "2025-03-05 16:28:37,273 INFO status has been updated to running\n",
- "2025-03-05 16:34:52,454 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "dbead0546c2e400aa8f43e5275557e1a",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "c417fa3c63e2157769c6856f78ad1883.nc: 0%| | 0.00/22.7M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing year 2010 \n",
- "\n",
- "Now processing month 01 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-05 16:34:56,298 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-05 16:34:56,299 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-05 16:34:56,634 INFO Request ID is 45d7bab5-b775-46c4-9e3b-2f7c7554c09d\n",
- "2025-03-05 16:34:56,766 INFO status has been updated to accepted\n",
- "2025-03-05 16:35:05,454 INFO status has been updated to running\n",
- "Recovering from connection error [('Connection aborted.', ConnectionResetError(54, 'Connection reset by peer'))], attemps 1 of 500\n",
- "Retrying in 120 seconds\n",
- "2025-03-05 16:41:57,289 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "89d53065cdc34ea7aa343324760f3072",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "6f16645d90b7d91a978bb803c6350a51.nc: 0%| | 0.00/22.8M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 02 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-05 16:42:09,147 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-05 16:42:09,150 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-05 16:42:09,468 INFO Request ID is b0c59bb2-de75-4f84-89be-e84d42bd8ff4\n",
- "2025-03-05 16:42:09,615 INFO status has been updated to accepted\n",
- "2025-03-05 16:42:14,976 INFO status has been updated to running\n",
- "2025-03-05 16:48:29,803 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "d009f12791d94c1689acc765b479ade2",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "b79d505e1c85439bd0c58dc48e48dbbb.nc: 0%| | 0.00/19.6M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 03 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-05 16:48:34,053 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-05 16:48:34,054 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-05 16:48:34,375 INFO Request ID is 9b1eeb69-72bd-47d2-ac24-36eee403fc4a\n",
- "2025-03-05 16:48:34,504 INFO status has been updated to accepted\n",
- "2025-03-05 16:48:39,694 INFO status has been updated to running\n",
- "2025-03-05 16:54:54,632 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "f18c14e16426400fbe2189bb0ad6777e",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "17e87627f6c7655d6d770c35f75fe922.nc: 0%| | 0.00/22.4M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 04 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-05 16:54:58,538 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-05 16:54:58,539 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-05 16:54:58,889 INFO Request ID is 84e0ef88-038b-492f-8f03-3a0439b700c4\n",
- "2025-03-05 16:54:58,999 INFO status has been updated to accepted\n",
- "2025-03-05 16:55:07,675 INFO status has been updated to running\n",
- "2025-03-05 17:01:18,996 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "0e0bd1eca4db40f2b0490c783998421c",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "79fa1f7d510260bd8434c2c04e9f4746.nc: 0%| | 0.00/20.4M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 05 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-05 17:01:23,362 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-05 17:01:23,363 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-05 17:01:23,666 INFO Request ID is 6b91eb56-7c31-4072-bea8-cbcf3a34a13b\n",
- "2025-03-05 17:01:23,795 INFO status has been updated to accepted\n",
- "2025-03-05 17:01:28,985 INFO status has been updated to running\n",
- "2025-03-05 17:07:43,708 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "a548f88c4f084160bf638bdf30ae5313",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "6dd5483f81b93fcb4e2daf8777dae2f8.nc: 0%| | 0.00/23.5M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 06 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-05 17:07:47,411 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-05 17:07:47,412 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-05 17:07:47,814 INFO Request ID is 44cc0c8e-e089-4d2d-b843-6a89828cf8d2\n",
- "2025-03-05 17:07:47,953 INFO status has been updated to accepted\n",
- "2025-03-05 17:07:56,623 INFO status has been updated to running\n",
- "2025-03-05 17:14:07,963 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "ba39a6121caa495bbd343dd8325b2e88",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "f3734ee429da7bd1ae15e76250a8bbe0.nc: 0%| | 0.00/21.0M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 07 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-05 17:14:11,673 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-05 17:14:11,673 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-05 17:14:11,954 INFO Request ID is 1c12e772-ad36-4145-8a28-fb30eeefc14e\n",
- "2025-03-05 17:14:12,087 INFO status has been updated to accepted\n",
- "2025-03-05 17:14:20,721 INFO status has been updated to running\n",
- "2025-03-05 17:14:25,895 INFO status has been updated to accepted\n",
- "2025-03-05 17:14:33,625 INFO status has been updated to running\n",
- "2025-03-05 17:20:31,925 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "0f8c769de69a4fa2bfaae7f0cf2823d8",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "b4d3d4a02692f503a63ab6104301c5fa.nc: 0%| | 0.00/23.8M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 08 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-05 17:20:53,139 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-05 17:20:53,139 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-05 17:20:53,421 INFO Request ID is 99f4fa1a-ef94-44ee-a620-d64d99ccd36f\n",
- "2025-03-05 17:20:53,555 INFO status has been updated to accepted\n",
- "2025-03-05 17:21:15,069 INFO status has been updated to running\n",
- "2025-03-05 17:27:13,338 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "ceb7a1102d5d458d9597c1afac60ecff",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "6b7aa232ef4004867ff85ee97fc502f5.nc: 0%| | 0.00/22.6M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 09 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-05 17:27:16,930 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-05 17:27:16,931 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-05 17:27:17,531 INFO Request ID is 92d24c8a-4191-429f-a2ba-d79aa6712671\n",
- "2025-03-05 17:27:17,664 INFO status has been updated to accepted\n",
- "2025-03-05 17:27:22,829 INFO status has been updated to running\n",
- "2025-03-05 17:43:35,034 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "62febdfde78048f38523ca9212f60306",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "99030ec08189b31efe35c93867ef9804.nc: 0%| | 0.00/20.2M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 10 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-05 17:43:40,812 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-05 17:43:40,813 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-05 17:43:41,147 INFO Request ID is c393962a-a55e-4562-ae70-2aff093f54bc\n",
- "2025-03-05 17:43:41,260 INFO status has been updated to accepted\n",
- "2025-03-05 17:44:24,568 INFO status has been updated to running\n",
- "Recovering from connection error [('Connection aborted.', ConnectionResetError(54, 'Connection reset by peer'))], attemps 1 of 500\n",
- "Retrying in 120 seconds\n",
- "2025-03-05 17:56:09,076 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "c74c5d189205420c80e51e235a6d250f",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "14ef1186b080f60a4d19fcd97cfd6226.nc: 0%| | 0.00/22.6M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 11 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-05 17:56:22,628 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-05 17:56:22,629 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-05 17:56:22,903 INFO Request ID is 80a09a39-0dd9-4a25-87f7-24eb9182c78f\n",
- "2025-03-05 17:56:23,042 INFO status has been updated to accepted\n",
- "2025-03-05 17:56:29,059 INFO status has been updated to running\n",
- "Recovering from connection error [HTTPSConnectionPool(host='cds.climate.copernicus.eu', port=443): Read timed out. (read timeout=60)], attemps 1 of 500\n",
- "Retrying in 120 seconds\n",
- "2025-03-05 19:33:07,346 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "0e2bba4e5da24c94a93b9c427f6a9dd2",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "a992390493b85e2d48b02b277452b5da.nc: 0%| | 0.00/20.2M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 12 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-05 19:33:11,289 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-05 19:33:11,290 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-05 19:33:11,708 INFO Request ID is 0b3dd28d-dd49-45df-8afb-f522c84116eb\n",
- "2025-03-05 19:33:11,881 INFO status has been updated to accepted\n",
- "2025-03-05 19:33:25,913 INFO status has been updated to running\n",
- "Recovering from connection error [HTTPSConnectionPool(host='cds.climate.copernicus.eu', port=443): Read timed out. (read timeout=60)], attemps 1 of 500\n",
- "Retrying in 120 seconds\n",
- "2025-03-05 20:33:42,305 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "516fbbc73a984348adfcfc46e1cfbb9c",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "a57cfe91f5e1f87154c7563f96c107db.nc: 0%| | 0.00/22.8M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing year 2011 \n",
- "\n",
- "Now processing month 01 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-05 20:33:48,953 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-05 20:33:48,954 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-05 20:33:49,340 INFO Request ID is 892da5fd-5e83-4a2a-87e8-bd308affe3e2\n",
- "2025-03-05 20:33:49,493 INFO status has been updated to accepted\n",
- "2025-03-05 20:33:58,262 INFO status has been updated to running\n",
- "Recovering from connection error [HTTPSConnectionPool(host='cds.climate.copernicus.eu', port=443): Read timed out. (read timeout=60)], attemps 1 of 500\n",
- "Retrying in 120 seconds\n",
- "2025-03-05 22:41:32,077 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "440f645d70634d6dac1bd0848796a737",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "b14b22381ec5afc41e72644dc8d701dd.nc: 0%| | 0.00/22.8M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 02 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-05 22:41:42,110 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-05 22:41:42,111 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-05 22:41:42,584 INFO Request ID is 9bc4eec6-2f59-4b0d-814c-7ebd80ef9fc3\n",
- "2025-03-05 22:41:42,748 INFO status has been updated to accepted\n",
- "2025-03-05 22:41:51,545 INFO status has been updated to running\n",
- "Recovering from connection error [HTTPSConnectionPool(host='cds.climate.copernicus.eu', port=443): Read timed out. (read timeout=60)], attemps 1 of 500\n",
- "Retrying in 120 seconds\n",
- "2025-03-06 00:34:53,968 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "bc173741973d4aa39fd61c64ebef20d1",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "ca76059e2a566add0765ed41603b1fcc.nc: 0%| | 0.00/19.7M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 03 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-06 00:34:57,820 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-06 00:34:57,822 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-06 00:34:58,258 INFO Request ID is 2bf01197-9dfd-45fc-80ec-f336348521d8\n",
- "2025-03-06 00:34:58,463 INFO status has been updated to accepted\n",
- "2025-03-06 00:35:12,587 INFO status has been updated to running\n",
- "Recovering from connection error [HTTPSConnectionPool(host='cds.climate.copernicus.eu', port=443): Read timed out. (read timeout=60)], attemps 1 of 500\n",
- "Retrying in 120 seconds\n",
- "2025-03-06 03:23:12,625 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "44ef6773414b44d4b31bdcb8afe77ff5",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "45f31eebc002b35a503bb442aa4fbbb5.nc: 0%| | 0.00/23.3M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 04 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-06 03:23:22,870 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-06 03:23:22,871 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-06 03:23:23,218 INFO Request ID is abb30cb9-1913-4c47-8936-efc61cbacd05\n",
- "2025-03-06 03:23:23,385 INFO status has been updated to accepted\n",
- "2025-03-06 03:23:28,677 INFO status has been updated to running\n",
- "Recovering from connection error [HTTPSConnectionPool(host='cds.climate.copernicus.eu', port=443): Read timed out. (read timeout=60)], attemps 1 of 500\n",
- "Retrying in 120 seconds\n",
- "2025-03-06 05:00:45,944 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "5b78ded120214d208c82142cb569ea8d",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "fa9b2b7aab0eebb7ef0b613581d2aedc.nc: 0%| | 0.00/21.8M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 05 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-06 05:00:50,251 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-06 05:00:50,252 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-06 05:00:50,671 INFO Request ID is 44c24dd2-8c3c-4f45-afec-84b626b77e44\n",
- "2025-03-06 05:00:50,822 INFO status has been updated to accepted\n",
- "2025-03-06 05:00:59,609 INFO status has been updated to running\n",
- "Recovering from connection error [HTTPSConnectionPool(host='cds.climate.copernicus.eu', port=443): Read timed out. (read timeout=60)], attemps 1 of 500\n",
- "Retrying in 120 seconds\n",
- "2025-03-06 06:26:06,792 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "b31301665fd4439ca2ad11f1a15e2fe6",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "4af4a4d4842e6ea5d2baf841bca95c84.nc: 0%| | 0.00/23.0M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 06 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-06 06:26:10,302 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-06 06:26:10,303 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-06 06:26:10,656 INFO Request ID is a125828b-eda3-4068-83f2-1eed3ce5e1a0\n",
- "2025-03-06 06:26:10,827 INFO status has been updated to accepted\n",
- "Recovering from connection error [HTTPSConnectionPool(host='cds.climate.copernicus.eu', port=443): Read timed out. (read timeout=60)], attemps 1 of 500\n",
- "Retrying in 120 seconds\n",
- "2025-03-06 08:01:49,777 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "3598c8fba90d4f2d92cc4706f21edb07",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "d19ac6bccb5025b79ca4c85c08e9f044.nc: 0%| | 0.00/20.1M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 07 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-06 08:01:53,274 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-06 08:01:53,275 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-06 08:01:53,638 INFO Request ID is 5424c6d9-dd21-4a28-a1b1-fef9c28afac0\n",
- "2025-03-06 08:01:53,796 INFO status has been updated to accepted\n",
- "2025-03-06 08:02:02,603 INFO status has been updated to running\n",
- "Recovering from connection error [HTTPSConnectionPool(host='cds.climate.copernicus.eu', port=443): Read timed out. (read timeout=60)], attemps 1 of 500\n",
- "Retrying in 120 seconds\n",
- "Recovering from connection error [HTTPSConnectionPool(host='cds.climate.copernicus.eu', port=443): Max retries exceeded with url: /api/retrieve/v1/jobs/5424c6d9-dd21-4a28-a1b1-fef9c28afac0?log=True&request=True (Caused by NameResolutionError(\": Failed to resolve 'cds.climate.copernicus.eu' ([Errno 8] nodename nor servname provided, or not known)\"))], attemps 2 of 500\n",
- "Retrying in 120 seconds\n",
- "2025-03-06 10:10:27,455 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "ea9da59b2aa046b79e659bf361a87c96",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "1a4f913cba0a5f56439a442d3fc437f7.nc: 0%| | 0.00/22.8M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 08 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-06 10:11:06,185 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-06 10:11:06,186 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-06 10:11:06,713 INFO Request ID is 654ab4e7-d72c-490e-b758-dcfaec456c64\n",
- "2025-03-06 10:11:06,854 INFO status has been updated to accepted\n",
- "2025-03-06 10:11:15,738 INFO status has been updated to running\n",
- "Recovering from connection error [HTTPSConnectionPool(host='cds.climate.copernicus.eu', port=443): Read timed out. (read timeout=60)], attemps 1 of 500\n",
- "Retrying in 120 seconds\n",
- "2025-03-06 10:33:19,728 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "f9c90aefb5aa4e26b3a85789b1b7534f",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "593eed1e2f25b17aebd76573a7d633af.nc: 0%| | 0.00/23.4M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 09 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-06 10:33:23,795 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-06 10:33:23,795 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-06 10:33:24,109 INFO Request ID is 4d3ae6eb-7df7-4d8b-b2c5-e9186b5f3d94\n",
- "2025-03-06 10:33:24,238 INFO status has been updated to accepted\n",
- "2025-03-06 10:33:45,855 INFO status has been updated to running\n",
- "2025-03-06 10:39:44,564 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "4d7b005b6c504001916fcb07e8202c63",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "bac1c442f6c1d6acc8462fbb619209fc.nc: 0%| | 0.00/21.2M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 10 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-06 10:39:49,533 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-06 10:39:49,534 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-06 10:39:49,833 INFO Request ID is e10135dd-3323-4f07-88e0-4d5b85a600eb\n",
- "2025-03-06 10:39:49,945 INFO status has been updated to accepted\n",
- "2025-03-06 10:40:25,364 INFO status has been updated to running\n",
- "2025-03-06 10:44:12,177 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "2037de160e8a4ab49ccce1c2566e9e2d",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "a08d6791e29bf9ca54e72673eff8832e.nc: 0%| | 0.00/23.5M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 11 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-06 10:44:29,941 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-06 10:44:29,941 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-06 10:44:30,416 INFO Request ID is 3c82d50e-08af-463c-88d6-24fbf4381eae\n",
- "2025-03-06 10:44:30,628 INFO status has been updated to accepted\n",
- "2025-03-06 10:44:53,721 INFO status has been updated to running\n",
- "2025-03-06 10:50:54,038 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "bd17a9a226784c87b2ed251cdd29ce51",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "c0c85103951cd02a36abf4cddb9df38a.nc: 0%| | 0.00/20.9M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 12 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-06 10:51:01,012 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-06 10:51:01,013 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-06 10:51:01,330 INFO Request ID is 440220ff-e711-426f-8e40-2f392bce5cbb\n",
- "2025-03-06 10:51:01,461 INFO status has been updated to accepted\n",
- "2025-03-06 10:51:16,013 INFO status has been updated to running\n",
- "2025-03-06 10:57:22,301 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "d9b019aff1dc496398c10c8b3c97273c",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "30c65ee5e68ffea1795a8156cd51650c.nc: 0%| | 0.00/23.3M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing year 2012 \n",
- "\n",
- "Now processing month 01 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-06 10:57:28,703 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-06 10:57:28,704 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-06 10:57:29,446 INFO Request ID is f1646491-2f20-4da2-abf2-731dc1d9f843\n",
- "2025-03-06 10:57:29,585 INFO status has been updated to accepted\n",
- "2025-03-06 11:03:51,419 INFO status has been updated to running\n",
- "2025-03-06 11:15:54,705 INFO status has been updated to successful\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "d3ae9541ef874543b5a89319c19c23c4",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "357563ffc71c1b2eb9fc66bdfb45d7f1.nc: 0%| | 0.00/22.8M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Now processing month 02 \n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-03-06 11:15:58,899 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-03-06 11:15:58,900 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-03-06 11:15:59,253 INFO Request ID is 754d82e7-8b3e-44ac-b487-59af4669ed00\n",
- "2025-03-06 11:15:59,369 INFO status has been updated to accepted\n"
- ]
- }
- ],
- "source": [
- "for year_str in query_years_str:\n",
- " # Track progress\n",
- " print(\"Now processing year \", year_str, \"\\n\")\n",
- "\n",
- " # For each year, the query is divided into each month sections. \n",
- " # If a request is too large, it will not be accepted by the CDS servers, \n",
- " # so this division of requests is required.\n",
- "\n",
- " for month_str in query_months_str:\n",
- " # Track progress\n",
- " print(\"Now processing month \", month_str, \"\\n\")\n",
- "\n",
- " # The below is the formatted API request language. All of the inputs\n",
- " # specified below in proper formatting can be identified by forming a \n",
- " # request using the Copernicus CDS point-and-click interface for data\n",
- " # requests. https://cds.climate.copernicus.eu/cdsapp#!/dataset/reanalysis-era5-land?tab=form\n",
- " # Select the variables, timing, and netcdf as the output format, and then \n",
- " # select \"Show API Request\" at the bottom of the screen. \n",
- " \n",
- " # Note that the argument in the download() function is the file path and \n",
- " # file name that data will be exported to and stored at. If using a loop, \n",
- " # ensure that the unique features of each request are noted in the output.\n",
- "\n",
- " # Note: need to create \"ERA5_Out\" subfolder on your path\n",
- " \n",
- " dataset = \"reanalysis-era5-land\"\n",
- " request = {\n",
- " \"product_type\": \"reanalysis\",\n",
- " \"variable\": [\"2m_dewpoint_temperature\",\n",
- " \"2m_temperature\",\n",
- " \"skin_temperature\"], \n",
- " \"year\": year_str,\n",
- " \"month\": month_str,\n",
- " \"day\": [ \n",
- " \"01\", \"02\", \"03\",\n",
- " \"04\", \"05\", \"06\",\n",
- " \"07\", \"08\", \"09\",\n",
- " \"10\", \"11\", \"12\",\n",
- " \"13\", \"14\", \"15\",\n",
- " \"16\", \"17\", \"18\",\n",
- " \"19\", \"20\", \"21\",\n",
- " \"22\", \"23\", \"24\",\n",
- " \"25\", \"26\", \"27\",\n",
- " \"28\", \"29\", \"30\",\n",
- " \"31\"],\n",
- " \"time\": [\n",
- " \"00:00\", \"01:00\", \"02:00\",\n",
- " \"03:00\", \"04:00\", \"05:00\",\n",
- " \"06:00\", \"07:00\", \"08:00\",\n",
- " \"09:00\", \"10:00\", \"11:00\",\n",
- " \"12:00\", \"13:00\", \"14:00\",\n",
- " \"15:00\", \"16:00\", \"17:00\",\n",
- " \"18:00\", \"19:00\", \"20:00\",\n",
- " \"21:00\", \"22:00\", \"23:00\"],\n",
- " \"data_format\": \"netcdf\",\n",
- " \"download_format\": \"unarchived\",\n",
- " \"area\": query_area\n",
- " }\n",
- "\n",
- " client = cdsapi.Client()\n",
- " client.retrieve(dataset, request).download(os.path.join(output_dir, \n",
- " \"{}_{}.nc\".format(year_str, month_str)))\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "From the logs, it looks like one month of data takes approximately 10 minutes. In this query, we might end up with 48 hours of downloading for the full 24 years. Clearly this will need to be multithreaded/paralleled to be efficient."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Testing Soil Moisture Downloads"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-05-13 12:55:08,967 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-05-13 12:55:08,968 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-05-13 12:55:09,296 INFO Request ID is 7cc51283-7f15-4176-9cca-5b596a7a2280\n",
- "2025-05-13 12:55:09,840 INFO status has been updated to accepted\n",
- "2025-05-13 12:55:43,013 INFO status has been updated to running\n",
- "2025-05-13 12:56:00,367 INFO status has been updated to successful\n",
- " \r"
- ]
- },
- {
- "data": {
- "text/plain": [
- "'aafb095eaccbe4c07bd91711fa10f7b7.nc'"
- ]
- },
- "execution_count": 6,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "import cdsapi\n",
- "\n",
- "dataset = \"reanalysis-era5-land\"\n",
- "request = {\n",
- " \"variable\": [\"volumetric_soil_water_layer_1\"],\n",
- " \"year\": \"2009\",\n",
- " \"month\": \"01\",\n",
- " \"day\": [\"01\"],\n",
- " \"time\": [\"01:00\"],\n",
- " \"data_format\": \"netcdf\",\n",
- " \"download_format\": \"unarchived\"\n",
- "}\n",
- "\n",
- "client = cdsapi.Client()\n",
- "client.retrieve(dataset, request).download()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [],
- "source": [
- "from pyprojroot.here import here\n",
- "\n",
- "ecmw_dir = here(\"notes/prototypes/\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "PosixPath('/net/rcstorenfs02/ifs/rc_labs/dominici_lab/lab/data_processing/csph-era5_sandbox/notes/prototypes/soil.nc')"
- ]
- },
- "execution_count": 7,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "ecmw_dir / \"soil.nc\""
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 10,
- "metadata": {},
- "outputs": [],
- "source": [
- "import xarray\n",
- "import os\n",
- "\n",
- "temp_file = xarray.open_dataset(os.path.join(ecmw_dir / \"soil.nc\"), decode_coords=\"all\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 11,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
European Centre for Medium-Range Weather Forecasts
GRIB_subCentre :
0
Conventions :
CF-1.7
institution :
European Centre for Medium-Range Weather Forecasts
history :
2025-03-27T17:15 GRIB to CDM+CF via cfgrib-0.9.15.0/ecCodes-2.39.0 with {"source": "tmpznyjyhvw/data.grib", "filter_by_keys": {"stream": ["oper"], "stepType": ["instant"]}, "encode_cf": ["parameter", "time", "geography", "vertical"]}
"
- ],
- "text/plain": [
- " Size: 12MB\n",
- "Dimensions: (valid_time: 744, latitude: 59, longitude: 33)\n",
- "Coordinates:\n",
- " number int64 8B ...\n",
- " * valid_time (valid_time) datetime64[ns] 6kB 2010-01-01 ... 2010-01-31T23:...\n",
- " * latitude (latitude) float64 472B -11.6 -11.85 -12.1 ... -25.85 -26.1\n",
- " * longitude (longitude) float64 264B 42.7 42.95 43.2 ... 50.2 50.45 50.7\n",
- " expver (valid_time) "
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "# Select a specific grid point (e.g., latitude=-1, longitude=0)\n",
- "variable='mean'\n",
- "\n",
- "# note: we can use the isel method to select the grid point. In this case,\n",
- "# we are selecting the bottom-left grid point (latitude=-1, longitude=0) because we're selecting\n",
- "# the smallest value for latitude:\n",
- "# time=0: Selects the first time point.\n",
- "# latitude=-1: Selects the last latitude (bottom-most, as latitude is usually ordered from north to south).\n",
- "# longitude=0: Selects the first longitude (left-most).\n",
- "t2m_mean_point = daily_aggregated[\"t2m_\" + variable].isel(latitude=-1, longitude=0)\n",
- "\n",
- "# Plot the time series\n",
- "plt.figure(figsize=(10, 6))\n",
- "t2m_mean_point.plot(label=\"Daily Mean t2m\")\n",
- "plt.title(\"Daily Aggregated ({}) Temperature at Bottom-Left Grid Point\".format(variable))\n",
- "plt.xlabel(\"Time\")\n",
- "plt.ylabel(\"Temperature (K)\")\n",
- "plt.legend()\n",
- "plt.grid()\n",
- "plt.show()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "How does this compared to the disaggregated data?"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "image/png": "",
- "text/plain": [
- "
"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "t2m_point = ds[\"t2m\"].isel(latitude=-1, longitude=0)\n",
- "\n",
- "# Plot the time series\n",
- "plt.figure(figsize=(10, 6))\n",
- "t2m_point.plot(label=\"Daily Mean t2m\")\n",
- "plt.title(\"Daily Disaggregated Temperature at Bottom-Left Grid Point\")\n",
- "plt.xlabel(\"Time\")\n",
- "plt.ylabel(\"Temperature (K)\")\n",
- "plt.legend()\n",
- "plt.grid()\n",
- "plt.show()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "These temperature plots match beautifully! This means our aggregation over the 31 days works!\n",
- "\n",
- "Let's look at the aggregation over a map:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAAagAAAIgCAYAAADQuf3oAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjEsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvc2/+5QAAAAlwSFlzAAAPYQAAD2EBqD+naQAA/ptJREFUeJzsnXd4FMX7wD97Pb0XQjoQIHQEqUoHQZrYUYq9YEXxZ/kqigp2xQYqYkWxIFUEkSoiSpXeS0IgCS29XZnfH5ccOXK5S7nLBrKf59nnSXZn552Z3Zt3Z+ad95WEEAIFBQUFBYV6hkruAigoKCgoKDhCUVAKCgoKCvUSRUEpKCgoKNRLFAWloKCgoFAvURSUgoKCgkK9RFFQCgoKCgr1EkVBKSgoKCjUSxQFpaCgoKBQL1EUlIKCgoJCvaRGCurLL79EkiQkSWLNmjUVrgshaNq0KZIk0bt371oW0bP07t3bVheVSoWfnx9Nmzblxhtv5Oeff8ZisdQ47/HjxxMfH293Lj4+nvHjx9eu0Jchx44dsz0HV8exY8fkLq5sfPfdd7z33ntyF8Mpd955J9dcc43t/4ufrVarJSQkhM6dO/P444+ze/du2cq6ZMkSxo4dS5s2bdBqtUiS5DDdypUr8fX1JS0trY5L2LDR1OZmPz8/Pv/88wpKaO3atRw+fBg/P7/aZF9nJCYmMmfOHADy8/M5evQoCxYs4MYbb+Sqq65i8eLFBAQEVDvf559/nkcffdTdxb0sadSoEX///bfduQcffJDs7GzbsymftqHy3XffsWvXLh577DG5i+KQbdu28dVXX/HPP/9UuPbwww8zevRoLBYLWVlZbNu2jdmzZ/PBBx8wbdo0Jk2aVOflnT9/Phs3bqRDhw7o9Xq2bNniMF2/fv248sorefbZZ/nqq6/quJQNGFEDvvjiCwGIu+++W3h5eYns7Gy767fffrvo1q2baNWqlejVq1dNRNQZvXr1Eq1atXJ4bfbs2QIQN910k9vkxcXFiXHjxrktv8sZZ8/mcqGgoKBa6a+99loRFxfn9nLk5+e7JZ+bbrpJdO3a1e7c0aNHBSDefPPNCukLCgrENddcIwCxdOlSt5ShOpjNZtvfEyZMEM66xJ9//lmo1WqRkpJSF0VTEELUag3q1ltvBeD777+3ncvOzmbevHnceeedDu8pKSnhlVdeoUWLFuj1esLCwrjjjjs4ffq0XboffviBgQMH0qhRI7y8vGjZsiVPP/00+fn5dunGjx+Pr68vhw4dYsiQIfj6+hITE8MTTzxBcXFxbarHHXfcwZAhQ/jpp584fvy47fxHH33E1VdfTXh4OD4+PrRp04Y33ngDo9FYoWwXT/GVJy8vj8DAQO67774K144dO4ZarebNN9+s9P6yqZM333yT119/nfj4eLy8vOjduzcHDhzAaDTy9NNPExUVRUBAANdddx2ZmZkV8vnhhx/o1q0bPj4++Pr6MmjQILZt22aXZvPmzdxyyy02GfHx8dx666127QIXpn9Xr17NAw88QGhoKCEhIYwaNYqTJ09WWpeqkpOTw5NPPklCQgI6nY7GjRvz2GOPVXgvJEnioYce4osvvqB58+Z4eXnRqVMnNm7ciBCCN998k4SEBHx9fenbty+HDh2yu7937960bt2aP//8k65du+Ll5UXjxo15/vnnMZvNdmmr+k7Hx8czdOhQfvnlFzp06IDBYOCll14CqvZO9e7dm19//ZXjx4/bTZkBrFmzxuGUe9k78uWXX9rOlf1mdu7cycCBA/Hz86Nfv37VqosjMjIymD9/PmPGjHGZtgwvLy8+//xztFqt3bt++vRpHnzwQZKTk/H19SU8PJy+ffvy559/2tIIIWjWrBmDBg2qkG9eXh4BAQFMmDDBqXyVqupd4LBhw/D19eWzzz6r8j0KtaQmWq1sBLVp0yYxZswYceWVV9quzZgxQ/j4+IicnJwKIyiz2SyuueYa4ePj
I1566SWxYsUKMWvWLNG4cWORnJxs9zX58ssvi3fffVf8+uuvYs2aNWLmzJkiISFB9OnTx64s48aNEzqdTrRs2VK89dZb4o8//hAvvPCCkCRJvPTSSy7r4uorfebMmQIQ33zzje3c448/LmbMmCGWLVsmVq1aJd59910RGhoq7rjjjgplu/hr9+IR1OOPPy58fHxEVlaWXbpJkyYJg8Egzpw5U2nZyr5M4+LixLBhw8SSJUvEt99+KyIiIkRSUpIYM2aMuPPOO8Vvv/0mZs6cKXx9fcWwYcPs8nj11VeFJEnizjvvFEuWLBG//PKL6Natm/Dx8RG7d++2pfvpp5/ECy+8IObPny/Wrl0r5s6dK3r16iXCwsLE6dOnbenK3o3ExETx8MMPi+XLl4tZs2aJoKCgCs/OFRc/m/z8fNG+fXsRGhoq3nnnHfHHH3+I6dOni4CAANG3b19hsVhsacvapXv37uKXX34R8+fPF0lJSSI4OFg8/vjjYsSIEWLJkiVizpw5IiIiQrRt29bu/l69eomQkBARFRUl3n//fbF8+XLxyCOPCEBMmDDBlq4673RcXJxo1KiRSExMFLNnzxarV68W//77rxCiau/U7t27RY8ePURkZKT4+++/bYcQQqxevVoAYvXq1Q7fkS+++MJ2bty4cUKr1Yr4+Hgxbdo0sXLlSrF8+fJq1cURX3/9tQDEnj17HJbB0QiqjK5duwq9Xi+MRqMQQoh9+/aJBx54QMydO1esWbNGLFmyRNx1111CpVLZ1XH69OlCkiRx4MABu/w++ugjAdi9w65wNYISQojBgweLjh07VjlPhdpRawVV9sPYtWuXEEKIzp07i/HjxwshRAUF9f333wtAzJs3zy6/TZs2CUB8/PHHDuVZLBZhNBrF2rVrBSD+++8/27Vx48YJQPz444929wwZMkQ0b97cZV1cKajffvtNAOL11193eN1sNguj0Si+/vproVarxblz5+zK5kpBHT58WKhUKvHuu+/azhUWFoqQkJAKCu9iyn747dq1s5uqeO+99wQghg8fbpf+scceE4BtSjYlJUVoNBrx8MMP26XLzc0VkZGRTqc2TSaTyMvLEz4+PmL69Om282XvxoMPPmiX/o033hCAOHXqlNM6lefiZzNt2jShUqnEpk2b7NL9/PPPFaaIABEZGSny8vJs5xYsWCAA0b59eztlVNZeO3bssJMNiIULF9rJuueee4RKpRLHjx8XQlTvnY6LixNqtVrs37/fab2dvVOVTfFVV0EBYvbs2XZpa/r7LOOBBx4QXl5edm1bvgzOFNTNN98sAJGRkeHwuslkEkajUfTr109cd911tvM5OTnCz89PPProo3bpk5OTq/1BVBUF9dxzzwmVSmX3Xil4jlqbmffq1YsmTZowe/Zsdu7cyaZNmyqd3luyZAmBgYEMGzYMk8lkO9q3b09kZKTd9MSRI0cYPXo0kZGRqNVqtFotvXr1AmDv3r12+UqSxLBhw+zOtW3btsL0U00QDsJlbdu2jeHDhxMSEmIr29ixYzGbzRw4cKBa+ScmJjJ06FA+/vhjm6zvvvuOs2fP8tBDD1UpjyFDhthNVbRs2RKAa6+91i5d2fmUlBQAli9fjslkYuzYsXbPw2Aw0KtXL7vnkZeXx//93//RtGlTNBoNGo0GX19f8vPzKzwPgOHDh9v937ZtW4BaPZMlS5bQunVr2rdvb1feQYMGOZze6tOnDz4+PhXqP3jwYDtrrbLzF5fNz8+vQj3KFvnXrVtnK1NV3+mydkhKSqpQN3e+U1Xl+uuvt/u/unW5mJMnTxIWFlapJZwzHP3OZs6cSceOHTEYDGg0GrRaLStXrrR73/z8/Ljjjjv48ssvbdO8q1atYs+ePVX+/VSH8PBwLBYL6enpbs9boSK1suIDq3K44447eP/99ykqKiIpKYmrrrrKYdqMjAyysrLQ6XQOr585cwawdoZXXXUVBoOBV155haSkJLy9vUlNTWXUqFEU
Fhba3eft7Y3BYLA7p9frKSoqqm31bJ1WVFQUYO3cr7rqKpo3b8706dOJj4/HYDDw77//MmHChAplqwqPPvoo/fr1Y8WKFQwcOJCPPvqIbt260bFjxyrdHxwcbPd/WftWdr6sXTIyMgDo3Lmzw3zLK73Ro0ezcuVKnn/+eTp37oy/vz+SJDFkyBCHdQ4JCbH7X6/XA9SofcrIyMjg0KFDaLVah9fL3p8yatouZURERFSQERkZCcDZs2dtZarKO12GIwtET7xTrvD29sbf39/uXHXrcjGFhYUVfodV5fjx4+j1etuzeeedd3jiiSe4//77efnllwkNDUWtVvP8889X+CB6+OGH+fDDD5kzZw733nsvH374IdHR0YwYMaJGZXFGWf088UwUKlJrBQXWRdcXXniBmTNn8uqrr1aarmzBfNmyZQ6vl5mlr1q1ipMnT7JmzRrbqAkgKyvLHcWtFosWLUKSJK6++moAFixYQH5+Pr/88gtxcXG2dNu3b6+xjL59+9K6dWs+/PBDfH192bp1K99++21ti+6S0NBQAH7++We7ulxMdnY2S5YsYfLkyTz99NO288XFxZw7d87j5SwjNDQULy8vZs+eXel1d1KmwMtT9uVcpoCr+k6X4Wh04Y53qqzjvNgwqDKl4qgc1a2Lo/u3bt1aleLakZaWxpYtW+jVqxcajbVL+vbbb+nduzczZsywS5ubm1vh/qZNmzJ48GA++ugjBg8ezKJFi3jppZdQq9XVLosryt53d79rCo5xi4Jq3LgxkyZNYt++fYwbN67SdEOHDmXu3LmYzWa6dOlSabqyH0/ZV3cZn3zyiTuKW2W++OILfvvtN0aPHk1sbGylZRNC1Nqy55FHHuH+++8nOzubiIgIbrzxxlrlVxUGDRqERqPh8OHDFaZ7yiNJEkKICs9j1qxZFSzaPMnQoUOZOnUqISEhJCQkeFxebm4uixYtspvm++6771CpVLYPlqq+086ozjul1+sdfr2XWYvu2LHDzqpt0aJFVS5HbevSokULvv/+e7Kzs6u8b7CwsJC7774bk8nEU089ZTsvSVKF923Hjh38/fffxMTEVMjn0UcfZeDAgYwbNw61Ws0999xT7fJXhSNHjhASEuJwdK3gftyioABee+01l2luueUW5syZw5AhQ3j00Ue58sor0Wq1nDhxgtWrVzNixAiuu+46unfvTlBQEPfffz+TJ09Gq9UyZ84c/vvvP3cV147CwkI2btxo+/vIkSMsWLCAJUuW0KtXL2bOnGlLO2DAAHQ6HbfeeitPPfUURUVFzJgxg/Pnz9eqDLfffjvPPPMM69at43//+1+l0yzuJD4+nilTpvDcc89x5MgRrrnmGoKCgsjIyODff//Fx8eHl156CX9/f66++mrefPNNQkNDiY+PZ+3atXz++ecEBgZ6vJxlPPbYY8ybN4+rr76axx9/nLZt22KxWEhJSeH333/niSeeqLGScERISAgPPPAAKSkpJCUlsXTpUj777DMeeOAB2wdLVd9pZ1TnnWrTpg2//PILM2bM4IorrkClUtGpUyciIyPp378/06ZNIygoiLi4OFauXMkvv/xS5frWti69e/dGCME///zDwIEDK1xPSUlh48aNWCwWsrOzbRt1jx8/zttvv213z9ChQ3n55ZeZPHkyvXr1Yv/+/UyZMoWEhARMJpPDNkxOTmb16tXcfvvthIeHV6nOx48fZ9OmTQAcPnwYsM4ogPX30alTJ7v0GzdupFevXjVaZ1OoATWxrChvxecMRxt1jUajeOutt0S7du2EwWAQvr6+okWLFuK+++4TBw8etKXbsGGD6Natm/D29hZhYWHi7rvvFlu3bnVokeTj41NB9uTJk11a5AhxwVqr7PDx8RGJiYnihhtuED/99JOddVwZixcvtpW/cePGYtKkSTZrv/JWVFWx4ivP+PHjhUajESdOnHBZbiEqt44qs+j66aef7M5X9twWLFgg+vTpI/z9/YVerxdxcXHi
hhtuEH/88YctzYkTJ8T1118vgoKChJ+fn7jmmmvErl27KtSnMhmVWZk5w5GFZV5envjf//4nmjdvLnQ6nQgICBBt2rQRjz/+uEhPT7el4yJz8Oq2V5nsNWvWiE6dOgm9Xi8aNWoknn32WZspdBlVfafj4uLEtdde67CuVX2nzp07J2644QYRGBgoJEmye8dPnTolbrjhBhEcHCwCAgLE7bffLjZv3lzl30x16uIIs9ks4uPjK1hwlrV72aFWq0VQUJC44oorxGOPPebQFLy4uFg8+eSTonHjxsJgMIiOHTuKBQsWOPxNlfHiiy8KQGzcuNFpOctT9r46Oi7+nR46dMihlaOC55CEcGA+o1DnlJSUEB8fT8+ePfnxxx/lLk6Dp3fv3pw5c4Zdu3bJXZRLirfffptXX32VtLQ0vLy86lR2p06dkCTJNiJyN88//zxff/01hw8ftq2VKXgWxZu5zJw+fZr169fzwAMPkJGRYWeEoKBwqTFhwgQCAgL46KOP6kReTk4OGzZs4Nlnn2XLli0899xzHpGTlZXFRx99xNSpUxXlVIcoLS0zv/76K3fccQeNGjXi448/rrJpuYJCfcRgMPDNN99UcJXlKbZu3UqfPn0ICQlh8uTJjBw50iNyjh49yjPPPMPo0aM9kr+CY5QpPgUFBQWFeokyxaegoKCgUC9RFJSCgoKCQr1EUVAKCgoKCvUSRUEpKCgoKNRLFAWloKCgoFAvURSUA6ZNm4YkSTz22GMOr993331IksR7771nd37//v306NGD6OhopkyZYnctPj7eLgpq2VEVF1F1RWX13rt3L8OHDycgIAA/Pz+6du1qC9kBl3a9HdU5Ly+Phx56iOjoaFs054udll5qdX7xxRcrlKfMMztYff+9+OKLREVF2aIy79692y6PS63OCpcBsvqxqIf8+++/Ij4+XrRt27ZCEDQhhJg/f75o166diIqKsgsyKIQQ/fr1EzNmzBCbN28WnTp1EuvXr7ddi4uLE1OmTBGnTp2yO+pL4LPK6n3o0CERHBwsJk2aJLZu3SoOHz4slixZYhdY7lKtd2V1vvvuu0WTJk3E6tWrxdGjR8Unn3wi1Gq1WLBggS3NpVbnyZMni1atWtmVJzMz03b9tddeE35+fmLevHli586d4uabbxaNGjUSOTk5tjSXWp0VLn2UjbrlyMvL47bbbuOzzz7jlVdeqXA9LS2Nhx56iOXLl1cIBgjW3eYdOnSgbdu2REVFkZ2dbXfdz8/P7qu1vuCs3s899xxDhgzhjTfesJ1LTEy0S3Mp1ttZnf/++2/GjRtH7969Abj33nv55JNP2Lx5sy3G0KVYZ41G47BMQgjee+89nnvuOUaNGgXAV199RUREBN999x333XcfcGnWWeHSRpniK8eECRO49tpr6d+/f4VrFouFMWPGMGnSJFq1auXw/ilTpjBgwAC8vb1RqVR2YQ/qM5XV22Kx8Ouvv5KUlMSgQYMIDw+nS5cuLFiwwC7dpVhvZ8+6Z8+eLFq0iLS0NIQQrF69mgMHDtjV61Ks88GDB4mKiiIhIYFbbrmFI0eOAFYvCenp6XbexPV6Pb169WLDhg22c5dinRUuceQewtUXvv/+e9G6dWtRWFgohLB6sy4/7TN16lQxYMAAYbFYhBDWKY2Lp/iEEKKoqMhu6qSMuLg4odPphI+Pj91RHe/ensBZvU+dOiUA4e3tLd555x2xbds2MW3aNCFJklizZo1dPpdSvV096+LiYjF27FgBCI1GI3Q6nfj6668r5HMp1Xnp0qXi559/Fjt27BArVqwQvXr1EhEREeLMmTPir7/+EoBIS0uzu+eee+4RAwcOtDt3KdVZ4dJHmeIDUlNTefTRR/n9998dhqzesmUL06dPZ+vWrS7jwOj1esLCwhxemzRpEuPHj7c717hx4xqXu7a4qrfFYgFgxIgRPP744wC0b9+eDRs2MHPmTLtox5dKvV3VGeD9999n48aNLFq0iLi4ONat
W8eDDz5Io0aN7EZcl0qdAQYPHmz7u02bNnTr1o0mTZrw1Vdf0bVrV6BilF0hRIVzl1KdFS4D5NaQ9YH58+fb4tSUHYCQJEmo1Wrx1ltv2f4uf12lUlUam+ZiKhtxyYmrehcVFQmNRiNefvllu/ueeuop0b179yrJqG/1dlXnvLw8odVqxZIlS+zuu+uuu8SgQYOqJKO+1bky+vfvL+6//35x+PBhAYitW7faXR8+fLgYO3ZslfK6VOqscGmhjKCAfv36sXPnTrtzd9xxBy1atOD//u//aNSoUYX59kGDBjFmzBjuuOOOuiyqW3FVb71eT+fOndm/f79dmgMHDhAXF1eXRXUbrupsNpsxGo2oVPbLs2q12jaivBwoLi5m7969XHXVVSQkJBAZGcmKFSvo0KEDYI1PtnbtWl5//XWZS6rQkFEUFFbro9atW9ud8/HxISQkxHY+JCTE7rpWqyUyMpLmzZtXWU5ubi7p6el257y9vfH3969hyWtHVeo9adIkbr75Zq6++mr69OnDsmXLWLx4MWvWrKmynPpU76rUuVevXkyaNAkvLy/i4uJYu3YtX3/9Ne+8806V5dSnOgM8+eSTDBs2jNjYWDIzM3nllVfIyclh3Lhxtn1gU6dOpVmzZjRr1oypU6fi7e1drfAS9a3OCpcBcg/h6isXL5xfTHWnNOLi4hyGlb7vvvtqX1g34qjen3/+uWjatKkwGAyiXbt2dvuBXHEp1PviOp86dUqMHz9eREVFCYPBIJo3by7efvttm4GMK+pjncv2NWm1WhEVFSVGjRplF2rdYrGIyZMni8jISKHX68XVV18tdu7cWeX862OdFS59lHhQCgoKCgr1EmUflIKCgoJCvURRUAoKCgoK9RJFQSkoKCgo1EsUBaWgoKCgUC9RFJSCgoKCQr1EUVAKCgoKlzAzZsygbdu2+Pv74+/vT7du3fjtt99s13/55RcGDRpEaGgokiSxffv2CnkUFxfz8MMPExoaio+PD8OHD+fEiRN1WAvHKApKQUFB4RImOjqa1157jc2bN7N582b69u3LiBEjbAEn8/Pz6dGjh9PgkY899hjz589n7ty5rF+/nry8PIYOHYrZbK6rajhE2QeloKCgcJkRHBzMm2++yV133WU7d+zYMRISEti2bRvt27e3nc/OziYsLIxvvvmGm2++GYCTJ08SExPD0qVLZQ2rctm4OioqKqKkpETuYigoKLgZnU5Xqed5ufFkvyMq8Sav1+srvcdsNvPTTz+Rn59Pt27dqiRny5YtGI1Gu3hgUVFRtG7dmg0bNigKqrYUFRWRkJBQwQ+YgoLCpU9kZCRHjx6td0qqqKiIKC9fzuOZaTBfX1/y8vLszk2ePJkXX3yxQtqdO3fSrVs3ioqK8PX1Zf78+SQnJ1dJTnp6OjqdjqCgILvzERERsvepl4WCKikpIT09ndTUVFkcU+7evbvSKLuK7MtLdkOss5yyc3JyiImJoaSkpN4pqJKSEs5j5kt1At5uXs4vwML4vKMV+rTKRk/Nmzdn+/btZGVlMW/ePMaNG8fatWurrKQc4WgEV9dcFgqqjDIrlrqmWbNmsnlsVmQ3DLkNWXZ9x0erxltSuzVPSZjBXPU+TafT0bRpUwA6derEpk2bmD59Op988onLeyMjI63K9vx5u1FUZmYm3bt3r3kl3IBixecG5LQzUWQ3DLkNWbZC9RFCUFxcXKW0V1xxBVqtlhUrVtjOnTp1il27dsmuoC6rEZRcpKenVxoGW5F9ecluiHWWW3Z9R9JIqNw8FSaJquf37LPPMnjwYGJiYsjNzWXu3LmsWbOGZcuWAXDu3DlSUlI4efIkgC0AaWRkJJGRkQQEBHDXXXfxxBNPEBISQnBwME8++SRt2rShf//+bq1XdVEUlIKCgkItkLQqJMm9k1FSNUasGRkZjBkzhlOnThEQEEDbtm1ZtmwZAwYMAGDRokV2kb9vueUWwN7g4t1330Wj0XDTTTdRWFhIv379
+PLLL1Gr3Tt1WV0ui31QOTk5BAQEkJ2dLcs8udFoRKvV1rlcRXbdy26IdZZTtty/bWeUlW1eUHN83LwGlS/MXH9+f72sd12irEG5gZSUFEV2A5HdEOsst+z6jkotodK4+VDLaz1XX1AUlBsoKChQZDcQ2Q2xznLLVmi4KGtQbsDLy0uR3UBkN8Q6yy27viNpJSSVm40kLMoIChrYCMpsNvP333+zYMECCgsL3ZZvXFyc2/JSZNdv2Q2xznLLVmi4NBgFNX36dBo1asSwYcOYOHEi1113ndv2duzbt88t+Siy67/shlhnuWXXd9y+/lR6KDQQBZWVlcUzzzzDW2+9RXp6Otu2bWPjxo1s3bpV7qIpKCgoKFRCg1iDmjt3LrGxsYwdOxaAgIAA/P393bbwGxER4ZZ8FNn1X3ZDrLPcsus7yhqU52gQCioiIsLO0aTJZCItLc1t8+pybmZTZDcMuQ1Zdn1HpXa/WbjKrCgouMwUVN5nk1F56VEZ7D3+nv7rPwKKsymYPRlhNJFyNhsJQcCSGeSr7Wc5LUVFTmU4Wrc6XKJFqzMCOPX+62rNqyaeg4+UaNDpTK4TqpzP5tbkC/BIsRq93gxu3kVfJdlFavQGM7j5y9Wl3EIVei9Lncp0KNtSt/vrbe0tPFd33wlveCxvhUuTy0pBVcbZvAJCfb1t/6ecyyYq0A+NukEswTVIlu44SPPIUJqEB7lOrKBQCyS1hOTmEZSEMoKCBmIkcTavgJByCurEuRxiggPcln+C1ui2vKovuwqjJw+RqPVMoLYqydY5l33zjHlM+3W9++Xq5Rk9yS7bRXsrKHiChjGCyi0gMtDX9v/JrFwiA3yd3FE9MsxqYjXy/IAzTGpiZVIUGSYVsTp5Ok1XstPeeRydxv3rJvW5zper7PqOR9aglBEU0EBGUOfyCwnyubATPi4kgL2nTrttH1SBRb5mLKiGW353ky+nbBdWTv5eegxa939/5cs4kJBVtmJVpiADDUJBCSHILrhg/NC/VSI5hcV8ttY9+6B0knwO4XUy9ht6WWXL0+Z6GX8xssqW8R2v70gqySOHQgNQUFuPnWTl7iOMvCLZdi7Ay8Bn44fz/PxVHDl9vtYy4jTyrQPJKlvGNag4maab5JLbkGUrNFwuSwUlzGbbsXTbfga3bUZyoxDrOWFBCAtXJcUwpE0zPl2z2XZOCIvVHNvJIUlSheOQSXvhf4268kPt/HAl29Fx0KSzlsuZXI3a9deapKr8UKsdHgeMWuvfKqnOjwMl6jo3MQc4UCTfT0ZW2cXKPqjKkNQqjxwKl6mCKk/PpFhW7DrM1mMnK1zrl5zA1uOnZCiVgoKCgoIrLnsF1atFPJOG9ODmj3/CctHmxiAfA4cyz3M6J79WMkLU8k1/yCk7VCPfuoRcshtina2ylSm+yiiz4nP3oVADBbVu3TqGDRtGVFQUkiSxYMEC2zWj0cj//d//0aZNG3x8fIiKimLs2LGcPGk/etm/fz89evQgOjqaKVOm2F3btm0bQ4cOJTw8HIPBQHx8PDfffDNnzpypWQ2BCf2vRC2peOTbpWw4lMrR01kADG7TlCZhQXy1YUeN8wbQIl/HoZVx8Vor429ILtkNsc5yy3aFXH3S2bNnAasHGLcbSdTAq8zlSLUVVH5+Pu3atePDDz+scK2goICtW7fy/PPPs3XrVn755RcOHDjA8OHD7dJNmDCBMWPGsHDhQhYvXsxff/0FQGZmJv379yc0NJTly5ezd+9eZs+eTaNGjWrl2FWnUfP7U2M5nZvPXbMX02nKZ0z8/ncsQvD88Kt487cN7E8/W+P8083yzc+nm+STfcoo349ILtkNsc5W2fV3suVS7JMUqka1N4oMHjyYwYMHO7wWEBDAihUr7M598MEHXHnllaSkpBAbGwtYw1906NCBtm3bEhUVRXZ2NgAbNmwgJyeHWbNmodFYi5aQkEDfvn2rW8wKxIYE8MOEmxBmMylnsxn5wQ+M/3wh79wy
CLPFwvl89wUwVFBQqDvk6pNycnIAkNS4fUpOseq34vHPouzsbCRJIjAw0HZuypQpDBgwAG9vb1QqFYMGDQIgMjISk8nE/Pnz3baJ1hGxIQH8/uTt/JeSwZVTZtEmOpyuTaJrnJ+8pt7yyY6X0fWOXLLra52Pn81mzsadnC9w7uy4xrIvI1dH9bFPUnCMRxVUUVERTz/9NKNHj8bf3992fsiQIZw+fZqTJ08yf/58myv/rl278uyzzzJ69GhCQ0MZPHgwb775JhkZGW4vW6ivN78+fisf3j6YBY/cXKu8zlrkm2Y7a5Zv6uWsSb4pJ7lk18c670rLpOe0L3hz2d8MfPtbjyips6b6O8VXHTzRJ5U5i3X3oeBBBWU0GrnllluwWCx8/PHHFa7r9XrCwsIqnH/11VdJT09n5syZJCcnM3PmTFq0aMHOnTtdyrTtfzKaKj0wm21HTIAv17ZOJECnvXDeBY72GOVJ5f/XVnqodDqnh6t9Uo4WU/NQud7H5GQvU9nhVK6kcnjkWSq/5q6jMnJlipcjl9wKsoXFdoyfvYjburZmy/N3EhsSwPO/rLLbC3jxUf5eh4cj2ZeBqyN390m7d++ui2I3aDziLNZoNHLTTTdx9OhRVq1aZfelUhVCQkK48cYbufHGG5k2bRodOnTgrbfe4quvvnJ634ESDb4qDc28BCklKoqFhLdK0Ehj4XCJ9YsoQi0QQGbpyKOpzkyaUU2hAC8JotQmDpdYmyVUbUEtCTJKDRESdCYyS1TkCwm9ZPWkcKBETYZRRajaglaC9NLF5Hi9hTMmiTyzhFYSNNEL9pVutAzWCLxUgrQS6/9xegvnTRLZRjUaSdBMb2FfkRoBBKot+KoEJ4xqQCJGayHXLJFlkVABWqybKM2o8FcLAtUWUkrr2lhrodAC58wqkKCll4WDRSpMAvzUghCN4FixtQyNdVAsJNtXenODhaPFEiVCwkcliNAKjpSmjdRaMAmJDCOAimYGC6klEkUWCS+VIEorOFyaNlxr7fAyS9ulid7CSaNEoUXCoBLE6AQHS9slVCPQSMLWhol6C+lGFflm0KkgQWdhf2naAotEtlniZIlka++zJolcs4RGgmYGC3sLHbd3rM5CllkixyyhliDJYM3XIiBQI/BTCVJL00brLORZJLJMEpJktWYra0N/tSBIIzhua0MLhRaJc6Vt2MJg4XCxCqOD9m6kFRgFnClNm2SwcLxERbEFfNQQobHY2jtCK7AAGaXt0tRgIa1ETaEFVu46yJGzOXS9oiMHjFqGd2nHD//uYW+xGgmJRJ2JdJOaAouEThLEas0cKt10W/bOniodHSVozZw2Wz88dJKwtndp2nwL5Jgl0ozW/+O0Zs6ZVeRaJDRAM72ZfcXWdzZIJfBRCU6U5hurtZBtlsgufWeb683sL1ZjAQJUggC1IMWowrBzJ3FxceTl5XH27FkkSbKtEdUWT/RJ77//PgCSSoXkIt5adXF3fpcqbldQZS/CwYMHWb16NSEhIbXKT6fT0aRJE/LzXe9VStKZ8NerkSR1hdAELQ2lo6PSeeSQct7Hy8+vCwu00Nuv6wSpL/x/sUfnlnozLXRmyqxCg8rJjdEJKGeC3rI02JzRbOaXzXtZuH0/LwzvhXejULx1gij1hXK0MNiP5lqqLwSL81UJGpWVV2CVXW5VtWW5e/3VEKG9ENivmeGi8peWyTpaEYRrL+TTxOC4/KWS6aURtnon6J2lhZBy+2jiXaQNKpe2QnuXpi2rd4DXhXyiK2nvMvzL/e+jFjQul7Z5Je0CVuXSqLRdbO1dSVp/tVWZl9HUSb4AYeXSVnhnL0rby89kk51+IoXpf/zLkh2HANDmnKFlfCChzRrx0k/L2bFjJzd3trr3utjbfUu9/f+B5d67GJWF8o60y96lsnr7qy6UyVtlX74WegfvbCk+KkFUuWvNL06rN+Pbpg0A/v7+REVZU5cZItQGT/VJihWf56m2ms7Ly2P79u1s374d
gKNHj7J9+3ZSUlIwmUzccMMNbN68mTlz5mA2m0lPTyc9PZ2SkhKXeS9ZsoTbb7+dJUuWcODAAfbv389bb73F0qVLGTFiRLUrV1fsK6neGtRz81Zx71eLOZdXyFM/rnB9gxtlu5N9MrrekUt2fajzXwdTGfDOdyzZcYhBrRIB+N+CNQCE+fkwrltb1h9Kda/seuzqSK4+6dprrwUUZ7GepNojqM2bN9OnTx/b/xMnTgRg3LhxvPjiiyxatAiA9u3b2923evVqevfu7TTv5ORkvL29eeKJJ0hNTUWv19OsWTNmzZrFmDFjqlvUektBsZEH+3ZmQt8raf2/jysEVFRQcEZiWCCvjOzFrrTT/LxlHwB39GjHX4dO0KNpNEazGa26/ioUdyNXnzRq1Cjuu+8+z8SDkjGUTX2i2gqqd+/eTs0ta2OKmZiYyKefflrj++UiWO24zuNmzee/lAxeuq43V8RFkV1YxLEzWczfupcfH7yR6GB/2sZE8NuOQ9zeva1bZdcFwTK63pFLdn2oc6NAPx7tfyVFRhMbDp8gv8TI539u57n5a7i/V0dmrt1KuJ83b9/Yz20eCYJldKnlCrn6JHdMPyo4p0FE1PU0XpXsqgv19eZQ5jlu++QX27lwfx8eHdCVHs2si7/D2iexaPv+GiuoymTXBV6qhie7PtV5+e4jmC2CDU+Po/n/ZgLw85Z9tI4KY9fJ02xNSeeKuEaOsqqBbLdkc1niiSk5ZYrPivLaOeDomSzWHkip8pdXWiV7RN6+ZRB//+8uOidE4aPX8kj/LvzwwA08eU13W5qh7ZJYs+8YJaaabYSsTHZdUGYV15Bk15c6n8krwKDVkF9cgsliHd00CQvk07FDWPH4rQDMXLvNbZtL0+qxqyOFy5fLawQlLGCxOHXd6urLRFKrmbz4TxZs20+orzdXxDdiQp9O9GkRb03g6H5JBWVT/hddbxMbyaqnx/P7rsN8tnYrI9+fy7AOLfjkTqsvsKZRYRSUGMkqLiHMW++8fg5Dy6usFnyu6uVkT1F9prJyW/dJub5fVLKvR25cPg8Hz1NSSaCCtPO5XDH5U/KLjYzr2Z7YyHBWPXMHfad9waiPf+aFYVcB8OvOQ3y/eS+3dW1jl49wsd9PUjlqM6nSPVINHUnygJn5Jfp7dTeXl4JyE2aL4ImBXRncpinrDhxn9Kfzua5jc968sT8+XhWVSJwL9zeSJDGoTVOuaZfEsdPn6fLipwT6GHjxuj6oSnvZmq4VxMnogsZVvS9H2XLWOVJt5K7Pl/Djv7vp3iyG124aSNuYCFQqiS5NookM8CU9O48pi/8EYHyPdry74p8KCqomyBk9WaHhoqhpB1zXsTkfrd6MQDDpmu7889ydHM48T49pX/LXwYrmu+er4f4mPiyIP5+/i38Pn6D7lM/Yd8oaRqSma9nnZXR1VJ16Xy6y5ZL707+76f7Gd2w5dpKXR/Xlm/tv4IqEKLSaC9Z6W15+gL8n38O9V3dkysjePNinE0fPZLE77TQmc+0Uq5zvWX1HMTP3HMoIygE3dkpm7j+7eez730mOCsVLp2Vwm6ZsOHyCa96Zw6JHbqFPy3hb+hyzZLfp0xVJkaH88fR47v58IS/MWwWARM1eyByzRGNtjW6tNdWt9+UgWw652QVF3DN7EZPvuInBsQG0aBTqcEop0NtAoHckb988wHbu3qs70nXqbABu7pzMJ2OvrdFXaY5FonFNK6CgUEOUz6JK+OC2axjfox2tGocT4e/DxiNp7E7LJNBLT7HJ3tOEpga6Ra1SMXlkb7annKJV43BCfL1qVE6NjFZ86gYouybPuqpsOJTK3H92sefkaYqMF96xvafOoNOoGdgqgRaNQquV561XtiLQS8+ka7rxw6Y9fLx6c43KpnzJVo4SUddzKO9dJUQF+nF/7ysqXnAw9L7YfVBViQ8L4sAbj6JWqZAkqUbf5c1kXBNJMsinoOSSXdNn7Yp9p84w8K1v0KhU6LVqSkxm
Xh7Vl4cGdGH13qMMbZ9EK9/qf0+2jYlg8/N30/TZjwB49pfV3NuzPTpN9TbyNtMra1CVoZiZew5lBOUGauP+Rq/VoFHX/P59RTK6OiqU0e2PTLI95epo/cEUAEwWC7teeZDfnhzDiwvWsOtEBtkFRQT7erGvsGadVkSALwsfusn2/09b9lY7j/rs6kjh8uXyVFAWS6WHsAinh8tQBBZR4RAWLvztJMyBMDk/XKKSKhxCsp6vadiK2iBn+LYy2UJYnB5ul1uu0u5s77uv7siWF+/FR6/ls7Vb6ZYUx529ruCOWQv57u+d9EiKB0l1IQSK3lDp4eg96dsqkczpT9CzWQyPzP3d+eK8g3AtAhfhXMqOBkiZN3N3HwrKFJ9bCJTRDUygjK53AmV0sySXbE+2d/PIUBY9eiujPviBRsH+PDv8agC6No1mVOdkThXXLn8vnZb1pVao5wuKCPI2VPleOd9xhYaLoqDcgK+M7m9klS2jgpJLtqfbu0tiNO+OvoY7P1/I+gMpvHvbYAJKFYmvjLNscr5n9R1lDcpzKONIN3DCKF/PcUJG1zsNUXZdyL0yoTGdEqL4YeNOjpw+f0F2LUdQAP83pDuDWiVWa/QE8r7jCg0XRUEpKNQz4kMD+ezukQA0DQ92a97zt+5nSJumbs2zoaNs1PUcioJyAzEyuoGJ0VmwWAQ3f/wTK/ccqXPZciGX7LqS+3fpWlGZI1iAmOoNehxyOjef91f+y4+b9lTrPjnfcYWGi6Kg3ECuRb6vnRwzPD53Ob/uOMiI9+fW2Ct6Tcg1y1dvuWTX1bO+qnkcSZEhPFsu4nKOyckNVeTTcUO5pnUTnvjxDz6qxqZdOd/x+o4ygvIcioJyA1ky+inbcyafL9f/x8b/3U2H2EiGTf+Ox79fxu+7Drst1EJlZMmooOSSnVVHvvgSw4OZdfdIFm/dZzuX7QYFdU2bprx2fV/mT7iRFxau43RufpXuk/Mdr+9YFYq7zcwVBQWXmRWfEAIhhHPP4BbnUzTChc52FIpAVT4UgcOQGGV5uxjd1CBkRoivF946LT9v3sNPE27i2793cCa3gNs//YVXr+/LhkOpPDqgK+1iwp3LrgES7gln4WzfUGX5u0t2dSn/iGoj3+W9RhPxQf6cLygi42w24f4+YFYhjKX3Odsn4yKcBkCn+Ea0iw7npcXr+WrDDoa1a8acu0dYfzsO3kOVCpfvJ+D0/QeUkB0K1UL5LHIDzWV0A9MxQMOiR2/l+392sm7/cZ68pjuv3difns1i2Z12mp827eGXGngOqArNPeT2pz7Lrku5wT5e9EtO4MUFaxBC0MLLvbIDvQ18tWEHAIv/O8gdXyyh2Oh4mCbns67vSCr3++GrzghqxowZtG3bFn9/f/z9/enWrRu//fab7boQghdffJGoqCi8vLzo3bs3u3fvtsujd+/eSJJkd9xyyy1ua6OaoigoN3BARjcwB4okrkxsTN+WCexIzQBg7j+72HQ0jXE92gHQPDLEQ7Lle33kkl3Xcj+6/Vp+3XGA5bsOc6DIfdM+BSVGVuw5CsCqJ2/jyLQJ7D55mndW/OMwvZzPWsE50dHRvPbaa2zevJnNmzfTt29fRowYYVNCb7zxBu+88w4ffvghmzZtIjIykgEDBpCbm2uXzz333MOpU6dsxyeffCJHdey4rKb45EJO+yazkADB4u0HyCoo4pXr+/LHniPEhATQLiaSPa9OICbYH084JjLLuHdTLtl1LTc62J+kyBDO5hUQV/qs3YG3TsuRaQ9itggiA3wB67RfXrHRYXo5n3V9R+6NusOGDbP7/9VXX2XGjBls3LiR5ORk3nvvPZ577jlGjRoFwFdffUVERATfffcd9913n+0+b29vIiMj3VMBN6F8FrkBfxl32furBblFxfRtmQDA0h0HeaBPJ3akZnAmr4DYkIAaR+utimy5kEt2Xcs1ms38l5JB+9hIt8sO8/OxKSeA6zo056sNO3hkzjL+PZJml1bO
Z92QycnJsTuKi53v1jabzcydO5f8/Hy6devG0aNHSU9PZ+DAgbY0er2eXr16sWHDBrt758yZQ2hoKK1ateLJJ5+sMMKSA2UE5QZk9cWnFnz4xybbOtP7K/4hNiSA6zq2sC6se1i2XMjmi6+O5e5JO41aJdGyURiFHrbKHNgqkXdvHsDD3y8nLSuXeRNutF2T81nXdzzh3LUsv5iYGLvzkydP5sUXX6yQfufOnXTr1o2ioiJ8fX2ZP38+ycnJNiUUERFhlz4iIoLjx4/b/r/ttttISEggMjKSXbt28cwzz/Dff/+xYsUK5ERRUG4gxaimpUyGEiklKsb1aM/Li9YhSVbPAz9u2s2WF++tE9kt3bxwX99l16VcIQQfrvwXAJVKIqXQ87Jv7NSSx+auIKugiOzCIgK8rLuD5XzWDZnU1FT8/f1t/+v1eofpmjdvzvbt28nKymLevHmMGzeOtWvX2q5fPItysbXzPffcY/u7devWNGvWjE6dOrF161Y6duzorupUG2WK7zLgVJZ1KC4EBPkYKCgxcjavUOZSKdSW1XuP8fvuw/z9v7vrVO7CR2+mqMTIE3Pl/Xq+VPDkRt0yy7yyozIFpdPpaNq0KZ06dWLatGm0a9eO6dOn29aU0tPT7dJnZmZWGFWVp2PHjmi1Wg4ePOimVqoZl6WCKtsP5ehwiZNYUpXFk2qsMlc9nlQ1Y03ZxZ1yEO8oSmuifWwEyybeRvem0fy4aTf9kxNIDAtwX1ykSsrTWOO6zFhct3lN4jk1lsnVUXm5rmJw1eYQZjM//ruL6zu2JC7ID2E2E6U2XYgtVlxS6SHpdE6PSmM5lR6d4qN4b/Q1LN1xkEOZ52Rt70uB+uhJQghBcXGxbdqu/FRdSUkJa9eupXv37pXev3v3boxGI40aNapVOWqLMsXnBgqEhJ9M4fsKLRL+akGPZjEsf+L2OpYN/jJZ2JfV+3KV+8eeI8zfuo/1z4wvJ7vu2rtzQhS3dW3DxO9/Z9Gjt8jW3gquefbZZxk8eDAxMTHk5uYyd+5c1qxZw7Jly5Akiccee4ypU6fSrFkzmjVrxtSpU/H29mb06NEAHD58mDlz5jBkyBBCQ0PZs2cPTzzxBB06dKBHjx6y1k1RUG7gvFlFhEaeL8xzJokIrTwdxzmzigiZnIjKVe+6kGsyW3hs7u9Mu6GvnTfzum7vwW2b8kfpXik537P6jieNJKpCRkYGY8aM4dSpUwQEBNC2bVuWLVvGgAEDAHjqqacoLCzkwQcf5Pz583Tp0oXff/8dPz8/wDo9uHLlSqZPn05eXh4xMTFce+21TJ48GbVa3jArioJSUKhnrD+YQpHRxJiubWUth4TkcX+OCrXn888/d3pdkiRefPFFh9Z/YLUULG9QUZ9QFJQbaKF3gxfPGiKnZVVLg3xblOWqd13IXbH7MAOSE9Go7b+i67q903PyiAy07pNSLPgqR+6Nupczl6WRRF1zqES+YfBBGV3QHCyWUbZM9a4Luav3HaN/ckJF2XXc3iezcokKsE4DyfmeKTRclBGUGzAJ+b52TDLOwDTEeruS+9y8lSzatp9BbZry1s0DnSeuhPTsPOJCAhzIrtv29tZqySkqLpVdp6IvKeReg7qcUVrBDfg6CMFRV/jJaFnlJ6OLJ7nqXZncrIIiLBbBzNWbGda+OTNXb67UM7grgn282HvqTEXZddzeLaNC2ZGaQbHRJOt7ptBwuSxHUJ7yPQc4jCcVIl3Y7+MqnpQzHMWaspftIB6USoCFqsXqcTMhVbVcrMJeqEqppF4hmtp3mM7iUFVGqLbi+3Ug/SwdX7zg+XnHCatX+bN5hUQF+VVbxuA2Tfnm752M7dnBXrYeJJV1OlkyON6wCSBptU7zF4XON3FbjFaHsT2axRAZ4MvUX9fz9PDeVSg5rt/DyzFyvCRZD3fnqaCMoNzBcZN8a1DHZFz/klW2TOtfjuTGlk7HPXlNd16/sT8zxw6lX3ICn67dUu3895w8zax1
W3mwb+cqyfYkWrWap6/twZL/DsrW3pcCkuSBjbqKggIUBaWgUGvyi0sAmDioGxP6XUl0sD8tG4Wx5L8D1c7r5UVrGd21Dddd0dLdxawRRpMZlSRxKPMcxipE6lVQcCeKgnIDjdTy/XCjtPKtf8kqWyfPmoijOgd6G1CrJM7mFdjOHco8Z7fJ9nDmOa55+1v+PpTqNH+VJJF6LqfKsj1Ni6gwDmee48GZc3nrt7/rXP6lQJmRhLsPBUVBuYUS5BuOF8u4di2rbJl0Y7EDSzq1SkXn+Ma0eX4G9325mBmrN1FsNLHkvwPsTssEYMvxU6w/mMLNM34mu7CoQh5CCD5etYk/9hzh6Wt7Vlm2p2keGcKJdx7nxi5tKDA6DmaooOApLksjibrmrFlFmEwxoc6aVIRr5BnByStbIlwG1zuVyf3qnpFMXfInKpXEp2u2cDDjHG2iw20xuaKD/An28UKtkhjw5jfc36cTwT5eZOTk4aXV8s3f/3HsdBaLHx1NxzjHDjrlqrO3Tst5i5oOkSF1LvtSQNmo6zkUBaWg4AYaB/nz0ZhrK73erUk0vZrHoVJJXJUUx9d//Ud2YRGxIQEUFBsZ2LoJ9/fphJ8T6zw5CfI2kHI2W+5iKDQwFAXlBpK05fa7ODBDryquTNQdmaE311lA4NAE3Q4PfJE1lylII0BzQ9XauSam5DYuarOUs9mYiorBJ9y13IudbAqBWq3m5027+WDcMO4Z2M25aN+K5unJQqAqs+7SODEld+Hg01WbWM6fr3Culbfgp00pTu9rqCgbdT2H0gpu4JhJPj1/1CifqffREvlen7qWnZ6VS/IzHzLu+7X43vsqvve+yq/bK7fSO5tXwN8HU8guKGLku3Pwv+cVft60G4CFW/fVqAxHcuXz+dguuSWbjp3knyNpspVBoeGhjKDcQImMxgLyypZvnrykjpf89qWfBayhCQBC/bz5eNUmrm2f5DD9TR/MZeOhEwD0ahHPllceIMDLwMH0s7SvZI3JFXVd5/L4eHszaVA3HpmzjH+ev0u+gtRDrHEe3b0G5dbsLlmUZnAD3pJ8WsJHTtkyujryqcOB452zFjD0nTlcEd+IvLw82sZE8OOEm1i77xgj3vuegxlnK9zTp2Wi7e9XbuxPUmQoEQG+9Gweh69BV6Ny+Gjk+yDwUQm6JjamoHTPl8IF6mNE3csFZQTlBiJksmSzypbvs7ohyD6QfpafNu3m3dHX0C85kaiQQHQSmC0WHujbmf9S0/nwj3+Zfttgu/t6JMXirdOy9ZUHiXHg+LUmRHrJN50bobHgFxvJqew8/u/HP3j9pv6ylUWh4aCMoNzAUaN8ev6IjGtQR2R0dXSkjlzvbDt+iisTGnNP7ytIDA/iaLEKlUpCq1Hz5i0Deaj/lazde6xCYL93ftvAI4O6uk05ARyWcQ3qSImaIG8D79w6kA0uNhs3OFQqzxwKioJSUHCGl07D4dPn2XUi0+H1ga2bciavgM1HT9qd33fqNANaN62LItYp5/OLCPb1krsYCg0ERUG5gQgZXR1FyjjNFimjq6PIOtqw6u9l4ExuAW8v22CVe5GLJYNWQ/vYSGav22o7Z7ZYyC0sRq9x7wizkYxTfGXPemTH5mw4dMLmIUOh1FmsBw4FZQ3KLZiRsG5Gos6H5uaq9tO1CXlRW9muqMGCcNkngTv3OV2MpFIxbfGfPNj/SiaP7IOkUmGWLlhYSaUWfc2jI/hy7RY+uf9GAEwmE2qVirS8Yq7w8naceZBzrwwWb/8K50ryirD4Gqz/iMo/DiSzc5dEksnF9ZyKvgDNFglJLYgLCmBst7a8u3wjs8YPq3izq2epmKcpVAPlbXEDZ8zyNeNpOWWbZJRtrJsvzCOnz3PdFS1tlndnHMi9v38XvLQa7vlkHluOpDHvn11YhMDf271eIc7kF7s1v2rJNl2o961dWrF81+EK624NFcVZrOdQWkFBwQmNAn1JPefcxU/zqDA2vjIBgMGvzea1
hWuYfud19G51+a1BASSEBpFVWExhDSMGKyhUlWorqHXr1jFs2DCioqKQJIkFCxZUmva+++5DkiTee+89u/P79++nR48eREdHM2XKFLtr27ZtY+jQoYSHh2MwGIiPj+fmm2/mzJmKIbDrC0218v1Qm+nkW/9qJqOro2ZVdHVUW7o3i2XN3mMX5Ho5HjXEhgby2X3Xk/np8/z3xmPc0qO928vSLLT60XndJrtce/uVjiZzC+Ub0ZVHrj7p7Fnr/jdlH5TnqLaCys/Pp127dnz44YdO0y1YsIB//vmHqKioCtcmTJjAmDFjWLhwIYsXL+avv/4CIDMzk/79+xMaGsry5cvZu3cvs2fPplGjRhQUFFTIp76QKmNE3VQZzcxTjfINwFPryNXR7d3b8eM/uzh62uqfLlXGPjk1S77fQGrJhQ5Tp1Gj16jJrSebdmXvkyQPmJgra3VADYwkBg8ezODBg52mSUtL46GHHmL58uVce21FD89ZWVl06NCBtm3bEhUVRXa2dQplw4YN5OTkMGvWLDQaa9ESEhLo27dvdYtZp8gRp6eMIhmXAYosMta7jgwI28REMLZne3pPnc3YHu0Z0e9qEgxOHLV6kCKTfCNW67O+8LL5GfTkFtUPBSVXn5TjwJhEwb24XU1bLBbGjBnDpEmTaNWqlcM0U6ZMYcCAAXh7e6NSqRg0aBAAkZGRmEwm5s+ff0ktwHrJ6G5IVtkyujryqsMPzHdGX8Psu69j2/FTPP7VIjbL5DDVSyvfaPniZ+1v0JFbVD+m+Fzh8T7JE9N7yhQf4AEF9frrr6PRaHjkkUcqTTNkyBBOnz7NyZMnmT9/PurS8ABdu3bl2WefZfTo0YSGhjJ48GDefPNNMjIy3F1Mt9JIRldHDTbkex3KliSJzomN6ZEUy64Dh9iekl5nsssT5V+JyXpdyL5o35mvQUdOYf0YQbmiIfZJlwtu3Qe1ZcsWpk+fztatW11uNNPr9YSFhVU4/+qrrzJx4kRWrVrFxo0bmTlzJlOnTmXdunW0adPGaZ4HjVp81Vqa6i2kGNWUCAlvlSBSY+ZIibWq4RozAjhdum7URGcizaSmyCJhkASNtWYOl6YNVVtQS4KM0rQJOhOZZjX5QkIvQZzWzIESNSeNKtoaTGglOFW6LhOvNXPWrCLXIqEFmujM7CuxTg0Fqy14qSCtNG2czsx5s4ocs4RGEjTTW9hXpEYAgWoLvirBidK1phitmVyLRJZZhQqwAGpJYBYS/mpBoNpCSqkLosZaC4UWOFdqit7SYOZgsQqTkPBTCUI0Fo6Vpo3SC4ot1qitYI23dLRERYnF6pA2QmOxuTaK1FowC/ivUEOU1kIzvZlUo4oii4SXShCltXC4WF3a3hZQSWSWmmc30Vs4aVRRaAGDCmJ0Fg4WWssXphWogfTStIl6C5kmNfkWCZ0kSNAL9hdZ0+aYoYXBmhdAvN7CWZNEblkbGgR7S/MN0gi8VYK00nWrWJ2FLLNEjkVCLUGSl2BfoYQQEKgBP7Ugtdj6JRutF+SZYeL8vzmfV8jnE++ideNQ9hZI+CMRrJU4VmhVltEGFQVmwTmjQPLS0TLYwKHsYoxmgZ9OTaiXhqPZ1lFHpGSgxGzhbKnpeFKYP8fO5VFituCt0xAa6MPRzCwAIgJ8MAvBjpQMooL9aBoZzImzORQZTXjpNEQF+3E43bpGFubvg7aomIw8a1j5JsG+pOcWkW80oVeriAvy5mCW1agnzEuNRiVxKt/6f6K/lsxCM7kmHVoJEr1gf+kyS7YJWvpAWqE1bcuYKHZkF9O0RGtr732FKizCQpBG4KMSnCjX3tlmiWyzhEpYaK43s79YjQUIUAkC1IIUowrDzp3ExcWRl5fH2bNnkSSJ2NjYSn/vVcGTfdLSpUsB6168Wu3Hc4C787tUkUQt5tIkSWL+/PmMHDkSgPfee4+JEyeiKmfDbzabUalUxMTEcOzYsWrLKCkpoUOHDnTq1ImvvvrKYZqcnBwC
AgI48doE/A16j26WdWRds7dYTcuqWLR54KXbW6SmpcENIzhnUwqVbPKtsuxaTFdU9kPdW6iipVctR1FV2Khbxtp9x7jlox+5e8RgxrdpTJOIYCSvyl3+SAFBTvO2+AU7vW7yqejDb1/aGVo0DnV6H4A295zT61KO8+uWcxUtZvfmWxWUKDWM+Pqv//jqr+2sfGqcXTrhZAMxAObK3xffCW9UOFf2287Ozsbfv+Lm5Yupyz6pbdu2zJ07l2P/uxP/Gnqor4ycohLiX5ld5Xpfrri1xxwzZgw7duxg+/bttiMqKopJkyaxfPnyGuWp0+lo0qQJ+fn57iyqWwlXyzfVFS6jqyNZZdfhFN/O1Ayy8gtZ8X/jOJaaSufJM1m992idyS8jPMCnzmXaZF/U/3ZtEs22lPR6v1bsyT7JZsVXtmbk7kOh+lN8eXl5HDp0yPb/0aNH2b59O8HBwcTGxhISYu/CRavVEhkZSfPmzV3mvWTJEubOncstt9xCUlISQggWL17M0qVL+eKLL6pbVAUFtzDkrW84l19I58TGvDZ2BMEa+GPXYfp2TJa7aLLh76WnxGTGZLGgdRFi3tPI1Sd9/PHHLFq0yO31UbhAtRXU5s2b6dOnj+3/iRMnAjBu3Di+/PLLWhUmOTkZb29vnnjiCVJTU9Hr9TRr1oxZs2YxZsyYWuXtSTLNKkJkMpTINDVQ2UYVIXU0glv21Fi6vvgp3jotT8xfz9nU44zqXPfKKTM7XzZP4pklEFLOuv5A+llC/bzR1AOXPHL1SaNGjbJu/PWAayLF1ZGVaiuo3r17V2tYX5053sTERD799NPqFklBwaO0ahzOjVe2IsjHi/WZhaSey+a6K1rKXSxZ2XzsJFcnxdULr9ty9UnKPijPo3gzdwNNZHQ31ERGd0Pyyvbc6Ol8fiHLdx6iV8sEooKsC9QRAb4s2bafE9n5PDqoG50TG3tMfmUkRjg3vPAkTS4auOUWFePv5V5nuJcqnnBNpLg6stLgFFStH7wDq7KTRhXxutIOs45frJPFKuI92FkDldapLmRXZhWWVlI12c7MdUUlFmU3fPAD6dl5PPXDCu7v04nsgiLmbdmLn0FHdHQ0WXkFCJMZlbO1F63zzltoXPz0HJT7VFYOcWGBAFikymULtfO8JRdrRo6unyoWxHtLNl8SiWFB/LRpD2nnc2gc1HCtzACQJPdb6NaDkWl9QJnodAOFMrr8KZTPkO6ylC2EYMuxUyx85Bamj76GgxlnMZot3N+nEyE+3kSEBDHn750YnZhLe4rCEudxnDxJwUVbDW7t0oaezWK57ZNfsHgg1piCAjTAEZQnMMjo8scg4yfG5SbbZLbw3LxVeOu0RAX60jQixG6t6dM1W/BWGZhz3yhZLNcMWvl+roaLRtEatYp3bh1EzMR3OJR5jqRI5wEYL2eUKT7PoYyg3ECMjC5/YnSKbHfx+tK/WLHnCCufGoOXrqJD2B7NYjiXfpK+LRPcLrsqRIdU3LxbV8QaKp7TqlWYLQKtWulGFDyD8ma5gYPF8u0DOVgk3yO8nGQXG03MXL2ZD28fTItGjr01zL5zBEtfesyh8qoLDqWflUUuwIGCirMER06fR6NWESuj4qwXuDvURtmhoCgoBQWAvafOIIBuTaIrTSNJEmql47Cx80QmzSNDlDZR8BjKGpQbCJPR5U+YVr71r8tJ9uncAiICfF3u6wnVyFfnUD/5vJmH6Sq2y5q9x+jTQp7pzvqEJElu3w9WH/aX1QeUTx83oJbxXZLTyczlJDvE14sD6Wc5dibLaTqNjPG3NDKu9WgcvONGsxm9jDGqFC5/lBHUxbjaz+DAuia9WEVQqaGEUzf5rixzamCum2FSEVwVI41ayK5sL1K6USJII2pUbjtqYLFkk+0m2sVEEBPsz58HUogPDQQc75M6VawmUDJXer0MV3uNhMr5T09VUljhXMaZcwRrrV7QJZWzfVAulIaLfVI42gdVYCbIoLZ7Vl2bxTB3
4y775+fK+v5yDCMheWDN6HJspxqgtIKCAqBWqRjTrS2frNmCxSLYeUIJSOeK1o3DOZQhn+FGfcHd0XQ9YbZ+qaIoKDeQ6GlPDorsOpGdmZtPhL8PD835je6vfsGafcfIKihi6q/reWnROnKLikmU0a1VYqh81nJNvCt2FYE+BrIKimQojUJDQZnicwMZJhWxMu0JyjBKxOrlWReRtd4ekD2hX2c6TL7gGHTYBz/gb9DRPjaSQqOJvw6lMm3cKK4IkcejeEZOAbEhfrLITi8WxHnbf9UHehsoNpkpMppk3UQsO5LKA66OlLEDKCMot5Av30c1+TK6WZK13h6Q3TQ8mKhAP4J9vJgxdggAD/e7kokDu7Lp6En+PnyCh35cxb9H09wvvArky+jqKN9c8SMo0MuAt07LjlRlOlROZsyYQdu2bfH398ff359u3brx22+/2a4LIXjxxReJiorCy8uL3r17s3v3brs8iouLefjhhwkNDcXHx4fhw4dz4sSJuq5KBRQF5QZ0MraiTkarMlnrXU3ZeUUlPP3TH6zZd6zSNKv3Wqf0Fj16M7d3a8v0Wwfx6bqtjPzwRwAe7teZkpISfvh3Ty1KXnN0MgYG1Dkwe9Zq1Nzb+wre+HW9DCWqR8gcUTc6OprXXnuNzZs3s3nzZvr27cuIESNsSuiNN97gnXfe4cMPP2TTpk1ERkYyYMAAcnNzbXk89thjzJ8/n7lz57J+/Xry8vIYOnQoZhl8TpZHUVBuIEFGlz8JMk3vgcz1robsghIjN378Ix+u/JdH5/zG+fyKFnJvLfubm2b8zLu3DqRdTCQAd/Zsz6GpD5EUYbWcG5icyKFDh0jLyq1wf12QECqf1/BEH8cd5u092rF679F6H/r9cmbYsGEMGTKEpKQkkpKSePXVV/H19WXjxo0IIXjvvfd47rnnGDVqFK1bt+arr76ioKCA7777DoDs7Gw+//xz3n77bfr370+HDh349ttv2blzJ3/88YesdVMUlBvYL6PLH0W2c4qMJm6d+TNmi+Dku08Q5ufDrHVbK6T5buNO3rt1EKO7trG7plJJTOjbmbuv6oBKJZGcnIyPTK6O9mecl0UuwL48xx8E2QVF6DQNey+UJKk8ctQEs9nM3Llzyc/Pp1u3bhw9epT09HQGDhxoS6PX6+nVqxcbNmwAYMuWLRiNRrs0UVFRtG7d2pZGLhrwymY9pCampVIV76vPIRGcla0W+7dKTGZu+2w+OYXFLHzkZvwMWqKD/VBJkt3ernu/XEKAl4HhHZIc5nNnz/YAbDxinZPvl2z1niBKSiqVLRU7t25TaZ1flywVp1ZUxmLURflW2R70NOBoD5ekEkhqtV0ocotF0O/1r0gMC7KGPZckp3vDFKrPxVF79Xo9en3FWGM7d+6kW7duFBUV4evry/z580lOTrYpmIiICLv0ERERHD9+HID09HR0Oh1BQUEV0qSnp7uzOtVGGUG5gRAZ3d+EyOhuSNZ6u5BtsQju+GIRmTn5/PLQTfgZrD/q3KISu7hKeUUlLNi2j6/uHmFLUxmd46N4tGdrbr2yVe0rUANCveWLYBvqYNFPpZL49M7h5BWX8NL81TKUqp7gwTWomJgYAgICbMe0adMcFqF58+Zs376djRs38sADDzBu3Dj27LmwVnqx6yQhhEt3SlVJ42kUBeUG9DK2ol7G90fWeruQ/dm6rexIzeCbe0cSUC40+YgOzXlt6V8UlCqpORt30rJRGDHBrtd31CoVg5PjZfvR6jXyNXhl7X1b93bMnXATn6zejNkiYwRLGZFUKo8cAKmpqWRnZ9uOZ555xmEZdDodTZs2pVOnTkybNo127doxffp0IiOt66kXj4QyMzNto6rIyEhKSko4f/58pWnkQlFQbuBkiXxaQpFdkayCIp786Q8Sw4Jo/b+ZvLL4T9u1nScyuDKxMd46LblFxbz2619MHnF1lZXOSaN8P5m0nIrGHXUmu6hy5XNFfBQlJjOHM8/VYYka
BmWm42WHo+k9RwghKC4uJiEhgcjISFasWGG7VlJSwtq1a+nevTsAV1xxBVqt1i7NqVOn2LVrly2NXChrUAqXHYHeBu7o0Y4v/voPnUbNf6X7dD5auYmfNu1l5VNjAPjgj39pFhHM4DZN5SzuJY9GraJtTATr96fQtHsb1zdcbkiS9XB3nlXk2WefZfDgwcTExJCbm8vcuXNZs2YNy5YtQ5IkHnvsMaZOnUqzZs1o1qwZU6dOxdvbm9GjRwMQEBDAXXfdxRNPPEFISAjBwcE8+eSTtGnThv79+7u3XtVEUVBuIF5Glz/xBvnWgWSttxPZ+cUl5BQVo1WraBUVxsP9OrNu/3FeXryOXx8bTWJYEKdz8vngj03Mf/imak3Zxcvo6ighyEc+2T7OLfXGX9WB6cv/ZmzX1qgUP3J1SkZGBmPGjOHUqVMEBATQtm1bli1bxoABAwB46qmnKCws5MEHH+T8+fN06dKF33//HT+/C15J3n33XTQaDTfddBOFhYX069ePL7/8ErWMe+9AUVBu4axJIlonj6I4a5SIlmkvlKz1rkT22bxCbpjxE946LXtefoDIIOuP8Jp35vDU4B5cEd8IgLn/7qZTfCO6OglQ6FiuimiZ9n+dyS8mJlCemFBnii3EeFfeWd3StQ0vL1zL0p0HGdrOsTXkZYtKcr8382oo+c8//9zpdUmSePHFF3nxxRcrTWMwGPjggw/44IMPqiy3Lrg8FZSTxVrhYtlNUrnofCwV7881qaAsaKGzDw5Xpt41MKnONUlQBUu+ykJmOMvbFXb19hSVmC3nmtSgNiMuKve4zxcQ7uvNl3cMs/qHM5uxWAT/HE7jw1sHgdnM8t1HeHbeKm7qlFzteueWdy3lzKQ6L6fya4DK1Zepg3AaeQVFSGUjGSf3S65MvUuKnV/XVlznyBUloNUh6Ryb1ht0Oh4a1J33fv+Hoe2aVzoqFa5+XwoK5VCMJNyArEHsGqpsB+d2pZ3m36Mn+WTMEDvnpVmFRZgsFiIDfDhfUMQdXyzmmcHdeXVUn+rLlbPOMk6dOQpYeDF39e3EvlNn2HAo1fMFqk+UrUG5+1BQFJQ7aCbjWkwzGdegZK233n6UUGw0MfW3vxjWrpmdWTnAmbxCDFoNPjotP23aQ/PIEJ69tgeRAb41kCtfnZNC5Av5nhSgc5nG38vAA30788zPKzEqG3YV3ICioNzA3iL5FhL3Fsr3CGWtd/EF2ftOnWHIBz+Qdj6XV0f2rpD2dG4BAV56luw4xCu//sXjA66suVwZ67zndL58srMq95pRniev6Y7RbGbi3OUNxj+fJ/dBNXSUVlC4pPlpy156v/0tXRKi+O3RW4jwr2jpFhvsT05hMY98/ztTRvZieENbxK9DDFoNPz14E0v/O8gna7bIXZy6oSwelLsPhcvUSKKOCVbLN+0TJKO7ITnrHawW/L77CI/O/Z1v7xpB/5YJlaaNCfbn6GsT0KrVtXZsKmudveT7uQZXw21IdLA/n40fzpjPfuGeXh1RK6MBhRqivDluwEvGVvRWyaeg6rre+cUlbDh8ArPFgtps5ME5y3jzhn5OlVMZPnqdW7xuy/qstfJNL3qrq1fxXi3iMGg1TJy7nGKjyUOlqidIHvDDpxhJAIqCcgtpcrq/KZFRdh3Ve1daJk/PW0WbFz9j0LvfM/ffPfyXVcKZvAKG1LEXCDmf9YkcF+bhnpRdUD0lo1apWPbE7fx1MJX3Vmz0UKkULncuqyk+IYTVA6+zPSIeNtV1ud/IGTUwfBKiijLrc7gNJ+X/dfsB7vp6Kde1T2Lm6EHM+Wc3J85l0661gaubxjDy45948/p+XJkQ5TiD2szlO3pXyoc3cfaV68qKrdCFwYPO4CBPExhLjRW0fhWvl2F0YdBQ5MKnn6N6WcxW+S72b0nlRqlJjcN5fmRvXlm4hqeH9wJAGC+/fVC1id/kLE8FZQTlFmJldH8TK2NUW0/WWwjBrD+3cdfXS/nktmv4ePQg
BrRMYGByIl//vQt1fjafjx3CtpQM+r/7HSv3HsNk9nxbyNnecf6uTb09JtuFq6PKaB8byYH0s1jq8weSQr1FUVBuIMssXzNmmeWbq/ZUvc/lFzL+i8W8+usGfrl/FMPbNbNdu6VTSwa2SuDmr3/nyZ9XcWePdgBcN+Nn/th31CPlKY+c7X2+SL4PofMlNVMw4f6+mC2Cc/nyeWL3OB6MB9XQURSUG8iRsdO63GSv2X+cbtO+JL/YyD/PjqdbYmO76yqVxDs39OODscNYue84ob5e7JtyH7d0TmbzsVNuL8/FyNreJfIpqJwaTs3tTsvEW6fFW6d1c4kUGgKX1RqUXKhldH+jlvFDy131NprNPDNvNb/tOsyZvEKmXtebO3u2s4YQN1XslIUQ7E7LRFLBfVd3IMzPh26Jjfnirx08N6SHRwMKytvecsqumfB1+4/Rq0U83vrLWEF5Yt+SsgYFKCMot5Akp/sbg4yy3VBvIQRLdx7mk3XbSDmXw9/PjOOuq9o7VTLTV23m7Z+W8M34YYT5WTfm3tw5mTP5BVwzfS57Tp6udbkqQ872bh7sVSdyTpzN4rt1Wzh5LvuCbP+afcvuO3mGxPBgdxWtfqL44vMYygjKDewvUtPcIM/0y/4iFc1l6jRrW++cwmLu++ZX1h9IIdjHC5UkudyrdDa/kDd//4cZD42lV4zVQ8Si/w5wNr+Q8d3b8sqvf7Hr5BmSo8JqXC5nyNnee88V0tLNSurkuWxe+mE56/cewWyxUGwyczo7D71WzaAOLflu4lir7GwjLQOqPwo6cS6bTpVZWCoouEBRUG5ATsNZOY2jalrvc/mFHDl9nvGzF9EsPJj/XryHYJ+qdbwfrNpMYlggBSUmnl+0jm/+2U1csD/h/j6czi3grRv6cX3H5jUsmWvkbG93u7bLyi+k/4sf0yEhmul3XYdBp0WrVhMbHsznK/7maLkQ7jURvSctk81HTzLl+n7uK3R9RKXyQDwoZXILFAVVkRrM/QZW1f2NB3q3QJVFtl7TVu9q7P36+u+dTJizDICXR/bikb6dnUdgLfdDzS8u4Z2VmwD4at2/xOjgqzuH0SsptmbrTi7LXXE0F1jetZSTTsTV3jTJaHQu2kE8qECNAFPpfbXZbycsCCGY8OlPJDUK45tHbkVVri7CaKJJaCCrdhxAFFs3BwdgQRRbEGbnG3ZFuVhsa/YepXuzGDrFN7I7r6BQVRQF5Qb8ZHQ3dCnJ/mXrPv7v55U8ObAr3ZtGMyDZtYui8mw5ng7AHd3b8upNg/BVCY9vvL4YOdvbX+c+V0ffrt3Chn3H+Of1x+yUUxkFJUa7mFr+VQkIdRELt+5jWAfPjWbrDYqRhMdQWsENpBrl85F2qcj+bedhHvh2GV/eOZzJw6+qtnICaNU4jD7N47jnqg6ckKneqTK6lkrJdTHqqgZr9xxmfJ/OhFcSE6vEZOZ0Tp7Nj15KYfUUc0Gxkb8OpjCiQ4tal1Wh4aIoKAWPIoTg9d82cMcXi5l5+2AGtUqscV4hPl4snHAjrRt7xgCiIdGlWRwrdx6s9Pr4Xleg1ai5evIM/jmYUu38/zlyghBfbxoHOXHJdLmgbNT1GIqCcgPRWvk2UNZ32V9u2MHn6//jjydGc50bjRcay1TvaBldHcX4um9G/sbu7dh+9CTHT593eN2g07Jo0h10S4rjyW+XEONV9Q5TCMHUxX9y19UdPbonTeHyR1FQbiDPIt+PsL7LnvPPLp4c2IXWjcPrXLYnkLO9c93oaNUiBCaLxWnU2xA/b3QaNS2iwskzVX2K79f/DnAo4xyPDerqjqLWfyTJAwELFcUOipGEW8gyq2gk0xd9fZCdei6HuZv2cCjzHEJAmJ83d/Zsx/n8Iv45cpJXHIRhry3ZZhWNNHU/mskySTTSymMokVVsIapiwOAasSc1AyGESyeumTl5tIgK57wRGjlwsH4xJrOFF35ZzdNDe+Jn0LunsPUdT2ysVRQUoIyg3IKc
r5K8sgUfrPyXK17+nE1HTxIfEkDT8CD+OZrG1F//Qq9R0yjAlwHvfMe2lHQ3y5YHOfsNd8ru0SKeO/p25v++XeI0Xc8WCcz7ZydGV+FDSvlmw3+YLRbuvKqDO4qp0MBRRlBuoEV5bwp1vCephZs8WIjSDkgIwcHMc8QFB6AvZ2acX1zCz1v2ceRMFp3jGzG0bTO+/2017/zxL3PuGsGwch7HY4L8ue/bpcy8bTB/PD6azlNno0JCVNI2zkzFK7vWwstMldSUs/1CLkx5HcUVa+kDZfujJE3lnhVcxvOpwa7blkHlRiS12QdlEUjAPf26MuiVT8krKMK33GhHmC5YC97erQ0//LWNh975lDkTbiImyL/SbE1mC68uWscbNw9EW4lHkMsyzpGyUddjKK3gBg4Wy9eM7pR9Lr+QGz/5hc5TvyD+mY+44pXZzPpzO99u3Gn9e/12Us/nMHrWQralpPNbWi4Av+85YstDCMHGI2mMbJfEmbwCxsxexPUdW9A22r1rUAeL5TEzP1Ao3xBqf5Z7I+q2j48iNjSQ37btqzSNXqth4RNjCI+J55dNe5zmt+tEBunZefRpGe/Wcio0XKrdu61bt45hw4YRFRWFJEksWLCgQpq9e/cyfPhwAgIC8PPzo2vXrqSkXDBV3b9/Pz169CA6OpopU6bY3btt2zaGDh1KeHg4BoOB+Ph4br75Zs6cOVP92tURJiFfp+Uu2QUlRq6b8TMqSeLI1AmsnDiaZwZ3Z8barXywejMvj+zFukljeLk0MurKfcdQl44wxnVva8tne2oGs//6j74t4lmx5yhbU9L582Aq42YvIvVcjlvKClC9AOTuwyyjqyN3x2OUJIm2cVEcPOXcua6XTkvzqDBOZeU6Tbd42z5C/bzx0ddtYEW5+qSzZ89aEyjOYj1GtRVUfn4+7dq148MPP3R4/fDhw/Ts2ZMWLVqwZs0a/vvvP55//nkMhgsrrBMmTGDMmDEsXLiQxYsX89dffwGQmZlJ//79CQ0NZfny5ezdu5fZs2fTqFEjCgoKalhFz+OvltG7gJtkbzySxraUDN65qT8hPl4kR4VxwxUt2PK/O/nnmTu48YqWSJJEZIAPozo259N128jLtXZYR05n2fJpHxPB1Ot687+Fa+ndPI6VE2/j9ev74qXTMuDd79h4JM0t5fWXyaODv3z7ovHXuX+kfkViNGt2H3aZLj7Qm5W7Dts27l7Mr9v38+Hv//DrxNtcOvx1N0qfdPlS7TWowYMHM3jw4EqvP/fccwwZMoQ33njDdi4x0X5zZlZWFh06dKBt27ZERUWRnW11679hwwZycnKYNWsWGo21aAkJCfTt27e6xaxTgqrqi68ey+6VFMuV8VH8uHkvEwd0qTSdWqXiy/HDADiWlc+WI6n0Toq1XZckiYf6dGJ7agZTf/uLGbdZ35WCEiPf/bubge99z7JHb6F7k+halVeuNg/SyPcxEqx3f8d/Y7d2vPjjcjYdSqVz05hK041qm8Cc37Vc9953zHnwRoLKOfc9nHGOu2ctYMYdw2nl5u0EVUGuPiknp3RGQHF15DHc2goWi4Vff/2VpKQkBg0aRHh4OF26dKkw5J4yZQoDBgzA29sblUrFoEGDAIiMjMRkMjF//nyn+zPqG8dL5Pusrky2yWzh3RX/MPrT+S5NiQ9nnketUpFVWETLyNAqyy7yCuD6ji0I8fWucO2Zwd35ecs+/jmSxryt+/jir/9s1576eVWtn+9xmVwdHS+Wb+rlmBtdHZUR5u9DbGgQx8843rBbRqZFx5JJY/E16Eic+A5D3/6GwhJred5dtoHhHVswqnOy28tXWxpqn3S54FYrvszMTPLy8njttdd45ZVXeP3111m2bBmjRo1i9erV9OplXb8YMmQIp0+fJicnh7CwC25runbtyrPPPsvo0aO5//77ufLKK+nbty9jx44lIiLCnUW9bMkpLObLv/7j8z+3YRGCY2ezOZ2bT0SAL9tT0tmWms6p
rDxOZedxMiuX1HM57D11hqkje3Mw8xxNI4LcUo4mYUGM6dqGAe99D4BaJfHqyF58s3EXO9IyOZdf6FCxKdQtizfvITM7j0HtXHv58DXo+X7CTexJO834T+bx5HfL6NUygR827uT3p8d7vrA1wJN9kpdX6ShS8oAVnzKCAtysoCylLvVHjBjB448/DkD79u3ZsGEDM2fOtL0MAHq93u5FKOPVV19l4sSJrFq1io0bNzJz5kymTp3KunXraNOmjTuL65gamO821ogLZsN1/GI11l4o79r9xxk/exFNwoN4YfjVDGuXRLNnP+Lo2SwMWjVXvf4VPZvGEBcSQFSgL+2iw4gM8OWnzXt5dsEa7urRjmahgVDF0AhRaqPVrL6SH+e063qTlpWLv0FHjyaNubVzMvf0aMfxs9mEeBtscoSTgbwjU29rval1W1eWt+26ruJif7RKIJV69nZ03YbBRXwrVx2aAxP26CA16Ep/ss7qXsUvfbPFwtNzfmXKLYPw8ypnwu7g+TfWAhZr6Pc20eE82P9KHvlmKVuPnWTi4O50iI2smtw69jHnyT5p6dKl1gTKRl2P4VYFFRoaikajITnZfqjfsmVL1q9fX+V8QkJCuPHGG7nxxhuZNm0aHTp04K233uKrr75yet+BEi2+Ki3NDBZSjGpKhIS3ShCpMXOkxFrVCK0FAWSarT/wpjozaUY1hQK8JKuPt0Ol02ZhagsqCTJM1rSJWjMZJhX5QkIvQZzWzIESNdlmFU10JrQSnDJZ743XmTlrUpFrkdBKgiY6C/tKTaOD1Ra8VJBmtOYbpzNz3qwixyyhkQTN9Bb2FakRWGMu+aqEzXt3jNZMrkUiy6xCVXo93aTi8Oksnlu8kRdG9aNru9aARJFkYXi3jvx+Mh+/yGhaNgolOj6Bu/t2JtJLQ4jawjGjmrgsM4FHMijUGNhX2k5JWhPHTBpKBHhLggiNmaPG0jZUmzEjcbhEQ4DaQlO9hVSjmmIh4aUSNLK1t4YZd40CiyDTrOKoBRK1Jgwh4ewrkdBLghiNmUPF1nxD1RbUkiCjtA0TtCYyzSryLRI6SZCgs7C/tA0lBAI4WdqG5du7rA33Fjlu71idmSyzitwSFWpJkGQQ7CtUlba3wE8tSC1RIZkE0QbIM0OW0dpnBGkgvVhgFhAgzATrVBzLt+4hi/ZWUWASnCsRUFBMyyAdh3JKMJrBT6ci1KDmaI51WizKT0eJWXCmyGp00DxIz7GcEorNAh+Nikh/NYezigCI9NFiFnDwfCGBeg3Ngr1IPZdPocmCl0ZNdICBg2fzrc/GV49UYiQj3yqnSZCBU3klFBgt6DUq4vz1HMixlvf06UwsGh2dOnRgT46ZRB8VmcWC3ALQqiDRAPtL7QAE0Bg4WTrFeUuPjvS6oh1GlRa1ZFVMewskhEUiSAM+asGJ0rSxekG2WSLbZNVPzb0E+wslLAICNBCgFqQUS+h37iQuLo68vDzOnj2LJEnExl5Y36wJnuyT3n///VqVTcE1blVQOp2Ozp07s3//frvzBw4cIC4ursZ5NmnShPz8fJdpk3RG/HUqJElNos5+A2sLvbUjKNv4GaK5cD3+orQt9fb/B6sv/B97kbPQlnoze4shrHTxPLBcvhc7Fm150aZa/3L5eqss1q/UsvJelLZlubS+amFzb7S3SE2iupibP/+B0V1ac0eXZMrHum0ZaGDVzp083SWRO3u0Y9LPK7m7bTTRiY1t5Q8pySErKwt9SQEtdBestBK19hZb5a+B4IzaYj0nqUiopL0BkCwElzNqiNM6SQsEqS/8H6t23IZ7i9QEqC0EqGvW3j4qC5L6wldqC6+L7vWyIBms1/000Kh0gLE3T9DS13pe5WVVgMkB5eImaSHSCyjd+NoswN7dT3Jw6f+lI6hw7wv3NimfVqMiOdR+CvR0gdF2LiHY/lpy+AWv4ZLaTIjXhZcpPsDeR1FyqSni6pR8NGYjbQIvpI31lijvbrBlqWulvfkX
lEmpFJr6aigfa7elt7DbjN3S+8LfPmpBVLkBZ3Mv+9FWS2+BT+kMib+/P1FR1jDxNkOEGuLJPslmxacYSXiMaiuovLw8Dh06ZPv/6NGjbN++neDgYGJjY5k0aRI333wzV199NX369GHZsmUsXryYNWvWuMx7yZIlzJ07l1tuuYWkpCSEECxevJilS5fyxRdfVLeoDYa3lv+Nt07D4w6s7w6knyUhJJCRH/1E6rkc3ryhH1cmRNmlebT/ldzQPolwPzc5elO4JEiMCOb4mfNkFxQR4F0FR3v1FLn6pI8//phFixZ5sGYK1VZQmzdvpk+fPrb/J06cCMC4ceP48ssvue6665g5cybTpk3jkUceoXnz5sybN4+ePXu6zDs5ORlvb2+eeOIJUlNT0ev1NGvWjFmzZjFmzJjqFrXOaKFzj7uh6lJQYmTJmg18sHITv0+8DY264ldXi0ahTPrpD9pGh/PPc3fgrXPsnicqsPpxe5pr5douCy308rR5Cxl1eMsQF+ta1SQuLJhOTWJ4+tslvHfHSDvXVhfToh7bs8jVJ40aNYr77rtPWYPyIJK4DGwnc3JyCAgIIHXag/gb9E4Xvz0RIvxQsZqmZR1mDYbmxUYTGrUKdRUtgSwWweRFa/l49Wb6denEpG5JdL5oVFSeU1m5hPt5Oc1fmKrf4R82amiiNble8HdldOHk/sqe5aFiFU31tdsL5dJIwoE37kMFgqbeZVN8TnptDxhJHDxfSLMga75CX3n+UnGh87zPn7X9mXImi+vf/JITZ7MYeWVrnhjWm8TAivU6VAhNvXBpCCGqaGDjCJ97Xqlwruy3nZ2djb9/5X4A5aCsbBnzP8bfx70fDzn5hURc92C9rHddojiLdQM12Z2ycu9R/u/nlWQVFJGZm4+ERKivNxH+PkT4+xBeevjqtZzKyuPeXh1p2SiU5+avZvaf28kvMfLCsKsY2rsnLQ3OO4VGgX61cy5aCUYZP22MMrmXcmNIpurL9oCfpdjQQP597VH+PZTKV2s20eXZ6fz6xBiuvGgjtZz1rvcozmI9hqKg3IBfDdzuzFizhb4t4rmxUzKNg/ywCEFGTj4ZOfmkZ+eRWfr3yfO5aNQq+r75DVcmRnEg/RxLH7uV1o3D0WnUnCjxQIWqiK9M7oagZm3uDtwY1Lba+Ok8szlZkiS6NIulS7NYEiNCeHzOb/z1wj12aXxldPGk0HC5vBSUxWI9nMzfOttzU1OCJbNtgCKpqhbe4Xx+IVd2TrabmosOCaj01nt6d2TFzsO8cWN/WpTz9hBSFrTP1dSlxUVoiRq8CaEqkFSuey5XbS6qGGuoPMGSGWGuwpStk2k8lbfzaRlH+5zCtQJVmfWfsylCnfNgfcJQ/cWsELUBoS01szc68WxuqrnHiX5tmvHWwtUVvuBD9Vj9zrh4VpKLL//aTAHWV4QkIdy8ZuTu/C5VlHGkGzhurF7vnlNYzLEz2YRVw2quU3wUz1zb0045ARyT0c3SMZncDUH129xdHM2XxzgD4Oh511stast/x07SJjaywvljRR4XfemihHz3GIqCkoGnfv6DVo1D6dmscuecCgpysPHgcbo0Ud5LhfqBoqDcQKSm6l/VRrOZuf/u5o0b+1fZas8ZjbTyTZk00sgnuzpt7k6iDPL9ZKL8PL9XaWdKOu3iKo6gGtVtiKdLC7ePnjyw8fcSRWkFN1AdizKNSoVBoyGn0D3RUeW1pJNTtjxTICUyLqGU1EG0xGKj0eFeuZJLfjOKwqWIoqDcwFlz1ZtRkiTu6NmeFxeucxkGoyqcMcn3CM9Uo97upjpt7k7OyKihzhS4N+S7IxIjQpi7cSclJvtN2GfdH+njsqHMSMLdh4KioGTh2Wt7kHoum5lrt8hdFAUFO94aO5x1e4+yZNt+14kVFDyMoqDcQDNd9Vz++Bn0vHnTAN5evhFzLc1uk2Ry+QOQJJOLJ6h+m7uL5n7yWS42D62+O6rqEhsaSESAL9qL3GYl1WNX
R7KjrEF5jMuqFYTF6k1ZiMoP216pmh4OKB/d1VYGBwfCYju6J0aRmZtPTmHRhfMukLSaCsdxi9b6t1rt/HBwb/kDtbryoxKqHNW2tm3u4DheoqpS3CqVTlfpIRm8nB7oDRWOYyWaC/8761zMJqeHUKmdH1pdheNobontb5v3AkdHDd7hMoQQ7D15muZR4Rd8zEmS1cy83P+VHi6QVKpKD4WaMW3aNDp37oyfnx/h4eGMHDmygvf2jIwMxo8fT1RUFN7e3lxzzTUcPHjQLk3v3r2RJMnuuOWWW+qyKhVQ3go3UFLNBfuNR9JoPMkaSybAgb+3asmWcdG+WMaFc7kW7YvrwFChMkpq4C+xuuQXl2AyW+wDGCLve1bvqYrirslRRdauXcuECRPYuHEjK1aswGQyMXDgQFuIIiEEI0eO5MiRIyxcuJBt27YRFxdH//79K4Qxuueeezh16pTt+OSTT9zaVNXl8vIkIRPe1XS78+OmPQDcfVV7VLV0XusjowsaH0m+ztpbJtk+GvkWr711nv+5vrdkHVckNCYywNdetuLqqN6ybNkyu/+/+OILwsPD2bJlC1dffTUHDx5k48aN7Nq1i1atWgHw8ccfEx4ezvfff8/dd99tu9fb25vIyIrbDORCGUG5gYhq7smJDw2kZ9MY3r15QO1la+VTEhEy7oOqbpu7i0hv+b7pIv3d6zH7Yo5mnOWdJet4Z8wQpIu+4CNrN9C/vHE23VqbA6vH9PJHcbFrS87s7GwAgoODAWz3GAwX9tGp1Wp0Ol2FqMJz5swhNDSUVq1a8eSTT5Kbm+uWJqopioJyA0dLqtdp3XplKzYdO8mhzPO1ln2kSL4v+iMyujo6KpOro8M58tlbHznjuc7CZDbz6BcLuaVHezolRle4fqTAY6IveTxpZh4TE0NAQIDtmDZtmvOyCMHEiRPp2bMnrVu3BqBFixbExcXxzDPPcP78eUpKSnjttddIT0/n1KlTtntvu+02vv/+e9asWcPzzz/PvHnzGDVqlOcargooU3wyEObnTUywP4cyz9E0PEju4ig0cIpKjEyY9QupZ7P46iHHi+Ln8wsxGvRole05dUpqaqpdPCi93vlQ9qGHHmLHjh12IyOtVsu8efO46667CA4ORq1W079/fwYPHmx37z33XPBg37p1a5o1a0anTp3YunUrHTt2dFONqoeioNxAdaebDmWe5+iZLNpFR9Retq6BTvGp5ZEdKeNiTISf+6f4LBYLg175DIFgyTN3EeTrjaXkwjRSfnEJT3+/jIU7j/HFHdfSr2W828twyeMJs/DS/Pz9/ascsPDhhx9m0aJFrFu3juho+1HwFVdcwfbt28nOzqakpISwsDC6dOlCp06dKs2vY8eOaLVaDh48qCgotyBKTbktTsJt1NKcVXJgpmu2qEAqC3tRef7CIsgtKmHc54u4/+qORPr7WM3Py/KuQd9Xdrur6LCuAqA5M/OtTBVYhARqgTA635NUq6DNlZTLbFGBClS+vg6v2253FvZC7eL111Z0QGc2maAsNLqzTslBRNzaYinfjs7CXphcPY8LT9RkNvPvoRQOvP9/RAX5Wa+VkzNtwRq++XMbgcEhtIkJr3305Es/gHe9QwjBww8/zPz581mzZg0JCQmVpg0IsIb0OXjwIJs3b+bll1+uNO3u3bsxGo00atTI7WWuKpeXgpKJM2YVoVX4ojeazYyetYAQXy9eHHaVW2SfNkqEymQocdooEaqRR/YZs4pQGUZwpwuMhHnJ87M5nVdEqK97HcZqNWpC/bw5lH7GYTyyFbsOYTRbGDOgJ+H+zj8GGipCUiHcPIKqTn4TJkzgu+++Y+HChfj5+ZGeng5YlZGXl3XU/dNPPxEWFkZsbCw7d+7k0UcfZeTIkQwcOBCAw4cPM2fOHIYMGUJoaCh79uzhiSeeoEOHDvTo0aNK5SguLubff//l2LFjFBQUEBYWRocOHZwqTFcoCqoOeX3Z35zOLWDlxNHotUrT
K9QPHhrckwc/+4WfnhhDcnQEQggkSeKPXYfYlZoBwOB2STKXUqEyZsyYAVg32pbniy++YPz48QCcOnWKiRMnkpGRQaNGjRg7dizPP/+8La1Op2PlypVMnz6dvLw8YmJiuPbaa5k8eTJqF7MzGzZs4IMPPmDBggWUlJQQGBiIl5cX586do7i4mMTERO69917uv/9+/Pyq5w1F6SXdQBOta7c7mTn5vL9qM78/dis+evfFLmjqJd+USVODfGtQTWRyddQsyPMhLyqjaVjV1iKqy5PDriY9K4fOT1s3j9/f70peGNWXCV8sIiEsiCYRwXQO9ayJ+yVNNTfWVjnPKlKVKfRHHnmERx55pNLrMTExrF27tsoyyxgxYgSbNm1i9OjRLF++nE6dOuHtfcEv1pEjR/jzzz/5/vvveeedd/j6668ZMKDq22sUBeUG0kxq4rXODSVeWfoXvZNiaR9Te8OI8pwolkgwyKOk0kpUxOvlUVJpRjXxMvgCTM0tITFAnk1BJ7LySQipvT8+IQT/Hkplz4kMDqef5bv120jPyiXA28DAdkk8O7I3z/7wO00jQ8jMzuf6K1tzohgSFB2lcBEDBw7kp59+Qqdz/NGdmJhIYmIi48aNY/fu3Zw8ebJa+SsKyg0UuXB1NHfTHpbsOMiqJ25zv2wZXdAUyijbVZt7TK5JvkoXGd2jkJ/8egnfr99Gx8RoIgP9mHnv9XRqEk2wr/XLd9byDSzaspfJ1/fl5fmrueHKVhyXZ8B6SSDwwBrUJbJFdcKECVVKl5aWRqtWrWyeLKqKoqDcgMGJ253zBUW8uXwjz1/bk/iQQLfL9pLxPZZTtqGa7qXchZdGvkobtLU3cd+4/yjfrNvC5tceJTas4h48IQST5/3B5/dez9u//slDA7virdch42xu/UfmKT65efTRR5k+fXql19PS0ujTpw8HDhyodt6Xhpqu5zSuZB+UEIJbP1tA0/Agbu/a2jOyZdwH1VgnX69VWZt7mmhf+WKfRwf61DqPKd//xgODujtUTgBFRhPn84vILSxmZ2oG9/TtbJWtuDpSqISvv/6aKVOmOLx28uRJ+vTpU2P/fpfVCMoW1gInnZeLKLaSC+etjvZRHSrR0kJndYFTfp/U34dPsOfUafa9eC9aN3wROdqrdLhIoqW3cL3PycW+HGcRPKVKvGgfLlLR0suCJ1WkqpK57SOFVtkqLxeBipy1u6sQJw4Wnw9mFZEcXGoooal8RCM0tftpSSVFFc4dyswlObx0DcrBdRtGx/7a1u0+zNbDKXxz/yiE0bHLpr92HwZg0nfLeOmG/gT6WNv3UKGgpa+k7GNyhCR5YKPupTOCWrRoEddccw0hISF2U36nTp2iT58+hIWF8dtvv9Uo78tKQdU3vvx7J7d0SsZb5/5NmwoK1UEIwUs//s6j115FkE/l1g5v//onAN2TYm2jJwUFZ1x11VX8+OOPXH/99QQHB3PrrbeSnp5Onz59CA4OZvny5fj41Gz0rygoNxCmrjjCOHE+l4X/HWT1xNEelR0uozfzcK18U3xyyQ6XaZMuQIRvzefZ/jt2kv+OnWT+/91RaZqs/ELW7j0KwAuj+tp5NA+Xb2az3lPeuas787yUuPbaa5k9ezZ33nknxcXFvP766/j7+7N8+XJ8XXh7cYaioNzAxa/SJ39uY/rKzYzq0JzkRqF1KrsuaYiyL7F+w8a367ZwXZc2+HsbsBQ5nh40lG4eH9kpmeZRYXbXLtV6K9Qdo0ePJisri7vuuouOHTuyYsWKKvsRrAxFQbmBTLOa4HKujlbvT2FAy3imjuztcdkZRolgmUZRGUYVwTI5jJVLdkaBiRCDPD+bjLxiQryrP5Qxmsz8+Nd2vnnU+TYHg07LtmkPEx8WWFF2MQQrM9WO8aCz2EuBDh062I22tVotWVlZ9OnTxy7d1q1bq523oqDcjNFs5kDGOUZ3TsZHr/yiFeTn9//2Y9Bpuaqla59oSR4e8StcfowcOdLu
/xEjRrgtb0VBuYFE7QWLqCd/XoWXVsPA5Jo7SKwOTWTyIgHQRCYvEnLKbhIg32JMk+CaLTR/8vvfjOvdCVUtPPk3cWEs2ZARSAg3Tzq7Oz9PMnnyZI/lfemMI+sx6WarufEfe4/x89b9fH/3CNt8vqc5VVInYhzLNsr3I5JL9ql8+VwqpOc6MS2vhL0nMvhzzxHu7t+1VrJPuY403mAp82bu7kPhMhtBCYsFYbHgTO9KKldf3i526zuId1NgVpNdXMgjP6zglRFXExPk59CBoye61AKLBAiXcXiE2UXH6mR/i6gk9lC+SY2owoZZycUKu2So3AGrVImiLywBSQvC5DwEu9M4Wa729DjIu6DYCKbS+jjbO6Zy8R5JLsrtSHZRCZJJW2nZbJTb6/fR0vXc1L0dEf6+tvPOYn8Bpb+hi2SXPebaWkso1haXFddccw0vvPAC3bt3d5ouNzeXjz/+GF9f3yq7R4LLTEHJhV4SPLfoT5pHBDPOQx4jnMmWC1lly/SBqVfL18Hqq+lmqdhoYs6fW1n90gO1l6180FdOAzaSuPHGG7npppvw8/Nj+PDhdOrUiaioKAwGA+fPn2fPnj2sX7+epUuXMnToUN58881q5a8oKDewftsOvvl3N9ueGedytOBuYuWL/kCsjK6O5Kp3nK98P5m4gOpV+mjmOSRJom1s7SOixiqezBUccNdddzFmzBh+/vlnfvjhBz777DOysrIA68xJcnIygwYNYsuWLTRv3rza+SsKqpYczDzPzF2nuK9nOxJDA+tefmGpqyMZOFispqVBHp94BwugZe1d01WbA9lGkoPkMZQ4cLaA5LCqV/po5jliQgJrZRxRxsF8aKkE1HVIQ9+oq9PpGD16NKNHW50SZGdnU1hYSEhICFpt7SyZL41xZD3mtd830iOxMW9c11vuoigo2JEcHU7q2Sy2HU2TuygKDYiAgAAiIyNrrZxAUVC1YsXeY6zYe4w7O9SNSbkjQmV0dRQq0yZdgFCZtpiFGWof8qLGsr2rV+m4sGAmDr2aJ75aVGvZoYqro0pRrPg8h9IKNaTYZGLivFW8OLQnkb7yTdBrZJwJ0MooW66wTDKGg0JTAwONsb078++hVApLnFsNupR96cw4KVxGXF5rUBYBFoHA2Ze9ix7GhWVaWTiOlftSUKlUjO/SmgNmNYEuzddrj8VBiISThSoCvFzLdmpu7ereSky9040SQQZR6XXb/a7WQJyEppDUlcguthDsXQVt4exLVO/C6MBBm53KMRPkUzqScWam7sqsX+vc8atwIPtkfiGBPtYyO9UX5ULGxIQFEuhjYHdaBp2axFhPuvw6r/g+pRdDUFUGcK6etYvtEJckDTxgoSdRRlA1ZOfJ03RLiELlIn6UgoKcSJJEu/gotivrUJ7DE9N7yhQfoCioGnPsbDbxwVZPvfEa+bwLJMjobihBRjdLid7yfBgk+su3GJMYXLOp5PbxjWutoBIUM3OFKpCVlcWsWbN45plnOHfuHGB1EpuWVrP3T1FQNaTYZEZbGlH1jFm+hfPTJvlGcKdldHWUWSKPcswslO9jJDOvZn6t2ic0ZtvRk7WSfbp2S1iXNWW++Nx9XGrs2LGDpKQkXn/9dd566y3bfqj58+fzzDPP1ChPRUHVkI6xEfyy7QBFRhN5Qr6XKc8sp2zZRJMnk57IM8o3Ys0rqVmDd0hozK6UUxhNNX9gcrW3wqXDxIkTGT9+PAcPHsRQzn3Z4MGDWbduXY3yVBRUDbm3Rzv0WjXPL14vqzWbTsYn2BBl62Rcc9TV0M1SYkQweq2GvWkZNZatVXqKSlHMzK1s2rSJ++67r8L5xo0bk56eXqM8L71WqCfoNGo+v30wP2zZy/79+2UrR4KM7oYS9A1wDUrGcBuJwTWLeaFSqWgZHcG+tMyay1bWoBRcYDAYyMnJqXB+//79hIWFObjDNYqCqgUJIQE8NbALv6RUfCh1xf4i+R7h/kL5RhP78uRRjvvOyxd3
Yt/p/BrfGxHox+nsmt+/v+a3Xv5IXDA1d9shd6Wqz4gRI5gyZQrG0u0wkiSRkpLC008/zfXXX1+jPC+vfVBlWJyEjnC6R+rCPqdK77fYK4ThrZsy79et/Lx1P9e3T3JqHeoqb2flLhXu4BxgNjvcs2Mn29V1Z25JKrlXsphR+ahd77FyFXqikr1O1nsdt5lkLEHy1bk2x3W2n8TgYkTiqF46AV6l/vCKCmomFxAu2kRy9Kwl6UJ9nb0rDq6pJYkSo8n1O6agUEPeeusthgwZQnh4OIWFhfTq1Yv09HS6devGq6++WqM8L08FVYfEBPkxtl08035by/Xtk+pcfrBavg4nRMbFtxCZ4j+EeMnkYwkI8a759OLeExmM69WpxvcHy1fteo9AhXDzZJS786sL/P39Wb9+PatWrWLr1q1YLBY6duxI//79a5ynoqDcgK8a/PTyrE14yRiTySBjbCSDTL53DDL6Oqqp7Kz8Qg6ln6V1LcJuyOiCsN7T0L2ZA5hMJgwGA9u3b6dv37707dvXLflW+41ft24dw4YNIyoqCkmSWLBggd31vLw8HnroIaKjo/Hy8qJly5bMmDHDLs3+/fvp0aMH0dHRTJkyxe7atm3bGDp0KOHh4RgMBuLj47n55ps5c+ZM9WtXRxwoELSMDJFFdppJvg4zrUg+A420fHls3NNy5VuDSsupfsh3gD92HqR5VBjRIQE1ln2yZqLrBLn6pLNnz3q6apcMGo2GuLg4zJVE364p1e7d8vPzadeuHR9++KHD648//jjLli3j22+/Ze/evTz++OM8/PDDLFy40JZmwoQJjBkzhoULF7J48WL++usvADIzM+nfvz+hoaEsX76cvXv3Mnv2bBo1akRBgZP5fpk5ePo83RMby10MBQWHLNu2j8EdWshdDI8hd5+kmJlb+d///mfnQcIdVHuKb/DgwQwePLjS63///Tfjxo2jd+/eANx777188sknbN68mREjRgBWdxgdOnSgbdu2REVFkZ2dDcCGDRvIyclh1qxZaEodiCYkJLhtuOgJiowmVv27lecn1MxKpbbEaeXbLRtfFWetnpLtJ8/sdHygfPbW8UHVNzO3WCz8/t9+vn9sTK1kx9VjM3O5+iRHJtUNmffff59Dhw4RFRVFXFwcPj72wTW3bt1a7Tzd/ivv2bMnixYt4s477yQqKoo1a9Zw4MABpk+fbkszZcoUBgwYQGFhIUOHDmXQoEEAREZGYjKZmD9/PjfccEOdh0+vCf+lnSaucSMSajF9UhvOmVV414EndYeySwTeXvI8o3NFFrx9615Bnisw4h0gz4LMuYISvAOqpyn+PpCCxSLo0iymdrKN4H2JrkN5uk/yhGuiS9HV0ciRI92ep9sV1Pvvv88999xDdHQ0Go0GlUrFrFmz6Nmzpy3NkCFDOH36NDk5OXYbuLp27cqzzz7L6NGjuf/++7nyyivp27cvY8eOJSIiwt1FdQuFJUbCQ0JkU6a5Fvle5ByTfAYaOTK5HMopkc/nT05x9WUv3LSLoZ2S0dQi3ApA7iXs6shTfZKXVz0eVsrA5MmT3Z6nRxTUxo0bWbRoEXFxcaxbt44HH3yQRo0a2Zkb6vV6h7uLX331VSZOnMiqVavYuHEjM2fOZOrUqaxbt442bdq4u7gVEC72iUgXjVZ0GhWFxcW2PUrC2Q/ZRdykyvb82HAwm1eWo6u9SCpfP+d516AD0xYbkfRa1/dqXVg4OtsHVck1TVEB+Hi7jj9kqtzLqXBRLkd7lVQaLRaN9T6Vk3BSQu3cLls4iYEFoCquuOaqFRaksvo4iTclyt5FIVi4aRfv3THCds56wdVewIptqlELJJWEqG08J1fPywN4qk9aunQpgEfWjC7FNShPIAnhLOqai5slifnz59uGdoWFhQQEBDB//nyuvfZaW7q7776bEydOsGzZsmrLKCkpoUOHDnTq1ImvvvrKYZqcnBwCAgL449kH8DHoaSIVcULoKEaFNxYiJCNH
hTVAXLjahABOW6wdSKKqiJNCR5FQYZAsRKtNHLZY04ZKJlRAprB2JvGqEk6jJV9I6CVBrMrI5jzBkwvW8u0NVxOo13BKWDuveLWRMxY1eUKFFkGi2shBtXUNIVgt8JKEzQIvTmvmvFlFjlCjkaCZwcK+IhVCQKBG4KsSnChRgbAQo7WQa5bIskiogOZ6MweK1VjUGvw1EKgWpBRbFV1jvaDADOdNEipvH5IDNBzINWGygJ9WIlSn4mipNVxjXy3FZsGZYmsH1CJAw5FcEyUW8NFIRHqpOVz6Gd3IS41JCE6XWvElBelJyTNRZBZ4aSQae2s4lGPtSCO81KDRklFg/b9poJ60PCOFJgsGjYpYPx0Hsq3Xwry1aFQSp0q9djcJNJBeZCG/xIxOLZEY5MW+M9aOO9Rbi16tIi3fmjYh0IszBUZyS0xoVBJJIT7sOZ0HZjPBBg3eWhUncq1p4/z1nC82kW1WoZYkmof5su90LhYBgQYt/noNKdmFIKmIDvQir9hEVqERSYIW4f4cOJ2L2SII0EKwl5ZjWYUARPsZKDCZOVdoRKjUtAzz49DZfIwWC346DaE+eo6et7pkiAwOwGg2cybPahWYFBHAsbO5lJgs+Og1RKlNHD5vzTfSV4dZCE7nW9upWbAXqadOW9tbLRHtreZg6bMJN6jBZCSj2MKxzPPc+9ZnrH79SYySCr1KIs5L4kBp2jCdhEaCU8XWLiDRSyKzBPLMAp0kkegN+/Kt10K0EgYVpBVan3mcl3XaL9cEagmSfCT2lnr3CNKAjxpOlBo8xhggxwTZJqtVVnMfq2cKCxCgAX8NpBaB7upRxMXFkZeXx9mzZ5EkidjYWAICAsjOzsbf39/h7788ddkntW3blrlz57Jv81/4+fpWOx9n5Obl0aJTjyrXuz6gUqmcziTVxMLPrSMoo9GI0WhEddFXklqtxlLDLy+dTkeTJk3Iz3fta6Wpqhh/lfVHEi/ZhyZoLpXayZY2YLD6QmPFlUsrSSqaq+1NiYPKDV1iVPZf5F38JAIMetLS04lLiCKgXF7Ravuv3JZ6+wfkX64M3ioLjcvtK2phsG+vll4Wq8cIwFclKNvRsq9YTQu9GUl34Wu/pfeFbw5/NUTqBOoA66NOusi4ILn0PGrrMwv3upBPU3/7UUByoP3/p4ss1nMqicSL0waVG51oNYR4XZCbEGAfTTY51H7xP8hwIW3cRb7vksOsC697zuSTHOpDQLnNqzEXrQ0lh/najaCS9Rfy9dGpaay7MARqEWY/wkwO97ONoPz0Whr5W6dz9mbk0DLC2mGoTCUX5JTij4ZIX71tBNUs1L7jSg633mvRWcsd5ndhmqhJ2IWOSJ2fY6trGacLjCSHWs8l+l78HC+0v1BJhOjUzFu+nX6tEmh28bO5aO0uqNyG61gvKO9nJ9nX+vfePAstfVX4l3tHL16Taukr2UUZblmuiL5qiCr32JvbV42WPuBdOkPi7+9PVFQUQK0NETzZJ12w4lP2QYE1rEZ5jEYj27Zt46uvvuKll16qUZ7VVlB5eXkcOnTI9v/Ro0fZvn07wcHBxMbG0qtXLyZNmoSXlxdxcXGsXbuWr7/+mnfeecdl3kuWLGHu3LnccsstJCUlIYRg8eLFLF26lC+++KK6Ra0z4oMD2JaaSfeEqDqX3WAd1zTEilezzmdz84kKcs/Xd31ubrn6pI8//phFixYpRhKllFlElueGG26gVatW/PDDD9x1113VzrPaCmrz5s306dPH9v/EiRMBGDduHF9++SVz587lmWee4bbbbuPcuXPExcXx6quvcv/997vMOzk5GW9vb5544glSU1PR6/U0a9aMWbNmMWZM7cxkPUmst5rtR2vuKbo2BKnk6zqCZHI3BBDkJY+ZeZCXfN7Mq1vn3q2b8MjnC3h4SM9aK6ogOWPKuECuPmnUqFEOw0so2NOlSxfuueeeGt1b7V957969cbZsFRkZWePRTmJiIp9++mmN7pWTViG+
/LReHgXlI6OC8pHJ3RCAj1Yem2cfnXy21jWp85ncAvalZdZaQfnWYxNzufqksulHxUiicgoLC/nggw+Ijo6u0f2KLz43EBbVmIOns8grLsHX4MS0ywOcMKloqZZns+6JfDPJgfL8kE7kFJMcVvev74nsQloa5PGcWp065xYW89jshcy893r6tm5aa9mpRcK2HqWg4IigoCA7IwkhBLm5uXh7e/Ptt9/WKE9FQbkBn9JQFYVGE751q58UFBzy9drNxIcHM+bqjnIX5bJHWYOy8u6779opKJVKRVhYGF26dCEoKKhGeTY8BeVin5OreFGO3BdGYbX6ExbhdI+JMDkf6ag0LvbOONgIFautoiWSq71KeieatRLz0NgANWhULmMf4aJezuJFiUrujQnxRWg0LvcbSc7ydnGvozaLCfW31cfipF4u4z0ZS5xex8H1WB/VhfMWJ++S2cye1Ax6tUxAqu2+pTLZhip2mK7ehZrvalGo5/Tt25eYmBiHpuYpKSnExsZWO8/LY6JTZnKQb4I+2yzfl1Z2iXx+AHOK5HFtkFNU+cZfT5NdXPX2TjlznrjQmn21OiLnEvYk4WkEHnAWewl2zQkJCZw+fbrC+bNnz5KQkFCjPC+9VqiH5AoZFZSMro6yS+QLt5Elk6LILnQx8vGk7GooqOOns4gLC3Sb7CwZ3VopXBpUZqiSl5eHoYZr8w1vis8DqGTcJSLnF4Yrz0yelS2PcLnkWmVXLZ3FYiHlbJZbR1CqS3BNpK5o6GtQZWb9kiTxwgsv4O19YeO92Wzmn3/+oX379jXKWxlBuYEmKusalEWG+fXmevmm2VoE6l0n8pTsMPe6lqkqzSPk8VoP0CK4as5J07PzKDGZiXGjh/0WigVfpVg9Sbh7mq/q7T1t2jQ6d+6Mn58f4eHhjBw5kv3799ulycjIYPz48URFReHt7c0111zDwYMH7dIUFxfz8MMPExoaio+PD8OHD+fEiRMu5W/bto1t27YhhGDnzp22/7dt28a+ffto164dX375ZZXrUx5FQbmBY3gRHejLv8fT61z2/mL5phf3ZckXXXbf6TxZ5O7PyJZFLsC+c4VVSrcrNYOoIH/0rpwTV0d2njLFV19Zu3YtEyZMYOPGjaxYsQKTycTAgQNt7uGEEIwcOZIjR46wcOFCtm3bRlxcHP3797dzIffYY48xf/585s6dy/r168nLy2Po0KEufeitXr2a1atXM27cOH777Tfb/6tXr2b58uV88sknNGvWrEZ1U6b43ICQJJ7o04mnFq6jR5PGhPjUnRt++VaBXBpEeli2PMLlkmuV7TpNTkERj3y5iAcGdHGvbARcQtNOdYncU3wXO7z94osvCA8PZ8uWLVx99dUcPHiQjRs3smvXLlq1agXAxx9/THh4ON9//z1333032dnZfP7553zzzTc2D+/ffvstMTEx/PHHH7b4WM7whDu6y0pBCWGxDytQAySL80GlIzN0P2FibKcWrDxwnAk/ruS7sUMcBzZz8SXi6rojAko9Sbi810nYCcC5KXglbRqglUqvuRiIe6BTD6ziZlmhqdw1kUXrfIpSclBvf2+DSxNyAMmZGTggmVyMPh2YmQeoLbbzwujYrG7Oms1EBPgy8dqrXJaxMhyF1AhQidqH2lCoNhc7y9Xr9ej1zt/bsmjAwcHBgHXqDrAzVFCr1eh0OtavX8/dd9/Nli1bMBqNDBw40JYmKiqK1q1bs2HDhiopKIBNmzbx008/kZKSQkmJ/Tv8yy+/VCmP8ihTfG7AXzIjSRLTr+vNjpNn+HTDjjqTHaCW74s+QCff6+NvkOfbKkBGX3wBVXCz9OPf/3H7VR3cHkDT/7L6lHUvZd7M3X0AxMTEEBAQYDumTZvmvCxCMHHiRHr27Enr1q0BaNGiBXFxcTzzzDOcP3+ekpISXnvtNdLT0zl16hQA6enp6HS6ChtqIyIiSE+v2tLF3Llz6dGjB3v27GH+/PkYjUb27NnDqlWrCAio
2XqooqDcwAmLtdMK8jbw+a0DmbJsI1tSM+pEdopRvkeYkiff5piUrKqtx7hd7jl51r4AUvKcj4K3HzvJf8dPcV3nVm6XnVrk9iwVqkBqairZ2dm245lnnnGa/qGHHmLHjh18//33tnNarZZ58+Zx4MABgoOD8fb2Zs2aNQwe/P/tnXl8FOX9xz8ze2+y2Vwkm5CQhJCEcMqpAgLK5S3aVqgWRa1oOaxKsR71p6UVtNZ6tlBFUauI1YIcKohyCYhIALlCgBAgCTkhyWaTvef5/bHJkiW7M5vNbJ6EPO/Xa5TMzD7feWZnn+883+d73ACFRAA/ISTol51Fixbh1Vdfxfr166FWq/H6668jPz8fd955Z0hBugBTULJzVXoS/jhxBO77eCNqG9mvmtFxvLD6Ozxw7QjEGyKkT2bIBiFcWDbAUxur5SZm3ps3bx7Wrl2LLVu2tErOOmzYMBw4cAC1tbUoKyvDhg0bfAJoTSYTHA4HampqfD5XWVmJxMTEoO5DYWGhtyikRqNBQ0MDOI7DY489FnIScKagZCCZ97W1zr1mCHISY7D42z1hl52ipLcukBJBz+6Tauw4R5SWpMTQG/zF7vf+06XYcrgQj988NjyyWY7JTgshBHPnzsWqVauwefNm0awNRqMRPXr0wIkTJ7B3715vDadhw4ZBpVJh06ZN3nPLyspw+PBhjBo1KqjriI2NRX19PQCgZ8+eOHz4MACgtrbWW9yxrTAFJQONxPc28rzHq+/TfcdhC7CYLRcNFDNJNLjoKUeLg455scFOz6zZIJLN4cUvtuC3E0bCFG0IeE67ZNMLt+sCeFITybm1ZWieM2cOPvroI6xYsQIGgwHl5eUoLy+H1XrRDP7ZZ59h69atXlfzSZMmYerUqV6nCKPRiAceeADz58/Hd999h/379+M3v/kNBg4c6PXqk+Kaa67xKrg777wTv//97/Hggw/i17/+NSZMmBD87WwBW/qUgVqiRCJ8B64r00xIMOiw/sgp/PKK7LDJrhE4mMLWuoRsu4AkvfR5YZFtdSLJ0PGv9TWNdpiMdDpdY3cjSd/6J2ux2bHhwHHse+n34ZPtBEz04rI7NbTdzJcsWQLAUxerJcuXL8fMmTMBeGZDjz/+OCoqKpCUlIR77rkHzz77rM/5r776KpRKJe68805YrVZMmDAB77//vuQ6VTNvvfUWbDbPssZTTz0FlUqFHTt24I477mglK1iYgpIBzk+qI47jcPfwXHy0Nz+sCqq7RqbQ6ndnvN/b84vQKz4amYlxtC+FQQGxYo3NPPLII3jkkUdEz9FqtXjzzTfx5ptvtvkaXC4X1q1b53VH53keTzzxBJ544ok2t9USpqAuQSqOyl+cVDZn80bMkhbxL9MGZeGvG39Eyfk69DRGwi0RR8KppEpHtB4e+zalOhJs4nE1gk3cYYMXiwly+zdr9YvkAKez/XFOWrH1JP/3JDchOFMWEXn7E4uRAny/y2ayUxK8kXC8W8SrTiIujXNIxEHZW39f/fT+9286eBwTB7a/KKEYuSzVUUBoz6A6A0qlEr/73e+Qn58va7tsDUoGCgX/g3uiQY/R6UlYc7gwbLJPUEx1dJxiDYbj1XTcvU+U10ifFCYC3e9NB09g0qDwzdIB4HgDS3XEEOfKK6/E/v37ZW2TzaBkwCXytnNL/95Yd+QUZo8eHCbZ9HBRTPvjopRnSWoWHE78+UgUVpxH8fk6jM0Nrd5OsLiZfgoIm0F5mD17NubPn4+SkhIMGzYMERG+Hq+DBg1qc5tMQcmAgQtszulpjES5JTQXy6Bk8/RGjigVxUwSGjqPrkFLL5NElKr1oLU+Lx+jc9IQqQ2vB4OBjRQMCaZNmwYAPmtdHMd5g32lks76gz12MhAjMo9ZfegkrkjuETbZsQp6b/SxFFMdxerpKIrYSHoBQZfeb5fbjSWbfsCi6deHX3ZwqQ+7JWwG5aGoqEj2NpmCkoGzRIMczr8TQkmdBbf27x022Wec
CuRSqgl1usGFfkY6I9fpmkb0C9JRQk7OVJvRN5mOt9zpBjf6GS/+ZFftOQwFz+O2MKQ2upQzViCXTgkuRhchLS1N9jaZk0SYidVrqZZoYFyeEELw6pffY971o6Hg2c+YJuFMddTV+M9//oPRo0cjOTkZZ86cAQC89tprWLNmTUjtsSdbBpK41qURmimptcCgCZ85qifFVEc9dfQ8CHtG0TG1JcfQm0b01F/8uW49egol5+twz9ihHSI7maU6CkiziU/urauxZMkSPP7447jxxhtRW1vrXXOKjo7Ga6+9FlKbzMTXRvzFSdkID4OfOlGldRYcLKvGzf3SPbWq7OKmOEEiDor3o+isbg4GjoA4Jeo9uST8/cTivwK8odsIgZHnpWtNSdXo4gK/Jwla/7nvGh2NMGj1ECRimcRitARe/PH3d1U2F0FUUz0of3FS3s9K1IOSumfEz3GrXUAUx4MQgpfXbsWsiVdCH4aXH87P920nBBzPsZpQjIC8+eabeOeddzB16lS8+OKL3v3Dhw/HH/7wh5DaZDMoGagh/hVLca0FJoMeUWH0/LrgpvcVnrfTG6wuNNApN3/BQqfMBwCcd3qU7ftb9+J4WTXmXj+6w2UzWsNmUB6KioowZMiQVvubM5uHAlNQYYQQAjdbf2LIiN3pwtMrvsZb909FTASdjO4Mhj8yMjJw4MCBVvu//vpr9OvXL6Q2mYlPBvpw/t+qBybFw+p0YWthCcZnpvg9p71kq+mF6vY10nt8ckzRVORmU/LgA4C+kTy2HilEhEaNKYPDmzmileyIrvdG31EwN3MPCxYswJw5c2Cz2UAIwZ49e/DJJ59g8eLFWLZsWUhtMgUlA2egQQZam5wiNSo8ed0wPLJ6G757+A7Eh8HUV+RUIFNNx838VL0LfaLouJkXVZmRmRBaGel2ya2sRWZijPSJYeBUA8GG/QWYckWO7CXdJWU3An1YHUSGCPfddx9cLheeeOIJNDY24q677kLPnj3x+uuvY/r06SG1yUx8MuAkgW/jrKsGYERqIu7/9Fs43fKv2TgpuqM6KK6XO8JwL4PB6aJXGMlBCDb+7FFQNGQz/EMQBjfzLjiDAoAHH3wQZ86cQWVlJcrLy1FcXIwHHngg5PaYgpKBCJFURxzH4Y3bx8Fsc+BPG3fLL5unpyUi/aTe6TDZGjoztwiKqY7MdfU4XVmD6wZkdrjsSEXXHDAZHU9lZSXy8/Nx/PhxVFVVtastZuK7BCKRhNRfyYt4XHQJ9vd5vVKJD6dPwsR/r8bI1ET8YqD/AUawB46namq81a44wkFwEwgSb/ZSbXN+yjh4jyn9K4MEFTxlJcRKdQCASnxQF3SBY4scev/mtGhlJBxqFQSFhGu+SEkMIuLeDgCcn+ys8dEGkCY3c4VI25xYKQ5AshwH/DxHeUeO4Zq+6YhUq6Vd92UmQS693MGmyY5AAAdB5hmP3O11BGazGXPmzMEnn3wCoSkcQaFQYNq0afjnP/8Jo7HtJnk2g5KBM0Q6irFXjAEv3ng1ntu0R9aM2Kfd9JKknbLQM3edrrxASS69cht5VY2YMrjjzXsAcMrKTHwMcX7729/ixx9/xJdffona2lrU1dVh/fr12Lt3Lx588MGQ2mQKqgO5tV8GLHYnfjhbQftSGF2MBpsDBeeqcD2F9SeGOCwOysOXX36J9957D1OmTEFUVBQMBgOmTJmCd955B19++WVIbTIFJQOJIqmOWqLkeTx57VDc++m3+FEmJWXi6bmZJ+noPT6JMVFU5JqiOz5BLQBsPVoId4MZfUx03NyTNF1vwOwoWC4+D3FxcX7NeEajETExoXm+MgUlA842vO08fNUA/GnCcEz7eCNK69pfFVasWGK4cVH04nNR8qZzhVDTRg42/nwc4/r36XD38mb8FUtkMFrypz/9CY8//jjKysq8+8rLy7FgwQI8++yzIbXJnCRk4AJRIZ4LfiZz3/Bc/FRciUVb8vDPqePaJbtaUCCepzNo
VtkF9NDSecc5X9+AeGPHJ26trm9EfFTHBgTZHE6s3nMYrz8aurtue6lyEPRQd723+o6AQP7A2q74PrBkyRKcPHkSaWlp6NWrFwDg7Nmz0Gg0qKqqwr///W/vufv27QuqTaagKPH42Ctwzb9WYfH1V4c1Vx+j67P6pyPoERWBvmEsfMlgtJepU6fK3iZTUDKQGSDVkRh94ozoE2/EdydLcPuA0Asa9lEEt/4VDrIN9MptZCbRGaxprAG9/e2PeHDClciOpGeRz2apjgISjjWjrrgG9dxzz8neZrdTUFJxTqF8/izRIJ2Tzq59aazSuIxk7Cw6h9v6pgV1bf5KfZxpki31WWedWfzi/MR3NaOI8u+QcLpBQGYEHzBOyovEcUEkjqo55uhSiitrkG6KlyyZIWZ64Ym4aZRztVb+xZW16J0Q7TnuFHk5EIkrAyBdoqTp+zhw+hwOFZfhiz/OxBkbkNmsKDrYqnvGCmTq/ZfiaIlkOQ6WkaJbYLFYvLFQzUQFGEfEYE4SMuAI4TZW1DeitM4Cs1RwrpRskTRL4cbeTmXfHhxOOt6Ljg52znjn2x8xbdQVMOq1VO83TdmdHeZm7qGoqAg33XQTIiIivJ57MTExiI6ODtmLr9vNoMKBzk+xQike/XInqhps+Oet17RPtkiapXATQTH9jS6MVYrF0Ks7LjC6rtGGT3cdwHf/9xAAuvebpmxG1+Duu+8GALz33ntITEyUxeOUKSgZSETbZkHfnSzBzjPl2HjfzcjpEd1O2RKmojBi0tIbtOjFQXWcB9+K7/dhQKoJg9OTPbIp3m+TRDar7gxbg/Jw8OBB5OXlISdHvmByZuKTgdOQTnXUkme/3YO5Vw9AbkL7yzacDiLNUrgobKAXCHW6vJqK3FOVtR0ihxCCt7/9EbMmXuXdR/N+FzYyE18gCABB5q0r3u0RI0aguLhY1jbZDIoCuT1ioFXS84BjdH62Hz2FanMD7rhyAO1LYTCCYtmyZXj44YdRWlqKAQMGQKXyNYcPGjSozW0yBSUDPdpoZtOplLDJtNjeI8g0S+EgUUNvAt6DUsqhhA4K0v34+/24+5qh0LZY86J5vxNZkG5AmInPQ1VVFQoLC3Hfffd593EcB0IIOI6DO4QsLExByUBbpuMldRZ8VXAWK6ZNlEl213uQ5YCWtzLpIOPLD8fP4JV7b+kQWQyGHNx///0YMmQIPvnkE+Yk0Zmohgqx8Lg9C47AsymX1Y4XN+dhSmZPDO8RDZfVN3ZKoRb/OvzVoqqCBjGwA1LxKRJvL4ItcBwXr/Pfp4oGgliOAye1DCZVd0kI7DLO+4lFAoDzF2oQr1dJ1nQSU+CcIH5P/NWSqq6tRw+d53vinCKxb2IxUgDgCHy82tyAworzGJ6e5FM3qsIqIE5BZxZV4SCIU3PScU7dkHC4hXfFF88zZ85g7dq16NOnj2xtMieJDuZAxXlMyEimfRmMTszewmJkmeIQG6mnfSkMRtBcd911+Pnnn2Vts00KavHixRgxYgQMBgMSEhIwdepUFBQU+JxDCMHzzz+P5ORk6HQ6jB8/HkeOHPE5p6CgAKNHj0ZKSgoWLlzoc2z//v24+eabkZCQAK1Wi/T0dEybNg3V1XS8toIhAxJZA1owvX9vLN7xMzYXnZOlcGEG2p5mSS76UBw/e1MqO9Enoe1VQdvKnpNnMSIzpdX+LD29t2qasqWgNS7NnDmzqW1WbgMAbrnlFjz22GN4/vnn8b///Q9r16712UKhTQpq27ZtmDNnDnbv3o1NmzbB5XJh8uTJaGho8J7zt7/9Df/4xz/w1ltv4aeffoLJZMKkSZNQX1/vPWfOnDmYMWMG1qxZg3Xr1mHnzp0APLXsJ06ciPj4eGzcuBH5+fl47733kJSUhMbGxpA62BGUIfig0VnD+uL2vmmY9/UPeOun/A6VLTcl0tmdwsa5CxKpm8JEaU2D9Ent5OCZMgxJ79lqf0nw70GyQ1O2FLTGpcTExA7va2fm
4YcfRklJCRYuXIhf/epXmDp1qne7/fbbQ2qzTWtQGzZs8Pl7+fLlSEhIQF5eHsaOHQtCCF577TU888wzuOOOOwAAH3zwARITE7FixQo89JAnIr62thZDhgzBoEGDkJycjLq6OgDArl27YDabsWzZMiiVnkvLyMjAddddF1LnOgpbG/Q8z3F4csxgDDbF4YG132NwYizGpye1QzY9d3UbvSQWsIms9YUTawekWKprtCEmUtdatkAASmsTNGVLQWtcGj58OJYuXcrWoJq4NPeeHLRrDar5C4yNjQXgycVUXl6OyZMne8/RaDQYN24cdu3a5d23cOFCTJo0CXq9HjzPY8qUKQAAk8kEl8uF1atXg3ShpJKaEFId3dAnBXf2z8COdlbWDUW2XGgphnJpOzDlkI9cVXg7TQhByfk6ROtbKyidSELfcENTdlvp6HFJIOHZujI2mzxT7pAVFCEEjz/+OMaMGYMBAzzBhOXl5QDQauqbmJjoPQYAN954I6qqqnDu3DmsXr0aCoXnR3/VVVfh6aefxl133YX4+HjccMMNePnll1FRIU959HDRE6HZuowaNZztfOsIVbYcpFBMf9Mzjk6qo5SY8BVJfOOr7/Hsyg2obWjEdf1bl2BJoZc0hKrsttCR41JlZWUH9apr4Ha78Ze//AU9e/ZEZGQkTp06BQB49tln8e6774bUZsgKau7cuTh48CA++eSTVscu9X9vDtRqiUajQY8erWv6vPDCCygvL8fSpUvRr18/LF26FH379sWhQ4dCvdSwcwqt33aDobiuAQkRbfjl83yr7RSnl3QxBwDB4RLdIAiBtwCcbF4W5DnxjQiiG+d2B9yUdovfrai41PNvR6PoxhN3wE3hdohunMvZaissu+D9N5yOwJvDLroRl7PV9qdPNuCVddtwdXYaNEoFiCD4bCcsF/8dTi6V21K29IeJ+BbCc9YWOnJcGj58uKcdls0cgOcevf/++/jb3/4Gtfri2vjAgQOxbNmykNoMKQ5q3rx5WLt2LbZv346UlIveRiaTCYDnjSUp6eK6SmVlZZsWFOPi4vCrX/0Kv/rVr7B48WIMGTIEf//73/HBBx+Ifq5Q0CJC0CCTs6IYGjgIDx3nRiKc3px18cQBAk/sEuDxwCuDGjbw0EBAT9i9CicOTvAAqprOTYcNlVChEQqoISAVdhRChzKoEUucUIGgnPN8ManEigucCg1QQgkB6cSKk0pP9oNowYGGhgZURMRg0MCBsHJAHa9GI6+BAgS9hQac5CNAwCGKOBFBXCjjdeCJCj1hRz0UMEPpfYRPEi0EcDDADSNcKIFnapMEB6zgUQslCCcgmzTiFHRwcRwiiRuxcOJsUxBTqsDDQTicFzzKLlvpxGm3Cg4CRFkJEjXAqSaFZNIALgKU2gGAIMdAcNbigs1NoFNy6KlX4qTZs0aUqFMAcKKiwfN3nxgtSusdsLoEaJU8ekVpUFDuMcnER2qg5HmUmz2eib3jI1FmrkeDwwm1QoGM+CgUVNQAAMxWB+qsdpRUeRbC03pE40J9I+ptDih5Hn2S4nCstApEoUJ0pB46jRpl52sBACk9YlDXYEVDvRkKjkdWchwKzlWDEAKjXguDToOS83VQ2BqREqOHxe5CbaMDzWPZ8Uoz3AKBUXAgVqvAabMnpiklUo1Gl4ALNhdgdyI3SomTFjecAoFBySNey6PI4lnDSuIJHAJQ7fTYcnIiOCx97H68uuEHzLtjIuwC8d7vRI3H5NN8v/vogVIbgVUg0PEcUrTAiaZcec0ZHyocnr8z9RzK7UCDm0DDc0jTAccbPMd6qDkoOaDM7vm7t45DpQOwOAlUPNBbBxQ0+RnUuYA6F8G5JstNmg644ATqXYCC8xQ0zLd42olRAhGKi040qVrA7PK0wRMgRw8UNHr6ZFQCUUqg2AaoDx1CWloaLBYLzp8/D47jvKXDg6Gjx6VBgwZ5zYkM4MMPP8Tbb7+NCRMm4OGHH/buHzRoEI4dOxZS
m21SUIQQzJs3D6tXr8bWrVuRkZHhczwjIwMmkwmbNm3CkCFDAAAOhwPbtm3DSy+9FNIFqtVqZGZm+njkBCKTtyGK97yJpcPus6ab3VT1trmwX3NgLQD0usRMln2J63ZMi3NTLslcng0r4ogTcZznHAO5aHtNJnagRdt9XBc9hv66bS9MDieu0boBAujcVigUFxf++wi+/c0SLFA0KZMICDA1pVc6T5Re2f6u3wA3EuAEaSrO1xtWn9QX2cQzCqp5j/Lsobjo+dBb6ZGh1HluZO4l1q0rDATxag48z6F3lO+aUL+YFt6FOhXidBePZ0T7zhpzTb7muhj9xc/2ivP1Zc9N8qwrVFusMOo0iDReTLjbM87XBbxvzx5wqy7KMuhN3n/rtRoooy7aKHOS41t9VtlQ6/mcRoWkKF2TXBviIz1t8vWeZ61f3MUZdJRGAVOECmjwfCdZBt+fWD+j5z4Qm+d4gubiQ3pFnBbH8o9iUPwt0PBcwPsNABl6Di0f8H6Rvm/ccS1SE6XpIHpujOri3710AGlxTc3XUO0gMCo5GFtck/6S5bjcSM4nxUdui65HKoBkDdC8ZJpzSYhCbgSgHzgQgKewXXKyJ1bQbJb21qQ1LmVkZKCgoIClOmqitLTUb5CuIAhwOkNzamqTgpozZw5WrFiBNWvWwGAweO23RqMROp0OHMfh0UcfxaJFi5CVlYWsrCwsWrQIer0ed911l2T769evx8qVKzF9+nRkZ2eDEIJ169bhq6++wvLly0PqYEfQVjvpP386is2nz+Hru6d0uGw5obluzsuQRiUkuWHs9KLVW3DfuOGIjdT7NafRvd/0ZEtBa1z65ptvwt21LkX//v3x/fffIy0tzWf/Z5995n0xaCttUlBLliwBAIwfP95n//Lly71Ba0888QSsVitmz56NmpoaXHnllfjmm29gMEgn9+zXrx/0ej3mz5+P4uJiaDQaZGVlYdmyZZgxY0ZbLrVDqYLKZ5YVCEIIXtx5EB8fKsSKO8ajp6H9iUeDlR0OKuxALB1nOlSYGxHblvU72eTaEKuX3zvkcHE5vtx/DAdffjSwbJr3m6JsKWiNS2+99RZmz57tXV6Tky7kxIz7778fr7/+Op577jnMmDEDpaWlEAQBq1atQkFBAT788EOsX78+pLY50pX8uQNgNpthNBpR9MxMRGnFA1dJGPw3jxOd14QolovvtW378f7PJ7DyF9eiT2xrLzSpXHwKXesBuaVsMYhE9nS1SHZwZVys3/35FoLcSA68MVpcuE5cEQv6wB55gsq/Msgvu4DcpFi41eLpLFqa+C5F6RS/b80mPh+55XXINXlMiXx9TeAPN9QHPgaA2Hxl//qNT5AUbcA/7rnZc9zPDKr5fgMAF4RjTKhIyRb/sMTvS8QZQn//n1vta/5t19XVISqKjudmIJqvbd2uckREynttDRYzbhll6pT9vhSFQoGysjIkJCRg48aNWLRoEfLy8iAIAoYOHYr/+7//83HxbwssWawMpAeR6uhMbT3+sfswPv/VdX6VUzhlh4veNFMd9Qh/yiG/cuPldzM/cPocvjl4AodEZk8A5fvN0gIGRAAHQWavO7nbCyct5zhTpkzxxo/JAUsWKwOVkLZ9/G37Adya3QtDk+Ilz5Vbdrgop5jqqKKOTuqrcrP8LwSvfrUD940fhmSJMvY07zdN2Z0dlouvtQu/XHS7GZS/khUtCcUE2Ngi3ZA/00t+ZQ3W5Z/G9gduAS9SSbe9ssWQ6jenCNwOp/D/mDQSAZyClyynAV78Gjm3SLmNAPutjVbwUWoQpYRJN0C5DgDgHeImPs7ZWhk1Wq3gnE39cYl4JrnE1wWbzWjV9Q1Ys/cofvzLbMk4o8YWVtqOLnvhld31VwQYYSA7O1tSSV24cKHN7XY7BRUO1BLphhZtycOMoTnoZZTfPCQlO5xQLPAKtZKOcI3Mclfs/BkjM1OQHcTMWk3zfjNbS0BoO0ksXrwYq1atwrFjx6DT
6TBq1Ci89NJLyMnJ8Z5jsVjw5JNP4osvvsD58+eRnp6ORx55BL/73e+854wfPx7btm3zaXvatGlYuXKl5DX8+c9/htEov9mdKSgZSBVJN3Sw7Dy2F53D3nm/6nDZ4SadYgmG9NjwpRwSlRsdWtYQfxBC8P62PCy4ZWxwsuUT3WZoymaI05zNfcSIEXC5XHjmmWcwefJkHD16FBERHgelxx57DFu2bMFHH32E9PR0fPPNN5g9ezaSk5Nx2223edt68MEHfUqN6HTBffHTp09HQkKCvB0DW4OShUKRVEebThRjclYqekSE5xcuJjvcFFjomXuOV9Ipt1FQLV+5jd0ni1FRZ8HU4f2COv94+Ct9dErZnR3aqY42bNiAmTNnon///hg8eDCWL1+Os2fPIi8vz3vODz/8gHvvvRfjx49Heno6Zs2ahcGDB2Pv3r0+ben1ephMJu8WzKwoXOtPAFNQYWfHmTKkxUjHWjC6H5/+cBC/GDkAOkqZ2RmdH7PZ7LPZ7dIWk0uzuQPAmDFjsHbtWpSWloIQgi1btuD48eOtPO4+/vhjxMfHo3///vjDH/7gUy8rEOGMVGImPhmIRevFcrvLjWc27sah8vN4fuKIDpXdUcSr6Zn44iLppFKPj5BHmbjcbqz+6Qg+mjMt6M/E0atNSVV2Zycc5TGa20tNTfXZ/9xzz+H5558P+Dl/2dwB4I033sCDDz6IlJQUKJVK8DyPZcuWYcyYMd5z7r77bm9aqMOHD+Opp57Czz//jE2bNolfaxgddpiCkgEVWj+df/7uJ+SVVmHLg1ORGh2+9RJ/sjsKqov2CjrC1TIFyG7NL4JKocCo7OCToVJ8H6AquztTXFzsE6ir0Yi/mDVnc9+xY4fP/jfeeAO7d+/G2rVrkZaWhu3bt2P27NlISkrCxIkTAXjWn5oZMGAAsrKyMHz4cOzbtw9Dhw6VsVfBwxSUDFRADWNTglaBEAx+/VOU1zfi+4duD6tyulR2R3PORhCtojNyldVZEa3r+Nf6c/V2ROvaP4v6/MfDuGNkfyjaoPDK7EA0JWsgTdmdnnDELTW1FxUVFXQmiUDZ3K1WK55++mmsXr0aN910EwBPhvEDBw7g73//u1dBXcrQoUOhUqlw4sQJpqC6DH6ns8RT2wjAh3nHUF7fiN6xUciJN/rEq7Q3BstfGiXCqSEQp2h8FQDwKonRRWygJIGm8IInM3rA482fF+8XEVlkDZTqSFAqIag0IBIxVgqRdEYKm/jKP2dr/VnO6bi438/xZohIjJTd6cLavflY84fw5JeUSoMkGUMV6PsKZq2hg+OzOgO03cylsrk7nU44nU7wlzwXCoVC1Dx35MgROJ1OnxIlHQ1TUDLQq0W6oTVHi7B48pW4f3jfDpGdSujMngAgQ0fRzZxSRd0MY/vXvjYdOoHoCC2G9+7Zps9RdTPvIhV1uyNS2dyjoqIwbtw4LFiwADqdDmlpadi2bRs+/PBD/OMf/wAAFBYW4uOPP8aNN96I+Ph4HD16FPPnz8eQIUMwevRoan1jXnwycL4p3ZBLEJBXWoVrMpLC6nrZkgscPbtLdeAkDWHnvIVODsJqa/udUj7ffQi/GDmgzc9INT1/GKqyOzvNufjk3oJlyZIlqKurw/jx45GUlOTdPv30U+85K1euxIgRI3D33XejX79+ePHFF/HCCy94Cwuq1Wp89913mDJlCnJycvDII49g8uTJ+Pbbb6EQyTITbtgMSgYamtINHS6/AAXPI1PGZLDSspUApWDdejcB2vBDklW2nY52rHe0z4Tlcrux8eAJrA3BvGehU1XFI1s8GT6DIsG4eZtMJtGaeqmpqa2ySHQGmIKSAVVTuqEvjpzCdZk927Tw3V6UFFMdqSgVDQQAFaW3OlU7K/f9VFgClYLH0PTkEGS3S3S7oOQL0yWgvQZ1OcNMfDKQDhtK6ix4d89RzLlqgPQH5JRNcQ2qD8USDJk96KxB9Ylu3xrUNwdPYMKAPq0WrIMh
k+IaFE3ZjO4LU1AycAJ6LPz2J9zaPwNDkuUtpyHFSa79VXlDJb+B3mvesXKRYoFhJP9C+9a+Nh06iUmDskL67DGK6YaO0alu0iVg5TbCBzPxtZPC83VYW1GBjQVnsXvuL2lfDqMTU2m24MDpc5gwoA/tS2EwugRMQbURoUXp9D0llZj2yTeYdEV/fPLrSUjUayE4A69kS8VBCRJl2f193sjZQIgbvJ9y8D6flYiTEmyBZwbuev+fjXZycNcT8G7x65Y6zqlEAm4DrHPFRug8xyRisHhXYAcSzi4xLbC2nrLEcm7A6pFJ7IHvGXG2dnv77ufjGNTLhAR9aGbC2CAdNsNRKyq2eaTohnFOUoQz1VF3hymoEBEIwZMbduOx0YNw36ghMICOm5NWKkg2jOgoGoh1ajqPrl4Ruull06GTmDQgM+TP6+h5+1KV3dlhThLhg61Bhcj/Dp9CrdWOWSP7oZyjF8VYztOTXeqk9/iU1lioyC1pDO1FRBAIvj1S2C4FVUon9Msjm5V8Z1CAzaBCwOZyYfHWfXhy/FBolewWMqT5+WwZHC43RvZOkT6Z0aVoa/2mYNtksBlUSHx57Cz0aiV+OaA3ACCFoqt3ikDPvSpNTS96My2eTo2t9MjQbF2bDhfi2twMqCTWAsVIo+jqncZSHTEowBRUCOw/V4Vr0pPANy3g11JMN1TL0SvUU+Om9/jUNNCxOV2wt33NjxCCr38+jknt9N67QDHdEE3ZnR0BFx0lZNtod6qTwBRUCBwsv4DBpjjv3xaKllILR0+22U3PDGG20kl1ZHa2ffX6wx0HUFh5AbcMbV8C4XqKqY7qWaojBgW63QKKVEkLIuHq7RYEHKo4jwGmkd4cWDwE77/FXMk5ifQ8vFL82vx9XsFx4DgFIOHC7pbKXSfmPsz5f4/hXSq4Bad0uQ0JeF3gYGNO49+upYQAzu0EF+DavJ8Xc3EXKYkBAKSxtZu5wklAGj33Usw1v+V3NeeDdSAEMGhbzHal3LT8uNe3w4HQFynZfp4FBYE8r/WXoXsa8+ILH2wG1UaKLpjhcgvIjov27utN6K0DZVKU3UdJz+6TnWikIzei7VrinQemAvCEJnS0bLnIppjWitF9YQqqjRwsv4DchBioWpQcP8nTSzd0gqM3chS46K1/HSuvpSI339L2acTafcfw2/HDoFO3b63yGM3UUizVUUCaZ1Byb4xuaOJrL7vOlmPoJfn26D5L9N6qafab1g+4rWIJIfjuSCG+e+r+9sumeMPZgBkYgXAQZM6dJ3d7XRU2g2oDVqcLXxwtwh39e/vsjyL0TF1RoCc7mqO3ch6tpzN7i1G2beAor7Og0eFEWnx0u2VH03MWRTR7lWVQgD12beCrY2eQGKnH8J49fPZHEHoDNU3ZkRw9Z9hIDZ3ROrKNv5gvDxRgWHoyonTtLxUfYgiWLNCU3dlhThLhg82g2sCG42dxe7+MVqW6yyimGyqjmGapRKD3Sl9SQ6f2RLEt+JFDEAje+uZHPHjtCFlkl1BMdVTCUh0xKMAUVJDU2x3YcboMV/VKpH0pjC7ChoPHYbE7cOeVHVvEktGxMCeJ8NHtTHyCQ3zNxmqxguOAC1Y7jp+vwxWmOFidLsxavxMDekTjSlMcBIdvxKSJs0BoMrUpwmh6In6e2iRiBQGB/YJZ9LNSpTzUhrZ7A6byTfdSIhZJEiHwtQWKY+pl1IJzu8HxEiZOkbbhEo989VcyI1UJEKenTU6sKi7PY3P+aUwd3g/qULz3/HzXqRr/+zuCVJbqiEGBbqegxNhyqhT3/Pc72N0COACmSB1qbA4QQnB9nxT8fdJIb3qjllg4JbW1IJqyzYRHBKUyI2abk8o6lNkV/HrMuRozRmTKlxzW7G77GphsstvQ7+4GCUM9KDaD8sAUFIBXvv8ZL23fD71KiefHD8UNfVKgVSoQpVHj+Pk6cACy4gIHhtbzKiQKdIz0Zk6JREJHdh1RIImSgqqzOpFMIVa3zgUk
B+nvcPBsOe4fP4yKbLmhKZvRfen2CooQgvfy8gEA22fdhmSNr/tytohiaoZmxAJN2TQXMAMU2u00cmsbrSiqqsHgXknyyZatpRBks7CcgBDCgcgctyR3e12Vbq+gdhdXwOkWULTgbkSoVXBZ2z4byXTRKZ4HAH0EOt5sAJCtpJOwFQD6Ukp11DfIpbpVe45gcC8TekTJl2WkL72EJUH3uzvC3MzDR7f34ttcWIobsnshoh1paE4pImW8orZRSDHV0QkXPTfzgkpxp5CwyQ0i5Y/LLeDl9d9j7pSr5ZVN710kqH4zGHLT7WdQNpcbUdr2ZSUQKM7GBY6jlnPITdHoJMi9Kh2s3CDEfn/8DGxOF6ZdNVBe2bK21kbZ7I0+IEIYnCTY/fbQ7WdQchAp0CvUYyD0ZEdRzCQRpaMze4sK4pXui735mDq8HxRibuihyKboRRdMvxkMubmsHjuO48FxPIhIfaJL15iIyw3SYr/bLh4nxata37JIzuaNgxKrB+Wyia/ZCE5xZeN2tD6u4pVoFFx+j7VEHSkeyCJoAw/4QoBaUlHEBcEl+L0nvg1IuH+J1WwKEMcUrVEAghucxMsB7xK55xL1oPwtBEQrcHHG6sdzwC0IWLP/GD783S/F2w6BoHPxSXk0SC1w+FGs0Wp4XmfF6oZ1U9gaVPhgMygZOKektw5UpjVQk11M6JXbOFtDZ1HkrF188N9x/AwAYHR2mvyyKaY6oimb0X25rGZQDAZtVv+Uj9uG9pXdvMfovLAZVPhgvyIZMLms1GQn2um5uCdz9NzMexr9l4IPu1x14JHDLQhYsy8ftw/vFx7ZFANlacpmdF+6tYKqtztR0dB+5WLl6a1e23h6k2Arxcen0Ukng0WjyBLMrhNnIRCCMWEw7wFAI73KKlRld3aavfjk3hjdWEHtPFuBEcvWYE3BWTjEFumDoI6ntxZTp6KXxbOG0FOONY10Zm81rsBrUKv35uO2oblQKsLzs6qh57BJVTaj+9It16COVNXg/rXf4/nxQ3BNLxO0SpYFk9E+3IKAL/Ly8e5vp9K+FEYHw9agwsflpaB4DuA5uMyBzXbF5824+39bMXtoDu5sYYqRctNuxp8reG9njTeIUixVUrAyAuG0tp41pDSUwwFAcIm7/2qixD0NFbq2z8RyeI9rl+AQn80owvBry02MAgBwEiUzYA/sfkZEjgGeEIRL6asGmkPPuBYvNrtPFMPtFnBNdlrYRpfclqmOOjg5Xq5caZYuw6R+giC/9z3z5vfQ7Ux8f911EGN7JWLOsL6ytXlaTc/VuyQyhprsQqn4pjByoqqejlyb/wF29d6juGVoTtjMewBwgmK6IZqyGd2XbqegDlfW4PbstFZl29uDu70F+9qBi6ZsiqmOXJRWkV1+skzbnS6sysvH7cPC4713UXZYm++0sjs7rKJu+GjT6LZ48WKMGDECBoMBCQkJmDp1KgoKCrzHnU4n/vjHP2LgwIGIiIhAcnIy7rnnHpw7d86nnYKCAowePRopKSlYuHChz7H9+/fj5ptvRkJCArRaLdLT0zFt2jRUV1e3o5sXsbvd0KnkXXOKECQyEoQRvVimhDATydFz7TJo6FinDYrWI8enPx5GtE6Da3Mzwiw7rM13WtlS0BqXZs6c2RHd69a0SUFt27YNc+bMwe7du7Fp0ya4XC5MnjwZDQ2eNMuNjY3Yt28fnn32Wezbtw+rVq3C8ePHceutt/q0M2fOHMyYMQNr1qzBunXrsHPnTgBAZWUlJk6ciPj4eGzcuBH5+fl47733kJSUhMbG9tkYHG437vl8C85ZrHC55TXwRrvoFAwEAKOdXgxWLOi5dsVF0DEvxil9FZQgELy+8Qf8/vpR4EXSXMkim17yeKqypaA1LiUmJgJgM6hw0qbX0A0bNvj8vXz5ciQkJCAvLw9jx46F0WjEpk2bfM558803MXLkSJw9exa9evUCANTW1mLIkCEYNGgQkpOTUVdXBwDYtWsXzGYz
li1bBqXSc2kZGRm47rrrQu5gM18WnMXGkyUAAGM7s5dfSqk6Epn2OlnbDJayCCPS689TkX2WaJDD0cmBc/pCg9dRokPl2nnk6i6+4Gw4eAK1jTZMv3JA+GXbZHRW6EKypaA1Lg0fPhxLly7tgB52X9q1gNH8BcbGxoqew3EcoqOjvfsWLlyISZMmQa/Xg+d5TJkyBQBgMpngcrmwevVqEJlfIYpqLi6q94np+IGNcXny6sZdmDPxSmikEuYyOoyOHpcEhCFQV9Y70nUJWUERQvD4449jzJgxGDDA/9ujzWbDk08+ibvuugtRUReVwo033oiqqiqcO3cOq1evhkLhMXBfddVVePrpp3HXXXchPj4eN9xwA15++WVUVFSEepkAALvLjaV78jF9YCb2338L1DJ7WiU46bk4xdvopTpK4uitvSVH0Ul1lKy6OHTsPlmMQ8UVeGDc0I6RTTHdEE3ZbaEjx6XKysoO6VN3JuTXvrlz5+LgwYPYsWOH3+NOpxPTp0+HIAj417/+1eq4RqNBjx49Wu1/4YUX8Pjjj2Pz5s3YvXs3li5dikWLFmH79u0YOFC8ABxxu0HcbjgsvmanygYr6uwOLBw9GFrOc56c2HkOkU2BC0QkgEGsFAcA8BIBw7yytWJtUOqgcXEgEl5taoP4gK6KClwVmFf7N4m6XRxUyiBmulIek4rA/eYClNtwOJ3gNDxAJJRkQ2B3dKFBokStn+/LzsH7Wvfaxh/wwPhhMEaEQVn6uWd2QqjFEdmbH2upJLiUA3g6clx64YUXAHiUotwWH7nb66pwJIQ7MW/ePHzxxRfYvn07MjJaey45nU7ceeedOHXqFDZv3oy4uLiQL9DhcGDIkCEYPnw4PvjgA7/nmM1mGI1GfPvUw4jQapBUVYZzqgg4OB464sa3B49gl02BBVf2R5yjEQTAeZVnUEmz1aFCHQEbr4RGcMHkaMAZrREAEOu0ggdBtcoT5JpqN6NaqYNVoYJacKOnox5F2mhUqvTIsV6Akgio5D0Brz1tZtSotGhUqKEkbqTazDiljQYAGJ02aAQXKjUepZBsq4dZqUGjWgseBOm2Ou+5BrcdercTFepIEEJgstejQaFGvVIDDgQEHHgiwA0OkS4HDC67twRHot0CG69EnUoLjUGPLMGCIl4PF3hEEBdiiQPFvKdvveJ0cBAO590eZZGtcuK0SwkH4RCpViBRKeCUw3PMpBLgJsDPViWSVQKyNG4UO3nYBA46niBZJaDQ7jk3QSmA16hR2eRsmKkDzjkAqxvQ8kCqFjip8vS1h04JJQeUNXqcLzKNapQJajQ43FArePSO1eNYlWfGaLY5kZtgQGmdx0kkI0aH6kYH6u1uqHgOWfEROFppAWepQ6yah17JoaQpoVxahAI1DoLa2nooOCBbR3DMyoEQIFrp8dQrbiqrkaIhsLg51Lo8uoEQQMEBJbUWPLp8NdbMuQN2jed+99QAVgG44ATA8+irBwqtgJMAkQogXuVZywGAJDXgIMD5Jv2arQdOWz379Dxg0gKnmibmiRqP2edAvUdGHz1QYgdsbkCrAFI0wMmmcxPUnuusaPLbydQDZXZPLj0ND/TSASea9HK8ClByQHnTd5OhA6ocgMUNqDigt+5iqfc6l2cN6lxTu2laTz/r3Z77ka0H8hsACAJiVEAE77lGwPMdm12eNngOyNF72hUIYFR6iiEW2wD1+F8iLS0NFosF58+fB8dx6NWrF4xGI+rq6nxmO/7o6HFp0KBBKCgowEv/rYVOL++ygbXRjD/eGR1Uvy9n2qSgCCGYN28eVq9eja1btyIrK6vVOc0PwYkTJ7Blyxa/byNt5dZbb4Varcbnn3/u93izgip6ZiaitGo0VtT4HP/7j4dR2WDD364bLvvsCQAKtdHItNUCAAQ/2QeakZrlSM2g/M3OTuli0NtaI9m2vodR9Lg2MT7wdQWYQeXbFMjVSt9PXituH+ITTAGPkQj/P86jlfXol2AARIpT
AgB3PrAZxl1bE/BYIPIbOeTqCeb+50u4BWDp/bf6P7G95Tb8zJTyLQS5kTLMoEJ4O89vCNJJoh0zKP39f261r/m3LTZQ0xqXbrjhBmzYsIEpqDDSJhPfnDlzsGLFCqxZswYGgwHl5eUAAKPRCJ1OB5fLhV/+8pfYt28f1q9fD7fb7T0nNjYW6gADXTPr16/HypUrMX36dGRnZ4MQgnXr1uGrr77C8uXLQ+wiwIFDOCfMGU3KiQbp1rYPsnKRo6EXB9W3R2CTZDjJ0RFU1Fmwcvdh7Pi/BztWNkUvuhx6NTkloTUuffPNNwA870hyWzYl3ru6DW16zVuyZAnq6uowfvx4JCUlebdPP/0UAFBSUoK1a9eipKQEV1xxhc85u3btkmy/X79+0Ov1mD9/Pq644gpcddVV+O9//4tly5ZhxowZofUQgILj2p2xXIwSiqmOSrX03q6KHPSyWJy6QMcxpcjOYcnmn3BdbgZyk9v/Ft4m2fRC3qjKloLWuPTWW2+Fu2tBIRWoDAAWiwVz585FSkoKdDodcnNzsWTJEp9z7HY75s2bh/j4eERERODWW29FSUlJR3alFW2aQUlZA9PT09u1uNe7d2+8/fbbIX8+EMOS4vDuzyfwws6DeGJkLpQyVzt1UqwH5eToyXb4SfvTYbJlDrYOFrPdhXe25uG/c+/scNkOim/Vjk68Zk9rXDKbzZg9ezb1bObNgcojRoyAy+XCM888g8mTJ+Po0aOIiPBMux977DFs2bIFH330EdLT0/HNN99g9uzZSE5Oxm233QYAePTRR7Fu3TqsXLkScXFxmD9/Pm6++Wbk5eV5PRo7mm6Ri29UzwTMGdYXS/cXoNgs/5u33k3P3VpHUXYET2/UilDT+cFsP3wCOUnxGNUntcNlR1BMN0RTNkOcDRs2YObMmejfvz8GDx6M5cuX4+zZs8jLy/Oe88MPP+Dee+/F+PHjkZ6ejlmzZmHw4MHYu3cvAE9c2LvvvotXXnkFEydOxJAhQ/DRRx/h0KFD+Pbbb2l17fJXUHa3G7/9aide2HUQAHDOIr+CiqNY8j2OYgxWopLeK70psuMDcxwuN97ZsBWPTrlK1mTDwZJIMRYpkV5Nzk5PZ6uo6y9QecyYMVi7di1KS0tBCMGWLVtw/PhxbzByXl4enE4nJk+e7P1McnIyBgwYEJQZNFxcVuHvHMeD43goW6QyOlRWjZ0lldg68yasOXYGI9JMUCgDdzuUWKSzfCSyBOmAWWejeM6+ULz8zqgM6OOsl4yxUkWKr7ArdIFjeTiN/9HpdAOQq4f0CrGUSVWknhSn9n/sVHUD+sVHADZxBS00Bo51kvLo5FW+Ceg+33sIffpk4eYhfcHxfPs99drIqUYgVw7fECnl6se+dMoapBdfJ4+T6mqYzWafvzUaDTSawG8qgQKV33jjDTz44INISUmBUqkEz/NYtmwZxowZAwAoLy+HWq1GTIxv+Z7ExESvQwkNLvsZVG6PGOhUSlyw2vHEmMHQiSgnBiMQhBC8uuEH3DAoC4oOVkyMzk04k8WmpqbCaDR6t8WLF4teS3Og8ieffOKz/4033sDu3buxdu1a5OXl4ZVXXsHs2bMlzXeEECrWgmYu+9Faq1RgcGIsjp+vw9WpiWGRkUDoJEwFgB5uerJNFM0+SREdK3zjoZM4b2nE1H4pHSq3JSaKJj6a33VnhwhE0voRSpsAUFxc7BMHJTZ7mjdvHtauXYvt27cjJeXic2q1WvH0009j9erVuOmmmwAAgwYNwoEDB/D3v/8dEydOhMlkgsPhQE1Njc8sqrKyEqNGjZK1b22hW7wKZsUZseV0Wdjad1G8jW6aRQMpena5OjgVzNtb9uLh60aImofDDStY2P2Iiory2fwpKEII5s6di1WrVmHz5s2tsmg4nU44nU7wl8z8FQoFhCaT67Bhw6BSqXyyvpeVleHw4cNMQYWbuSP7YdvpMpy8YJY+OQQucPReLy8o6L1WV9NzIERVY8cJ
J4Tgx8ISTB7QB9VOei8E1fRqU1L9rjs7tJ0k5syZg48++ggrVqzwBiqXl5fDavU4b0VFRWHcuHFYsGABtm7diqKiIrz//vv48MMPcfvttwPwBDU/8MADmD9/Pr777jvs378fv/nNbzBw4EBMnDgxHLctKC57Ex8A9IjQYVRqIr49VYpZV2TTvhxGF+Pn4nLYnC70T0lAIUUlwWD4ozngdvz48T77ly9f7q36u3LlSjz11FO4++67ceHCBaSlpeGFF17Aww8/7D3/1VdfhVKpxJ133gmr1YoJEybg/fffpxYDBXQTBQUAQ5PikF9VG5a2MwSJrNhhJN1Jr9xGFsX0N9mxHSf81a934d4xV0CtVCDLT8n3joLm/aYpu7NDO1A3mCBkk8kkmS5Oq9XizTffxJtvvhm88DBzWSkoTqkAp1RAFaFtdSw1Pho/llVDqRWvXX2pa/GlqAytf6llbhXSFR4biNhiqdtxQbRtqYVWf/0q43ToRcIbh8UFyFVW3CCgd4THtb9diLnIB3AjP2t2oHeUGnBKTGnEfrxSdhSOQ/H5OqzbX4CDi+d6/rZ5sn7ToNgGZFBSFDT7zei+XFYKSowkgx7nwpBFAgBsFFP+2CkuI9pk9lxqk+wOWrU/cKYM2UnxSIn1ZIO3UQzj6a6yOzuCQCDI/FuQu72uSrdwkgAAlyDIXkm3GR1H72HSgl5Gcb1EcHA40Sk7RnZBeTVyTBdLkegopvyhKrvbjBSMzkS3mUFZHE5EqsXNd6GSxNNzcTIR8ewU4SRZR09B9YwIz3d5KQVl1chJuqigkinGA7GS750T2mtQlzPd5r0oM86I/MoaVDfKH9h6yk1v1DrN0Vu9PtlAz+5zsq5j3OmOl1Uju4WCKqRYdqKQXtpFqv3u7IQzk0R3p9soqEGmOFyb2RN/3pwnfTKDAY931KUzKAaD0XF0GwXFcRzuHdYXPxRXtKs2jD8SeHrrQPGEXmBOoobe45OoD/+CTHmtBRa7A30SL2aFTqBo4uuusjs7AiFh2RjdSEEBwMheiaizOXCg/DztS2F0AY6VVSE9Pga6MK1dMhgMcS4rJwmO58DxHPgA5SEiNWr07RGDU7UWDE3xnziWkyi3AT8xP5VEhdgmTz63PbCxXnCJz7QUEgOhQtu6XxcEHeJ5K9w28ZmU4BR35HBbA183H+G/zkKFVUCckgdijKJtSyJW9iJA5vAKm4A4fRjjrwAcK6tGtinOZ1+lA4iTQ1+FkCG60gHEUZrJBN3vblhOgwieTe42Gd1sBgV4EsfuKa6kfRmMLsCmQycxJieN9mUwGN2WbqegpvbPwHeFJbK22VtBbx0onaNXbqNPBD038z7G8E4lbE4XtuQX4ZYhfX32Z1LMppBJMd0QzX53dggICJF5A1uDArqhgsqJj8Y5cyOqGuTzmy0T6FlKy0Fv9fqcjd6PqLTBFdb2z1sa4XC50TvBt8LoOYrJYs/RC3mjKpvRfel2Cspk0OO6zJ54cet+2dq0EorphijKbqTnvAirK7xGerPVjkiNulX1XCvNPtOUzdZEAkIEz9KbnBtbg/LQ7RQUALx4/VVYe+w03tt7TJb2tBSn4xqO3pOspZh6R6sIr3mxqr4RMRGt7Vpair+Y7iq7syO7ea9pY3RTBZVqjMTH0ybir1vy8JfNe9sdc5CioJfqqCfo2V56aemtQfUyhNe0+V1+kV8HidTWCeU7jO4qm9F96ZYKCgBGpiRg43034fPDp/C/w6fa1dZJiqmOThF6q9fHG+i95R2vDa9i/urgSdzkp7jlCYrphrqr7M4O7Yq6lzOXVRwUOA7gePAiveJbxBrlJPfAc5OvxDNf/wBTtAHjMntKxnE4za0LBLo5PZzE8wt2ijhfCM72Ley7/LTt5hVwCVa4bOKzOIVa/KvmlIGLLioi/LuPEQcHwUqgiI7xe7wZwSB+nHeJeB4EqvfEOwGlCnCE7rUQqI5VYeUFnKqqwcSBWa3jsHgE91oXQpxTl6C9
cU7MdMVoA5eXggqBXwzMhMXuxD0rN2H6FVn4y+SRULexxHEcoWfii6WY6iheRW+w6aEPX3aHT/ccwcR+vWHQtU7hHU8xqUQ8xXRDNPvd2SECkSw2GkqbjG5s4muG4zjMHJGLrb+7A3uKK/Dwqq1tbkNB0UlCCXpOEh1Uksm/7DDVohIEgo92HcSMUYP8y6XZ524qm9F96fYKqpmM2Ch8PuNGfHO8GMeratv02UqO3qttJUdv9brcQW/UKrOEZ+a47fhpWJ1OXD+wj9/j5RTjoMopxiLR7Hdnh5XbCB9MQbUgLkKLMelJ2HqqlPalMCixveAMrh/YB6o2mnkZDIb8dPs1qEsZYIrDkYoLbfpMGqFXza2XQM+9qreW3mteZnR4Zo7556oxOis14PHeFFP+9KaY6ohmvzs7gkAgyLxmJHd7XRU2g7qEnB7ROFFd26bPVFFMN1TN05Nd4aRn4itvCI9jyrGyauQm9Qh4vIKiqauCoomPZr87OyxQN3wwBXUJsXot6iRKV1xKI0fPHNRIcRLcQDH1ToNTfuE2pwunqmrQV6SCLtU+d1PZjO7LZWXi45QKTz0nIfCbvSJArahm1A4XLDYH7HX+44L8xjKpCWyOes9xkZpPLgnF53aKmwqVmtbHBR1Bg7UOnIRXm9KPy3RLeFXgR0Gw+X91V7kUEGxuKOwSGdUN4ocFZeDvhA8Qd6NWKQGFUrKmk1g80qW1vw6eKUOUToOUHtHgOM7vZ9U8oRbjpJbrdTKEt3N1sF2WaJtchvWiWD2o8MFmUJcQp9egutEGqyv4oNqUJuVEg2RrHTXZ6Sp6r9W9Y+RfFPn8pyO4ZUiORzkFIIPiWkx3lc3ovjAFdQm9ow2I02vxU2l10J8p0rSzomw7OKMXz9IQTo476E3Aj1XL6xzicgv4fM8R3HnlANHzCgIn3Ag7VGWzVEcBEQgJy8ZgCqoVHMfhxj4pmLl2Oz4+VEj7chgdxLZjp8FxHMb1Tad9KQwGo4nLag1KLp4fOwQTM5Lx8Je7UGd3YPbwXNHzo1303KuMTnoVdeMU9Azl8TKnOvr0x0P45Yj+reo/XUocxXRDVGWzVEcBCYfXHfPi88BmUH7gOA7X9DLh019ci9d+PIKleeJ1o9SE3lqMWqAom6NYB0sh36NrdTixdt8xTL9K3LwHABqKKX+oymYjBYMC7LETYUBCDP77i2vx1k9H8W3RuYDnVaroRVBWaSKoyS5z0XOvL62Xb9b69cETSDRGYkhakuS53bXsOiv5HpjmQF25N8ZlZuLjeAU4hULctVjii7/UTbxXpB5RGhUsNgcElxsOS2tXbxfRwdFUCkMsC7FbInbH7RA/7s9F16V0wWl1QKUTt/9IuaFzEqYtUSRch0XLaQAgisD2I6L0f4woFCBKFTheXElyisCPePM39emPRzDt6kHgVL6y/N4TXvDuvxxdpgH4/z6Fpq29buSX4cAbjtx5zMLngc2gJPjg0EkY1Crc0LtnwHNounonNdCTnaZqX32r9pARI8+stabBim8OncCdV/nPXt5Kro6enS2doqt3Oquoy6AAU1AS5MZHo6LBKlreoVZFb+So09CTfd5N7/Gpbmx/7p3vj53GrHdWY2CqCX1MccHJpZjy5zy9smM4T+9dpNNDCPHWhJJtY1MoAJeZiU9uzpob8G3ROah4HgIBFAF0VKNSDVCy0TeKZGEINxaBByjVo6q3t2/EtNjsuP7F5QCAF6ZNDl6umwCgM4uqp6gk6l0AxJORMBiyw2ZQfrC73Xhs0x6M/+hr1Noc+PCWa6AQmUEpKOYlUdKUTdGLT9XOgoUOlxv9UxIBANcPzg5eLsVS7qxgYeeEhCFIl82gPDAF5Ye1x4uRV34eO++5EW/fOAo5ceKZIno11nbMhfkhxVJDTXYfNT0X96z4yHZ9PjZSjz1/nY3kmChU1FmClxtBb6SmKptiqQ9G94UpKD+cszRiiCkWSZHB/SqLImLDfEWBOW0Ibu0kHByz
07MQH61sf/7DczVmlNfW44og3Mu9ci30Zqz5Fnpv1fkU0yx1dmRff2raGExB+UWt4OFiD8hlz86CMxjUywSjnrmoMRidkcvKSYLjOU+8j0hsDCdh2+WVCpRYrFDwPHhl63YU6ta3LBpOv/uD+azPtRnETTgKdeuYoAQlD73aIFouAwCUevEVbpUh8GyRV/t3xIjlOPAqHkKDuIlM8i0o3iR1RmvZuqZ7IVWaXWStaufxMxidk9Y2uSp5zGyhxJ3Faog3nq2jY7Bimm53d4xzkiIcMx42g/LAZlB+uGC1t6nkj06g516lpZjqSM/T+xHpVe3PYrGj4AzG5KS3TS695Bl0ZbORIiACCc/GYArKL/cM6oOdxRVBn1+hbt+CfXuooJhmqdRB7/EpMbcvSW51fQOOnavC1Vm92ibXRm/koCm7lKU6YlCgTSPM4sWLMWLECBgMBiQkJGDq1KkoKCgIeP5DDz0EjuPw2muv+ewvKCjA6NGjkZKSgoULF/oc279/P26++WYkJCRAq9UiPT0d06ZNQ3V18PWZ2ssVibGosTlw8oK5w2QyOpYdx4rQNykePaLo5TJkyAOtcWnmzJkAmJNEOGmTgtq2bRvmzJmD3bt3Y9OmTXC5XJg8eTIaGlq7+HzxxRf48ccfkZyc3OrYnDlzMGPGDKxZswbr1q3Dzp07AQCVlZWYOHEi4uPjsXHjRuTn5+O9995DUlISGhs7rmJahFqFMamJogliW5Jkp1hR1xG8i7Tc9FLT82hLi25fBo2VOw7gpqF92y5XS8/Vm6bsXp3Yj4TWuJSYmBj2vnV32uQksWHDBp+/ly9fjoSEBOTl5WHs2LHe/aWlpZg7dy42btyIm266qVU7tbW1GDJkCAYNGoTk5GTU1Xnyye3atQtmsxnLli2DUum5tIyMDFx33XVt7lh7EAjBvrLzeHhYcANYvVIDnYPOOpRZoYbO1TqBbUdQ6+YQoaDzpldrdSIiCMcUf5yrMePr/QVY/OK8tst1ARGUXIuoy6a4BiYGrXFp+PDhWLp0KasHFUbatYjQ/AXGxl6MAxIEATNmzMCCBQvQv39/v59buHAhJk2aBL1eD57nMWXKFACAyWSCy+XC6tWrqX5BXNN/Iv14zfnDoqCYboiibLOb3ht9XTtSHX2wdS+uHZCJ9B4xbZfrovdc0pRt7kK5+C7Xcak7ErKCIoTg8ccfx5gxYzBgwMVCby+99BKUSiUeeeSRgJ+98cYbUVVVhXPnzmH16tVQNLkKX3XVVXj66adx1113IT4+HjfccANefvllVFQE77AgBxzHITc+GvnVtUGdz1NMN8RT/MEoKKY6UoSYcsjlduP9LXvxwHUjQ5NLKQ8fbdntzCzVYXTkuFRZWQnAU51E/npQ4b1PXYWQDQZz587FwYMHsWPHDu++vLw8vP7669i3bx84iQFEo9GgR48erfa/8MILePzxx7F582bs3r0bS5cuxaJFi7B9+3YMHDhQtM0ChwqRvApZGjfOOnjYCQc9T5CkFFDo8DxsCbynDlBVU7G9TLULpS4FbAIHLUcQp1WjiI/A8CFX4GR9LSw6Pao4TwxRmtCASl4LK6+EighIcVpQpI4CB6BOGQUVEVCt9iy6p7gaUKPQoIFTQgkBaa4GnFJHAQCMghNa4kaFwmPY7+myoo5XwaJQQQGC3u4GnFREggCIIk5EEBfKeB04BY9kwQYLp4SZU4IDkCM0oFDfA0ShgIG4YIQLJZyn3SRihxU8ajkVOJ5HNmfFKaKFCxwi4UYMXChuygDak/BwEA7niee+ZCscOO1WwQEOkfUOJCjcKHJ6ZpQmhRsuAE63AkfsQCbMKBFUsBEOOo4giXfilFvddL/dIMIFVJEm0whvR5mggg08NBCQwjtxtt7za+yhVUDJAWVWj+t8pkGFck6LBocbagWH3jE6HKv2rEXG61UwW6worfGYNzOi1Ki2ulDvFKDkOWRHa3D0gg2wEsSqOegVHIqtbqz96QhijFEY
3C8H+Y0CFOCQE8nhmEWAACBaySFKCZy1EQACUrUcLC6gxkXAAciN5HG8gcBFCAw8QawKONNkYe2pBaxu4IITAM8hN4LDyUbASQgMCg7xaqDI6lHqyRoChwBUOz1/50RwOG0F7AJBhIKDSQMUNnqOmdQcBABuEBy1EGTpOZTYOFgFAh3PIUULnGg6N1HNAURARZPXXaYeKLMDjW5PVdxeOuBE09JMvMqTY6+8KUN7hg6ocgAWJ6Digd5aoKBp6TdWBdS5gFKr53edpiG44OJQ7/a0kaUjOGblQAiHGCUQoSAosXvO7aUhqHNzqHMBPAhytAQFNg4C4WBUEBgVBGcdPDSHDiEtLQ0WiwXnz58Hx3Ho1attXpYdOS698MILbbq2cLF48WKsWrUKx44dg06nw6hRo/DSSy8hJyfHe06gfv/tb3/DggULAADjx4/Htm3bfI5PmzYNK1euDN/FS8CREOas8+bNwxdffIHt27cjIyPDu/+1117D448/Dr5FEKLb7QbP80hNTcXp06fbfIEOhwNDhgzB8OHD8cEHH/g9x2w2w2g0ovTvjyJKJx6QStzicUPWc57Z2p8358HmcmPxZN83bWdja3/bU+oo9HZ4PP78Bfc2IxWcKVlU0E/bJ/kI9BEawEsErPr7bEuU+sBOB4E+W+BQIUftBHGK14GQ8kjSZmUFPmjwnwcxv7oBufERgF1i/a3e8704XC4Mnv8P1DY0Ys2T92Nkn14grrbXrzhmEdA3UrpgYbsKQAYhWwzJYFqpn7yfzx9rBPrqpX8/kog8CxEPtR7wm3/bdXV1iIqKEm26o8elQYMGoaCgADP+dApqraHNbYi2b6vHf/7aO6h+X3/99Zg+fTpGjBgBl8uFZ555BocOHcLRo0cREeF5YS4vL/f5zNdff40HHngAJ0+eRO/evQF4FFR2draPB6NOp4PRKJ6LNJy0aQZFCMG8efOwevVqbN261echAIAZM2Zg4sSJPvumTJmCGTNm4L777gvpAtVqNTIzM/165ISTC1Y7Uo3BuSDTtEp3W9ltFC4IBKerLmDXC/MwJCNw8UnJdkL+ZPuhKbszL73QGpcyMjJE3dk7imCcREwm32wta9aswbXXXutVTs3o9fpW59KkTQpqzpw5WLFiBdasWQODweDVykajETqdDnFxcYiL801eqlKpYDKZfKabgVi/fj1WrlyJ6dOnIzs7G4QQrFu3Dl999RWWL1/elkttNxesdgwOsoidwU2vklwUobd6beTpDZnR2rZZp/+zPQ99eybgivTW7sVtkkux7gRN2cZOnBSN1rj0zTffAAhvqiOz2TcWU6PRQKMRtxL5cxJpSUVFBb788ku/FqmPP/4YH330ERITE3HDDTfgueeeg8Eg7+ywLbTpsVuyZAkAz1SwJcuXL/cGrbWHfv36Qa/XY/78+SguLoZGo0FWVhaWLVuGGTNmtLv9tnDBakesRP66ZiIFegoqkqKCMlBUUFGa4H2eCSF48+sdeOr2CZJrEJJyKQ7U3VW2FLTGpbfeeguzZ88Oq4JKTU312f/cc8/h+eefD/y5AE4iLfnggw9gMBhwxx13+Oy/++67kZGRAZPJhMOHD+Opp57Czz//jE2bNrWvM+2gzSa+ttIW+27v3r3x9ttvt1lGOLjQaEOMxHpWM2UqPTIddLJOnOO1yBLo1EIocSnRV01HOZ+ts6Nfj+Ae3x3HinC+vhG3j/T/g22TXBtBv0g6MxmasottQG4nTbpBa1wym82YPXt2m2W3heLiYp81KKnZkz8nkUt57733cPfdd0Or9Y2+fvDBB73/HjBgALKysjB8+HDs27cPQ4cODbEH7aMTvxfRxU0IS1R4mfDe5j2465oh0AYZ18ZgtAUBniq4crcJAFFRUZJOEs3MmzcPa9euxfbt25GSkuL3nO+//x4FBQX49NNPJdsbOnQoVCoVTpw4wRSULPCcZMAGB3HTkCrKk/i1Z7QBVW7B+7f383685VJBoNZ4krYK
zsAmNykvPUh4fvnz1OsJJ3iFQtJLT0q2YHcEPEZs/k15JsLD5RDgahD3pONV4oqB2AKnseICvDGm6gA4bICEJ975egu+2HMYO/86B0SGeLXUFumG2uWpx0l81s+1plJMdZTS9DVI9VnSg7CrBFR1IaScRFry7rvvYtiwYRg8eLBku0eOHIHT6URSUvAFPeXm8lJQMpJsjERpkKXAGzgFIgmdshcNUCASdGRboEAkJd+yeocAg1pcKd/y5yXYU1CEwenJyE2RJ2+axQUYKP1qqMp205Pd2aFdD0rKSaQZs9mMzz77DK+88kqrNgoLC/Hxxx/jxhtvRHx8PI4ePYr58+djyJAhGD16dPs7FCLMihWAZGMEztUFt7ZjBj3TUR1Hb9QwE3qya4NIdVRVV4+UOCP+9dvbZZNbQzHdEE3ZtV0o1VF3Y8mSJairq8P48eORlJTk3S41461cuRKEEPz6179u1YZarcZ3332HKVOmICcnB4888ggmT56Mb7/91ptRgwbsnSgAKUYDvsovCvJsynkDqUGx30F4440dkAW71Srb7Amge7+pymaWuYDQThYb7LmzZs3CrFmz/B5LTU1tlUWiM8BmUAGY0jcNP5wuQ2EQ+fiySMeVAulMsrP59hUNbA+5sdL1H0qqa9Ervu0JYUXlBpHJIVzQlN2XXl1MRjeGKagApMYYcMegPnjpu72SbyiFHL1fbyHaVxepPZwk9IoEFdRIK8djJeXo2zNBVrnHG+jNGqnKpvce1OkhsieKZQULm2EKSoQnJ4zAzqJzeOyLbagX8XITKBpf3BRtLwKh2G+JH7DL7cbJsir07dk68Wd7cFHM+UNTtpuNlwFhFXXDB1NQIqTGGPD1Q1NRWF2Hq19biQ35p/2eFwmK2RwoZpKI5Oh4DwJAlIQH36nyavAch7QQaj6JyqWYboimbObBx6BBt3vs/MUxtUQZ6Rsu3zsyAl89+mt8/ONhPPzZd/jd6EH444QRPufEEQ5KzmNqc1tbZztvRipGJJS4mugm5SiZCb0dnjjE6f9tLpo4QTgiGb+lkMjIIXptdv/3M5YXPMcCxEHtOnQCA3olQSF2bSHEIsUG67Ap1XYIn49Vk6C8FaS7JRWr5E82PK+zEtnMw5HFvbND20nicqb7PU0hwHEcfnPVQHz1yHS89N3eVua+YiG4lEjhoJijtw7UXEuKBqctgWeOhBAs3bgTM68dEfCckOVa6Q0cpxvpyT5Dzx+G0Y1hCqoNDOzZA0NTEjDns81wuOiZtxji7MwvwtmqGvz6mmG0L4XRDSCCEJaNwRRUm+A4Dp/NvAnFtfX47aebvNPwZD6wA0W4SSKBTYphlw16/U7RB7ZO/3PDDsy8biQitGr55dJMN0RRdk96k2VGN4YpqDYSo9di1f23YO/ZCnx7/CwAwEro3UYbxa/QSlF2o9v/G+bZ6hp8lXcUD00JT3qWRooT50aKL9VWZjAIiNwu5s0bgymokIjRazG5bxp2nDoHAKihmPKnhqOXZqmWoo/NBbv/0frfG3fhhqG5snvveeUGcBjpCC44KMpmqY4YFGAKKkRsTjeiwmBCYoTOmaoaLNu0G/NuGkv7UhjdiGYvPrk3Rjd0M5eLc3UWjM3sCYBuyh+qaZYgXmYjnOQaW88c//Lfjbht5ACM7hu43EC75UbQWwfKpVSsEGCpjsSgnc38cuayUlAcx4OTCgKRihdSid8ShcYza3IRAigUUGjUKHSpkKlsiscRebAEiRgSSfx49pyGFhmwSfabl6gXFQqnBA1683a/dapa0nzPAkEcgZ0tuAD386TFjaxIBUiLe1paXYM7rx4s2l57OdkIZDWHyrUn1imEukgnLQRZwSgpQfy6QomTKrQS9NFzLGsso0NhJr4QuW1wFl7bshd1VjsoLkvASfErdFJMdeRsMYa6BQEvr92GH06cxZCM5PDKpWh6oSqbeT0HhKU6Ch9MQYXIQ2MGIz3OiFkrNiKCUtE+AIigVKwQoJvq
yNAi7c9z//0GH27PwzfP/BaD08KroAwKmumG6MmOvKxsLYyuAlNQIaLgeSy7+3oUVFzAx9v3ULuOOIiXPA+rbI6ea1e8xjNYE0Lwya4DePXeWzAiMzX8cin6xcSr6SmoeHrOop0eAQIEIvNG8aW3M8EUVDuI0Wvx719PxqayethddAbrs6CX6ugMxRRPRQ2eH3Dyw39FWU09rI6OUdRFFFMdFVEMhDpNzx+G0Y1hCqqdjExPAiHAqeo62pfSLalr9HhQ1jSwEZRBByKEYx2Kdq86B0xBtROO42CrvYBqC50BMhH0Uh2ZeHrmxWQtB3OTcspIiMUtw/p1jFwNPTNbspbezzWJpTpiUODyWvrkuZDcd1siVZaC17Y2qSXHxeBoVS3GDegj/lkJN/NQPHfcggIK3t3+chsiZRKUav8LL4Kbh1LR/lc90cSYfu4ZIQTLNudh0cr1GJWdhi+fvB8aifCAkPDjj+0gwZW8aO9z6A+HQIBgimNKyQ7BDd0JAo7nIPmESnkaXoZJUFkcVPi4vBQUJX51zXA8+q//oKD8PJ6dciVi9B23LnSBKNGDkiffeTePHjIoqLZACMFjH6xDQQPw1ZP348qsXh0qv9pBkEBpFuWRTUU0qp30+t3ZYfWgwgdTUDJwRaoJu566D3/47yYMe/FD9EuKw7DURMwdNxQ9DCwEv70IgoB3Nu/Bn1ZuRK/4aNQ12rD8ydm4MtlA+9IYDEYYYQpKBrI1bigSYrBqzq+w9fBJFNeYserACUx8879Y/7s7kBoTFTbZfXh6a1BZqvB7LlaZG3DvP1fiVOUF3Hn1IHx9oADrnrgPfZMiwy7bHzkU0w3lRNJbg8qhmOKpsyMIAgSZTZdyt9dVYQpKBs44ePTWCOA4DuOyPLE4dw3vh/mrtuCmf/0PH957E65ISQiL7LOCGhkKOnWZzriU6B0mJdVod+DFtdvw4fb9GNevNz599DcwtCgdX9ggIJPCoHm6kVCRS122FchkxgBGB8MUlAzY/aT84XkOr9xxLRZ+vQvXvb4SboEgJyEGH8y4AX0TY2WT7Qhm0TxMBFv9weUWUFBxHjWNNthdbjjcAjgAqTEG9Io1IrrFuXWNNvxl1Wb8cOIseJ7Du7/7FSYM6APuEseEANU2wg4tuR7Z9NYl7ME6aHRDmJNE+GAKSgYieP8PE89zeP6m0XhozGD0XfguCiprcNUrK/DHiSPw8JjBsjhT6ClGnOs5//1udDjx/cli7Dldhj1F55B3thyEAD0MeqgUPDRKBVwCQUmNGRa7E/GGCIzMTEFFnQUHzpRheO+euHloX8yaMBLxRv/m0Qj5c98GBS25Htn0FARN2YzuC1NQMpCoFFcSScZILP31ZDz8yTcAgJe+/QnZCTH4xRXZ7ZadwNNLN5So9PUePFpWjfd2HcSnP+UjJkKLq3v3xNQrsrH49vHINcVBcYkrOyEENY02FDY6sPdUKeIMelyTk46UOKOkbBOl8ue05AKAiaIXnYnFQQWEEAFE5shaudvrqlxWCorjeXAi8TyyyPBTOuJ0I4dcjWc2ESje6DfjhmPdkSJ8efAE+iXHY3h2OhQREb4nhfBQnrArkKtxS5oEJOOkAsQ6eT7r/56eaeSQrnTgfz8dxfLt+3DgbDl+MaIfVj92F0b27uk1y4nFOcVH6JGg1eLqnHT/JwS47sIGAf0MQXzXMptKChsI+hmarqkdsU6SZWH8cKrRjX5RMjzfErNA4sdkXdjoRj+DQvK6iVvihSnMv0/G5cVlpaA6O3+9fTwUPIe9p89h8PNvIyM+GgNTEtAnIRaRGjUMGhWMeg16xRqREW+EKSoSfBgCPuWgrLYen+47g3e++BpJ0ZG4f+wwfP7IdMRE6GhfGoPRobA1qPDBFJQMJKqCe5j6JMTi4wdvBwBU1TfgUEklDpZU4nR1LUprzKi3OVDbaMPZC3Uora2HRqnEyIxk/GJoX/x6ZD/o1K1TSkuZF+WmqKoGr234AR/t+hk3
jhyM/86dhlFZqa2cGMKNSUPnTZyqmY2qeZHNfAISjvpNTEEBYApKFkJRET0MEbguNwPX5bYoT97CxGd3ulB0vg7bCs7g7e378Od132P+5Csx59phPms5HfUcn6q8gBfXf4/P9xzB1GG52P6nB5CYkECtDINA6HiV0VwZcFMcs2jdb0b3hikoGahycogPchYVLBqVEn1NcehrisOssUOw+dhpzP/vd1hz4DiW/uYGZDW5qle5ecQrw5fq6HR1Lf721U58uvsQfjVyAPYufBi9Ezyy8xvl73ewVDoI4iks3FfaCbW6TFV2gh6UnBVo3e+uQHMNJ7nbZLBs5l0CjuMwITcDu566F8PTkjDu5f/gUEllWGW63AKe+fw7DPu/pXC63djz54fw7/tv9SonBoPBCDdsBiUDfXQdM4vQq1V46ZfXwSUIeH7d91jx29vQJwwVXi02B2a+sxqnq2ux89nfIren/ywYHdVvf2RF0Hm3yqKY8ieLYqojWve7K8CcJMIHe+pkoKSD0+E9ecMoVJkbcPWLH+BfP51EaU29bG2fqzFjyssfwuZ04ds/3ou+SfEBz+3ofvvIplTZtsRGb+AottIz+9C634zuzeU1g+K44Gr1yIxNgHf9mBOpSyR2LCianCMSIyKw5f9mYcWun7H1XB2e/3gNRvbuiWdvG4+xfdMDCBe/L3nFFdh08AT+/e2PuP6KHLwx8xaoleLXa3MI4NS8/wJCLUWLHkVI8UTWYN8wxdqWasPPZ61Eos0gkQrE9BdvZOugqip+ZQvEs7+9mTQuw7UVQgTxmmYhtslgMyhZ0FJIf6NSKnDv2KH4v9vG4vgrj+OGQdm4441PsCW/qM1tfbjjAK5f9B6OlFTg9Zm3YskDUyWVEwDoKMZo0ZJNtc8U0yzRlM3ovlxeMyhKpIRhHSho2RpApYvE4zeMQk2DFZ/8cBCDU03IO30OFpsdDXYnrA4nGp0uz/8dTjQ27Suvs2D/mTLYnS6smj8DY1u6vAcju+PqMraWraOjKGjJ9cim9z5JU3Znh61BhQ+moGTgpBXIjZA+L9yybxvaF9e9uByf7TmCXnFGROu10GvU0KtV0KtV0Hn/r4QxOhL9UxIw/8bRGNzLhMjItnfgRCNBP0r1kU40COhn6PjXelpyAeCERUC/qO4nu7PDcvGFD6agLiOG9+6Jgy/MgUBIa3dwCmtzDAaD0R6YgpKBBIomvktlp/eI6TDZiZQCVgEgkVLqHVpyASCBYpolmrI7O4IACDKb5FhBXQ/MsCwDNH+6bNjoPtCcBLMJOIMGTEHJQAWdiuudQDa9hdwKSqVtackFgAqKMVg0ZXd2iCCEZWNcbiY+npOuNxOOV0GeAM0VR0kYf8h+r50AHCdaz4kRgE5ayoQ6/u4L37Q/hNgxX5ijBSN4Li8FRYlMfXeVTW+Az6SUeoeWXADIpJjqKJNmrftODnMzDx/MxCcDZRRT/tCUXU5TNiWTEy25AFBmo2f2oSmb0X1hMygZaOygFDSdTXaDm16NoAZKxZFoyQWARolq6mGVTbMYVSeHxUGFDzaDkgGaxUbpyqZn4qMlm26fqYmm2u/OTrOJT+4tWBYvXowRI0bAYDAgISEBU6dORUFBgc85HMf53V5++WXvOXa7HfPmzUN8fDwiIiJw6623oqSkRLb7FApMQclAL133lJ1GVTadAZOWXABI09P7udKUzRBn27ZtmDNnDnbv3o1NmzbB5XJh8uTJaGho8J5TVlbms7333nvgOA6/+MUvvOc8+uijWL16NVauXIkdO3bAYrHg5ptvhttNz0zDTHwycKIByI3sfrKPN9BLdXScUsohWnIB4DjFdEPHLW70i2LDhT/C4RbelvY2bNjg8/fy5cuRkJCAvLw8jB07FgBgMpl8zlmzZg2uvfZa9O7dGwBQV1eHd999F//5z38wceJEAMBHH32E1NRUfPvtt5gyZUp7uhMyl9UTV29zeFyupVzNZcZiF1Cvar/MUB5yi5XA
rJChzxIlM/zLFmBW0nmztjQIMIf7e/bj1m+xumFW0FES9VY36ppk+yuJIRt+zHkWqwtmpVLazbwdOM3mVvvMfvZ1NtyuBumTQmzz0v5rNBpoNBrRz9bV1QEAYmP9V7+uqKjAl19+iQ8++MC7Ly8vD06nE5MnT/buS05OxoABA7Br1y5qCgrkMsBqtRKTyUQAsI1tbLvMNpPJRKxWK+1hphXhHnciIyNb7XvuuedEr0kQBHLLLbeQMWPGBDznpZdeIjExMT739OOPPyZqtbrVuZMmTSKzZs0K+R61l8tiBqXValFUVASHg2JaBQaDERbUajW0Woq1XQIQ7nGHEALuklm81Oxp7ty5OHjwIHbs2BHwnPfeew933313UPfU3zV0JJeFggI8D0tnfIgZDMblS2cad+bNm4e1a9di+/btSElJ8XvO999/j4KCAnz66ac++00mExwOB2pqahATczHhdGVlJUaNGhXW6xaDueYwGAxGF4YQgrlz52LVqlXYvHkzMjICFx599913MWzYMAwePNhn/7Bhw6BSqbBp0ybvvrKyMhw+fJiqgrpsZlAMBoPRHZkzZw5WrFiBNWvWwGAwoLy8HABgNBqh012MBTGbzfjss8/wyiuvtGrDaDTigQcewPz58xEXF4fY2Fj84Q9/wMCBA71efTTgCAlndlMGg8FghJNAa0TLly/HzJkzvX+//fbbePTRR1FWVgaj0djqfJvNhgULFmDFihWwWq2YMGEC/vWvfyE1NTVcly4JU1AMBoPB6JSwNSgGg8FgdEqYgvLD4sWLwXEcHn30Ub/HH3roIXAch9dee81nf0FBAUaPHo2UlBQsXLjQ51h6errfXFgvvvhimHrRdgL1Oz8/H7feeiuMRiMMBgOuuuoqnD171nu8K/fbX58tFgvmzp2LlJQU6HQ65ObmYsmSJT6f62p9fv7551tdT8vsAoQQPP/880hOToZOp8P48eNx5MgRnza6Wp8ZlwHUIrA6KXv27CHp6elk0KBB5Pe//32r46tXryaDBw8mycnJ5NVXX/U5NmHCBLJkyRKyd+9eMnz4cLJjxw7vsbS0NLJw4UJSVlbms1ksljD3KDgC9fvkyZMkNjaWLFiwgOzbt48UFhaS9evXk4qKCu85XbXfgfr829/+lmRmZpItW7aQoqIi8u9//5soFAryxRdfeM/pan1+7rnnSP/+/X2up7Ky0nv8xRdfJAaDgfzvf/8jhw4dItOmTSNJSUnEbDZ7z+lqfWZ0fZgXXwssFgvuvvtuvPPOO/jrX//a6nhpaSnmzp2LjRs34qabbmp1vLa2FkOGDMGgQYOQnJzsTTnSjMFgaJUTqzMg1u9nnnkGN954I/72t7959zXn72qmK/ZbrM8//PAD7r33XowfPx4AMGvWLPz73//G3r17cdtttwHomn1WKpV+r4kQgtdeew3PPPMM7rjjDgDABx98gMTERKxYsQIPPfQQgK7ZZ0bXhpn4WjBnzhzcdNNNft0qBUHAjBkzsGDBAvTv39/v5xcuXIhJkyZBr9eD53l6+avaSKB+C4KAL7/8EtnZ2ZgyZQoSEhJw5ZVX4osvvvA5ryv2W+y7HjNmDNauXYvS0lIQQrBlyxYcP37cp19dsc8nTpxAcnIyMjIyMH36dJw6dQoAUFRUhPLycp88bBqNBuPGjcOuXbu8+7pinxldHNpTuM7CJ598QgYMGODNTzVu3Dgfs8+iRYvIpEmTiCAIhBCPSeNSEx8hhNhsNh/TSTNpaWlErVaTiIgIn23Lli3h6E7QiPW7rKyMACB6vZ784x//IPv37yeLFy8mHMeRrVu3+rTTlfot9V3b7XZyzz33EABEqVQStVpNPvzww1btdKU+f/XVV+Tzzz8nBw8eJJs2bSLjxo0jiYmJpLq6muzcuZMAIKWlpT6fefDBB8nkyZN99nWlPjO6PszEB6C4uBi///3v8c033/hNW5KXl4fXX38d+/btk8xLpdFo0KNHD7/HFixY4BOX
AAA9e/YM+brbi1S/habs6rfddhsee+wxAMAVV1yBXbt2YenSpRg3bpz33K7Sb6k+A8Abb7yB3bt3Y+3atUhLS8P27dsxe/ZsJCUl+cy4ukqfAeCGG27w/nvgwIG4+uqrkZmZiQ8++ABXXXUVgNbxNCRALriu0mfGZQBtDdkZWL16NQFAFAqFdwNAOI4jCoWC/P3vf/f+u+VxnudJWlpaUDICzbhoItVvm81GlEol+ctf/uLzuSeeeIKMGjUqKBmdrd9SfbZYLESlUpH169f7fO6BBx4gU6ZMCUpGZ+tzICZOnEgefvhhUlhYSACQffv2+Ry/9dZbyT333BNUW12lz4yuBZtBAZgwYQIOHTrks+++++5D37598cc//hFJSUmt7O1TpkzBjBkzcN9993XkpcqKVL81Gg1GjBjRqnz08ePHkZaW1pGXKhtSfXa73XA6neAvqTWlUCi8M8rLAbvdjvz8fFxzzTXIyMiAyWTCpk2bMGTIEACAw+HAtm3b8NJLL1G+UkZ3hikoeLyPBgwY4LMvIiICcXFx3v1xcXE+x1UqFUwmE3JycoKWU19f782T1Yxer0dUVFSIV94+gun3ggULMG3aNIwdOxbXXnstNmzYgHXr1mHr1q1By+lM/Q6mz+PGjcOCBQug0+mQlpaGbdu24cMPP8Q//vGPoOV0pj4DwB/+8Afccsst6NWrFyorK/HXv/4VZrMZ9957rzcObNGiRcjKykJWVhYWLVoEvV6Pu+66K2gZna3PjMsA2lO4zsqlC+eX0laTRlpamt+iZA899FD7L1ZG/PX73XffJX369CFarZYMHjzYJx5Iiq7Q70v7XFZWRmbOnEmSk5OJVqslOTk55JVXXvE6yEjRGfvcHNekUqlIcnIyueOOO8iRI0e8xwVBIM899xwxmUxEo9GQsWPHkkOHDgXdfmfsM6Prw3LxMRgMBqNTwuKgGAwGg9EpYQqKwWAwGJ0SpqAYDAaD0SlhCorBYDAYnRKmoBgMBoPRKWEKisFgMBidEqagGAwGg9EpYQqKwWAwGJ0SpqAYDAaD0SlhCorBYDAYnRKmoBgMBoPRKWEKisFgMBidkv8HJ+AGEZfAygsAAAAASUVORK5CYII=",
- "text/plain": [
- "
"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "# Select the first day of t2m_mean\n",
- "variable=\"mean\"\n",
- "t2m_mean_day1 = daily_aggregated[\"t2m_\" + variable].isel(valid_time=0)\n",
- "\n",
- "# Set the absolute min and max for the color bar\n",
- "vmin = 270 # Minimum value (e.g., 270 K)\n",
- "vmax = 310 # Maximum value (e.g., 310 K)\n",
- "\n",
- "# Create a plot with Cartopy\n",
- "plt.figure(figsize=(10, 6))\n",
- "ax = plt.axes(projection=ccrs.PlateCarree()) # Use PlateCarree projection for latitude/longitude data\n",
- "\n",
- "# Plot the data\n",
- "t2m_mean_day1.plot(ax=ax, cmap=\"coolwarm\", transform=ccrs.PlateCarree(), vmin=vmin, vmax=vmax, cbar_kwargs={\"label\": \"Temperature (K)\"})\n",
- "\n",
- "# Add Madagascar's border using Cartopy's built-in features\n",
- "ax.add_feature(cfeature.BORDERS, edgecolor=\"black\", linewidth=1) # Add country borders\n",
- "ax.add_feature(cfeature.COASTLINE, edgecolor=\"black\", linewidth=0.8) # Add coastlines\n",
- "\n",
- "# Optionally, zoom in on Madagascar\n",
- "ax.set_extent([43, 51, -26, -11], crs=ccrs.PlateCarree()) # Longitude and latitude bounds for Madagascar\n",
- "\n",
- "# Add gridlines\n",
- "ax.gridlines(draw_labels=True, linewidth=0.5, color=\"gray\", alpha=0.5, linestyle=\"--\")\n",
- "\n",
- "# Add a title\n",
- "plt.title(\"Mean Daily {} Temperature (Day 1)\".format(variable))\n",
- "plt.show()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Looks great. Now, we need to see if we can do a spatial aggregation:\n",
- "\n",
- ">A mathematical aggregation like mean() involves summarizing data values (e.g., averaging) across a specific dimension, such as time, without considering spatial relationships. For example, calculating the daily mean temperature from hourly data is purely numerical. \n",
- "In contrast, a spatial aggregation using rasters and polygons involves summarizing data based on spatial boundaries. For example, when aggregating raster data (e.g., temperature) over a polygon (e.g., a country's boundary), the process involves selecting raster cells that fall within the polygon and computing a summary statistic (e.g., mean, sum) for those spatially defined areas. This type of aggregation accounts for geographic context and spatial relationships."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "To do this, we'll need to read in the shapefile that defines the shape of the polygon (ie the physical ground) and find the pixels of data that fall within the polygon. We can then use the `xarray` library to group the data by time using a resampler method. We can then use the `mean` function to calculate the average value for each day."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "import geopandas as gpd\n",
- "\n",
- "# we learned how to read in shapefiles in the kenya demo notebook\n",
- "zip_url_or_path = here() / \"data/testing/gadm41_MDG.gpkg\"\n",
- "\n",
- "shape = gpd.read_file(zip_url_or_path, layer = \"ADM_ADM_1\")\n"
- ]
- },
- {
- "attachments": {
- "image.png": {
- "image/png": ""
- }
- },
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "We are using the layer 1 of this shapefile from GADM.org. This refers to the states in red:\n",
- "\n",
- "\n",
- "\n",
- "When we read in the shapefile, the data in the `geometry` column is a specification of the polygons that represent geographic boundaries."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- "
\n",
- "
\n",
- "
GID_1
\n",
- "
GID_0
\n",
- "
COUNTRY
\n",
- "
NAME_1
\n",
- "
VARNAME_1
\n",
- "
NL_NAME_1
\n",
- "
TYPE_1
\n",
- "
ENGTYPE_1
\n",
- "
CC_1
\n",
- "
HASC_1
\n",
- "
ISO_1
\n",
- "
geometry
\n",
- "
\n",
- " \n",
- " \n",
- "
\n",
- "
0
\n",
- "
MDG.1_1
\n",
- "
MDG
\n",
- "
Madagascar
\n",
- "
Antananarivo
\n",
- "
NA
\n",
- "
NA
\n",
- "
NA
\n",
- "
NA
\n",
- "
NA
\n",
- "
NA
\n",
- "
MG-T
\n",
- "
MULTIPOLYGON (((46.93914 -20.17013, 46.9386 -2...
\n",
- "
\n",
- "
\n",
- "
1
\n",
- "
MDG.2_1
\n",
- "
MDG
\n",
- "
Madagascar
\n",
- "
Antsiranana
\n",
- "
NA
\n",
- "
NA
\n",
- "
NA
\n",
- "
NA
\n",
- "
NA
\n",
- "
NA
\n",
- "
MG-D
\n",
- "
MULTIPOLYGON (((50.15347 -16.0032, 50.15347 -1...
\n",
- "
\n",
- "
\n",
- "
2
\n",
- "
MDG.3_1
\n",
- "
MDG
\n",
- "
Madagascar
\n",
- "
Fianarantsoa
\n",
- "
NA
\n",
- "
NA
\n",
- "
NA
\n",
- "
NA
\n",
- "
NA
\n",
- "
NA
\n",
- "
MG-F
\n",
- "
MULTIPOLYGON (((47.04934 -24.08504, 47.04925 -...
\n",
- "
\n",
- "
\n",
- "
3
\n",
- "
MDG.4_1
\n",
- "
MDG
\n",
- "
Madagascar
\n",
- "
Mahajanga
\n",
- "
NA
\n",
- "
NA
\n",
- "
NA
\n",
- "
NA
\n",
- "
NA
\n",
- "
NA
\n",
- "
MG-M
\n",
- "
MULTIPOLYGON (((44.23931 -18.96421, 44.23931 -...
\n",
- "
\n",
- "
\n",
- "
4
\n",
- "
MDG.5_1
\n",
- "
MDG
\n",
- "
Madagascar
\n",
- "
Toamasina
\n",
- "
NA
\n",
- "
NA
\n",
- "
NA
\n",
- "
NA
\n",
- "
NA
\n",
- "
NA
\n",
- "
MG-A
\n",
- "
MULTIPOLYGON (((47.67118 -20.36464, 47.6713 -2...
\n",
- "
\n",
- "
\n",
- "
5
\n",
- "
MDG.6_1
\n",
- "
MDG
\n",
- "
Madagascar
\n",
- "
Toliary
\n",
- "
NA
\n",
- "
NA
\n",
- "
NA
\n",
- "
NA
\n",
- "
NA
\n",
- "
NA
\n",
- "
NA
\n",
- "
MULTIPOLYGON (((44.33236 -25.26931, 44.33236 -...
\n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " GID_1 GID_0 COUNTRY NAME_1 VARNAME_1 NL_NAME_1 TYPE_1 \\\n",
- "0 MDG.1_1 MDG Madagascar Antananarivo NA NA NA \n",
- "1 MDG.2_1 MDG Madagascar Antsiranana NA NA NA \n",
- "2 MDG.3_1 MDG Madagascar Fianarantsoa NA NA NA \n",
- "3 MDG.4_1 MDG Madagascar Mahajanga NA NA NA \n",
- "4 MDG.5_1 MDG Madagascar Toamasina NA NA NA \n",
- "5 MDG.6_1 MDG Madagascar Toliary NA NA NA \n",
- "\n",
- " ENGTYPE_1 CC_1 HASC_1 ISO_1 \\\n",
- "0 NA NA NA MG-T \n",
- "1 NA NA NA MG-D \n",
- "2 NA NA NA MG-F \n",
- "3 NA NA NA MG-M \n",
- "4 NA NA NA MG-A \n",
- "5 NA NA NA NA \n",
- "\n",
- " geometry \n",
- "0 MULTIPOLYGON (((46.93914 -20.17013, 46.9386 -2... \n",
- "1 MULTIPOLYGON (((50.15347 -16.0032, 50.15347 -1... \n",
- "2 MULTIPOLYGON (((47.04934 -24.08504, 47.04925 -... \n",
- "3 MULTIPOLYGON (((44.23931 -18.96421, 44.23931 -... \n",
- "4 MULTIPOLYGON (((47.67118 -20.36464, 47.6713 -2... \n",
- "5 MULTIPOLYGON (((44.33236 -25.26931, 44.33236 -... "
- ]
- },
- "execution_count": null,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "shape"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "In a vector image such as a shapefile, the steps between each value are not guaranteed to be equal (unlike on a cartesian plane), so we need to think about how those values \"project\" onto a known Coordinate Reference System (CRS) that has equal steps.\n",
- "\n",
- "A quick note about CRS:\n",
- "\n",
- "> The WGS 84 (World Geodetic System 1984) is a widely used global Coordinate Reference System (CRS). It is the standard CRS for GPS (Global Positioning System) and is commonly used in geospatial applications. WGS 84 defines a geographic coordinate system based on a specific ellipsoid model of the Earth. \n",
- "\n",
- "> Key Features of WGS 84 \n",
- "Type: Geographic Coordinate System (GCS). \n",
- "Coordinates are represented in latitude, longitude, and optionally altitude. \n",
- "Units: Degrees (for latitude and longitude). \n",
- "Ellipsoid: WGS 84 uses a reference ellipsoid with: \n",
- "Semi-major axis: 6,378,137 meters. \n",
- "Flattening: 1 / 298.257223563. \n",
- "Datum: The WGS 84 datum defines the origin and orientation of the coordinate system. \n",
- "EPSG Code: The EPSG code for WGS 84 is 4326. \n",
- "\n",
- "Spatial geometry is complicated and silly, hence [all maps are wrong](https://youtu.be/kIID5FDi2JQ?si=OZASX3i6Aglqwa4u).\n",
- "\n",
- "Nevertheless, we can see that the shapefile has a CRS of EPSG:4326, which is what we want:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "\n",
- "Name: WGS 84\n",
- "Axis Info [ellipsoidal]:\n",
- "- Lat[north]: Geodetic latitude (degree)\n",
- "- Lon[east]: Geodetic longitude (degree)\n",
- "Area of Use:\n",
- "- name: World.\n",
- "- bounds: (-180.0, -90.0, 180.0, 90.0)\n",
- "Datum: World Geodetic System 1984 ensemble\n",
- "- Ellipsoid: WGS 84\n",
- "- Prime Meridian: Greenwich"
- ]
- },
- "execution_count": null,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "shape.crs"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Were this different, we'd have to find some way to adjust these projections. For our netCDF file, however, we don't need to worry about this because the data themselves are created using a rasterized netCDF file, which is a standard format for storing gridded data. The data is already in a grid format, and the pixel values are already aligned with the geographic coordinates of the raster. In spatial geometry, we use degrees to represent the latitude and longitude of the corners of each pixel. This means that the data is already in a format that can be easily manipulated and analyzed using xarray and geopandas, because we refer to where the pixel is located in the world using degrees. It is essentially an absolute reference system.\n",
- "\n",
- "In the ERA5 dataset, the resolution is said to be 0.25 degrees, which means that each pixel represents a square area of approximately 25 km x 25 km at the equator. So at every unit of 0.25 degrees north-south or east-west, we have a new pixel of data, with a value for temperature or dewpoint or whatever. You can physically see each of these on the plot.\n",
- "\n",
- "Learn more about ERA5's resolution [here](https://confluence.ecmwf.int/display/CKB/ERA5%3A+What+is+the+spatial+reference)."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Now, in order to aggregate data spatially, we're pasting in a utility here for finding the intersecting values between our netcdf data and the polygons represented in our shapefile (ie the states, regions, etc.).\n",
- "\n",
- "Source: https://github.com/NSAPH-Data-Processing/air_pollution__aqdh/blob/main/utils/faster_zonal_stats.py"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "import numpy as np\n",
- "from tqdm import tqdm\n",
- "from math import ceil, floor\n",
- "\n",
- "from rasterstats.io import Raster\n",
- "from rasterstats.utils import boxify_points, rasterize_geom"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "This function indexes each pixel and maps it to the polygon it falls within. A few notes about this function:\n",
- "\n",
- "- It uses the `rasterstats.io` library to read in a raster tiff file\n",
- "- It uses affine transformations to convert the pixel coordinates to geographic coordinates\n",
- "- It needs to know where there is no data in the raster file, so we need to set a `nodata` value\n",
- "- `all_touched` is a boolean that determines whether to include all pixels that touch the polygon or just the ones that are fully contained within it; this is a domain specific choice"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "def polygon_to_raster_cells(\n",
- " vectors,\n",
- " raster,\n",
- " band=1,\n",
- " nodata=None,\n",
- " affine=None,\n",
- " all_touched=False,\n",
- " verbose=False,\n",
- " **kwargs,\n",
- "):\n",
- " \"\"\"Returns an index map for each vector geometry to indices in the raster source.\n",
- "\n",
- " Parameters\n",
- " ----------\n",
- " vectors: list of geometries\n",
- "\n",
- " raster: ndarray\n",
- "\n",
- " nodata: float\n",
- "\n",
- " affine: Affine instance\n",
- "\n",
- " all_touched: bool, optional\n",
- " Whether to include every raster cell touched by a geometry, or only\n",
- " those having a center point within the polygon.\n",
- " defaults to `False`\n",
- "\n",
- " Returns\n",
- " -------\n",
- " dict\n",
- " A dictionary mapping vector the ids of geometries to locations (indices) in the raster source.\n",
- " \"\"\"\n",
- "\n",
- " cell_map = []\n",
- "\n",
- " with Raster(raster, affine, nodata, band) as rast:\n",
- " # used later to crop raster and find start row and col\n",
- " min_lon, dlon = affine.c, affine.a\n",
- " max_lat, dlat = affine.f, -affine.e\n",
- " H, W = rast.shape\n",
- "\n",
- " for geom in tqdm(vectors, disable=(not verbose)):\n",
- " if \"Point\" in geom.geom_type:\n",
- " geom = boxify_points(geom, rast)\n",
- "\n",
- " # find geometry bounds to crop raster\n",
- " # the raster and geometry must be in the same lon/lat coordinate system\n",
- " start_row = max(0, min(H - 1, floor((max_lat - geom.bounds[3]) / dlat)))\n",
- " start_col = min(W - 1, max(0, floor((geom.bounds[0] - min_lon) / dlon)))\n",
- " end_col = max(0, min(W - 1, ceil((geom.bounds[2] - min_lon) / dlon)))\n",
- " end_row = min(H - 1, max(0, ceil((max_lat - geom.bounds[1]) / dlat)))\n",
- " geom_bounds = (\n",
- " min_lon + dlon * start_col, # left\n",
- " max_lat - dlat * end_row - 1e-12, # bottom\n",
- " min_lon + dlon * end_col + 1e-12, # right\n",
- " max_lat - dlat * start_row, # top\n",
- " )\n",
- "\n",
- " # crop raster to area of interest and rasterize\n",
- " fsrc = rast.read(bounds=geom_bounds)\n",
- " rv_array = rasterize_geom(geom, like=fsrc, all_touched=all_touched)\n",
- " indices = np.nonzero(rv_array)\n",
- "\n",
- " if len(indices[0]) > 0:\n",
- " indices = (indices[0] + start_row, indices[1] + start_col)\n",
- " assert 0 <= indices[0].min() < rast.shape[0]\n",
- " assert 0 <= indices[1].min() < rast.shape[1]\n",
- " else:\n",
- " pass # stop here for debug\n",
- "\n",
- " cell_map.append(indices)\n",
- "\n",
- " return cell_map"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "So to implement this we need to first convert the netcdf to a tiff so that we can rasterize it to each of the polygons in the shapefile. We do this with `rioxarray`"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "import rioxarray as rxr"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "First, we pick our variable of interest, then we set the spatial properties to make sure it conforms to the CRS we wanted"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "temperature = daily_aggregated['t2m_mean']"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "temp_set = temperature.rio.set_spatial_dims(x_dim=\"longitude\", y_dim=\"latitude\")\n",
- "temp_set = temp_set.rio.write_crs(\"EPSG:4326\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Write it out to tiff and read it back in (there's no way to do this in memory)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "temp_set.rio.to_raster(\"temp.tif\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Now we can investigate the tiff and see that it has all the properties necessary for the function"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "import rasterio\n",
- "\n",
- "src = rasterio.open(\"temp.tif\")\n",
- "raster = src.read(1) # Numpy array\n",
- "profile = src.profile # Metadata\n",
- "transform = src.transform"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "31"
- ]
- },
- "execution_count": null,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# the number of data points\n",
- "src.count\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "Affine(0.25, 0.0, 42.575,\n",
- " 0.0, -0.25000000000000006, -11.475)"
- ]
- },
- "execution_count": null,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# the affine transformation matrix:\n",
- "# Pixel size (resolution in x and y).\n",
- "# Origin (top-left corner in spatial coordinates).\n",
- "# Rotation (if the raster is not north-up). \n",
- "src.transform"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# any missing data locations\n",
- "src.nodata"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "33 59\n"
- ]
- }
- ],
- "source": [
- "# the number of rows and columns\n",
- "print(src.width, src.height)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Fetch the array of data"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "raster_array = src.read(1)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Function go brrrr"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- " 0%| | 0/6 [00:00, ?it/s]"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/n/home03/ttapera/.conda/envs/era5_sandbox/lib/python3.11/site-packages/rasterstats/io.py:335: NodataWarning: Setting nodata to -999; specify nodata explicitly\n",
- " warnings.warn(\n",
- "100%|██████████| 6/6 [00:00<00:00, 37.66it/s]\n"
- ]
- }
- ],
- "source": [
- "res_poly2cell=polygon_to_raster_cells(\n",
- " vectors = shape.geometry.values, # the geometries of the shapefile of the regions\n",
- " raster=raster_array, # the raster data above\n",
- " band=1, # the value of the day that we're using\n",
- " nodata=src.nodata, # any intersections with no data, may have to be np.nan\n",
- " affine=src.transform, # some math thing need to revise\n",
- " all_touched=True, \n",
- " verbose=True\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "The data below maps which grid entries fall into each of the regions in the shapefile (e.g. which pixel is in which state)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "[(array([24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26,\n",
- " 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28, 28, 28,\n",
- " 28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 29,\n",
- " 29, 29, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 31, 31, 31, 31,\n",
- " 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33,\n",
- " 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 34, 34,\n",
- " 35, 35, 35, 35]),\n",
- " array([16, 13, 14, 15, 16, 17, 18, 19, 20, 21, 12, 13, 14, 15, 16, 17, 18,\n",
- " 19, 20, 21, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 11, 12, 13,\n",
- " 14, 15, 16, 17, 18, 19, 20, 21, 11, 12, 13, 14, 15, 16, 17, 18, 19,\n",
- " 20, 21, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 12, 13, 14, 15,\n",
- " 16, 17, 18, 19, 20, 21, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 12,\n",
- " 13, 14, 15, 16, 17, 18, 19, 20, 21, 13, 14, 15, 16, 17, 18, 19, 20,\n",
- " 14, 15, 16, 17])),\n",
- " (array([ 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6,\n",
- " 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8,\n",
- " 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10,\n",
- " 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,\n",
- " 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15,\n",
- " 15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19]),\n",
- " array([26, 24, 25, 26, 27, 24, 25, 26, 27, 24, 25, 26, 27, 28, 23, 24, 25,\n",
- " 26, 27, 28, 29, 22, 23, 24, 25, 26, 27, 28, 29, 22, 23, 24, 25, 26,\n",
- " 27, 28, 29, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 21, 22, 23,\n",
- " 24, 25, 26, 27, 28, 29, 30, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,\n",
- " 24, 25, 26, 27, 28, 29, 30, 27, 28, 29, 30, 27, 28, 29, 30, 28, 29,\n",
- " 30, 31, 29, 30, 31, 29, 30, 31, 29, 30, 31, 30])),\n",
- " (array([33, 33, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 35, 35, 35, 35, 35,\n",
- " 35, 35, 35, 35, 35, 35, 35, 35, 36, 36, 36, 36, 36, 36, 36, 36, 36,\n",
- " 36, 36, 36, 36, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 38,\n",
- " 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39,\n",
- " 39, 39, 39, 39, 39, 39, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,\n",
- " 40, 40, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 42, 42,\n",
- " 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 43, 43, 43, 43, 43, 43,\n",
- " 43, 43, 43, 43, 43, 43, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44,\n",
- " 44, 44, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 46, 46,\n",
- " 46, 46, 46, 46, 46, 46, 46, 46, 47, 47, 47, 47, 47, 47, 48, 48, 48,\n",
- " 48, 48, 48, 49, 49, 49, 49, 49, 49, 50, 50, 50, 50, 51]),\n",
- " array([12, 13, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 12, 13, 14, 15, 16,\n",
- " 17, 18, 19, 20, 21, 22, 23, 24, 12, 13, 14, 15, 16, 17, 18, 19, 20,\n",
- " 21, 22, 23, 24, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 12,\n",
- " 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 11, 12, 13, 14, 15, 16,\n",
- " 17, 18, 19, 20, 21, 22, 23, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,\n",
- " 21, 22, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 10, 11,\n",
- " 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 10, 11, 12, 13, 14, 15,\n",
- " 16, 17, 18, 19, 20, 21, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,\n",
- " 20, 21, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 9, 10,\n",
- " 11, 14, 15, 16, 17, 18, 19, 20, 15, 16, 17, 18, 19, 20, 15, 16, 17,\n",
- " 18, 19, 20, 15, 16, 17, 18, 19, 20, 16, 17, 18, 19, 17])),\n",
- " (array([10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12,\n",
- " 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14,\n",
- " 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16,\n",
- " 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17,\n",
- " 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,\n",
- " 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,\n",
- " 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20,\n",
- " 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21, 21, 21,\n",
- " 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22,\n",
- " 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23,\n",
- " 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24,\n",
- " 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25,\n",
- " 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26,\n",
- " 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27,\n",
- " 27, 27, 27, 27, 27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28,\n",
- " 29, 29, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 31, 32, 32,\n",
- " 32]),\n",
- " array([20, 21, 22, 20, 21, 22, 23, 24, 25, 26, 20, 21, 22, 23, 24, 25, 26,\n",
- " 27, 19, 20, 21, 22, 23, 24, 25, 26, 27, 18, 19, 20, 21, 22, 23, 24,\n",
- " 25, 26, 27, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 15, 16, 17,\n",
- " 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 13, 14, 15, 16, 17, 18, 19,\n",
- " 20, 21, 22, 23, 24, 25, 26, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,\n",
- " 20, 21, 22, 23, 24, 25, 26, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n",
- " 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 5, 7, 8, 9, 10, 11, 12,\n",
- " 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 7, 8, 9,\n",
- " 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 24, 25, 26, 6, 7,\n",
- " 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 6, 7, 8,\n",
- " 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 5, 6, 7,\n",
- " 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 4, 5,\n",
- " 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 5,\n",
- " 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 5,\n",
- " 6, 7, 8, 9, 10, 11, 12, 13, 4, 5, 6, 7, 8, 9, 10, 11, 12,\n",
- " 4, 5, 6, 7, 8, 9, 6, 7, 8, 9, 6, 7, 8, 9, 10, 8, 9,\n",
- " 10])),\n",
- " (array([13, 13, 14, 14, 14, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 17, 17,\n",
- " 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19, 20, 20,\n",
- " 20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22,\n",
- " 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 24, 24,\n",
- " 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26,\n",
- " 26, 26, 27, 27, 27, 27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 29, 29,\n",
- " 29, 29, 29, 29, 30, 30, 30, 30, 30, 30, 31, 31, 31, 31, 31, 32, 32,\n",
- " 32, 32, 32, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 35, 35, 35,\n",
- " 35, 35]),\n",
- " array([27, 28, 27, 28, 29, 25, 26, 27, 28, 29, 25, 26, 27, 28, 29, 25, 26,\n",
- " 27, 28, 29, 26, 27, 28, 29, 21, 22, 23, 24, 26, 27, 28, 29, 21, 22,\n",
- " 23, 24, 25, 26, 27, 28, 29, 21, 22, 23, 24, 25, 26, 27, 28, 29, 21,\n",
- " 22, 23, 24, 25, 26, 27, 28, 29, 21, 22, 23, 24, 25, 26, 27, 21, 22,\n",
- " 23, 24, 25, 26, 27, 21, 22, 23, 24, 25, 26, 27, 21, 22, 23, 24, 25,\n",
- " 26, 27, 21, 22, 23, 24, 25, 26, 27, 21, 22, 23, 24, 25, 26, 21, 22,\n",
- " 23, 24, 25, 26, 21, 22, 23, 24, 25, 26, 21, 22, 23, 24, 25, 21, 22,\n",
- " 23, 24, 25, 20, 21, 22, 23, 24, 25, 20, 21, 22, 23, 24, 20, 21, 22,\n",
- " 23, 24])),\n",
- " (array([26, 26, 27, 27, 27, 27, 27, 28, 28, 28, 28, 29, 29, 29, 30, 30, 30,\n",
- " 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32,\n",
- " 32, 32, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 34, 35, 35,\n",
- " 35, 35, 35, 35, 35, 35, 36, 36, 36, 36, 36, 36, 36, 36, 36, 37, 37,\n",
- " 37, 37, 37, 37, 37, 37, 37, 38, 38, 38, 38, 38, 38, 38, 38, 38, 39,\n",
- " 39, 39, 39, 39, 39, 39, 39, 39, 39, 40, 40, 40, 40, 40, 40, 40, 40,\n",
- " 40, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 42, 42, 42, 42, 42, 42,\n",
- " 42, 42, 42, 43, 43, 43, 43, 43, 43, 43, 43, 43, 44, 44, 44, 44, 44,\n",
- " 44, 44, 44, 44, 44, 44, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,\n",
- " 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 47, 47, 47, 47,\n",
- " 47, 47, 47, 47, 47, 47, 47, 47, 48, 48, 48, 48, 48, 48, 48, 48, 48,\n",
- " 48, 48, 48, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49,\n",
- " 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 51,\n",
- " 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 52, 52,\n",
- " 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 53, 53, 53, 53,\n",
- " 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 54, 54, 54, 54, 54, 54, 54,\n",
- " 54, 54, 54, 54, 54, 54, 54, 55, 55, 55, 55, 55, 55, 55, 55, 55, 56,\n",
- " 56, 56, 56]),\n",
- " array([ 9, 10, 8, 9, 10, 11, 12, 9, 10, 11, 12, 9, 10, 11, 6, 7, 8,\n",
- " 9, 10, 11, 12, 6, 7, 8, 9, 10, 11, 12, 13, 7, 8, 9, 10, 11,\n",
- " 12, 13, 7, 8, 9, 10, 11, 12, 6, 7, 8, 9, 10, 11, 12, 6, 7,\n",
- " 8, 9, 10, 11, 12, 13, 5, 6, 7, 8, 9, 10, 11, 12, 13, 4, 5,\n",
- " 6, 7, 8, 9, 10, 11, 12, 4, 5, 6, 7, 8, 9, 10, 11, 12, 3,\n",
- " 4, 5, 6, 7, 8, 9, 10, 11, 12, 3, 4, 5, 6, 7, 8, 9, 10,\n",
- " 11, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 2, 3, 4, 5, 6, 7,\n",
- " 8, 9, 10, 2, 3, 4, 5, 6, 7, 8, 9, 10, 2, 3, 4, 5, 6,\n",
- " 7, 8, 9, 10, 12, 13, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14,\n",
- " 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 4, 5, 6, 7,\n",
- " 8, 9, 10, 11, 12, 13, 14, 15, 4, 5, 6, 7, 8, 9, 10, 11, 12,\n",
- " 13, 14, 15, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18,\n",
- " 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 4,\n",
- " 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 4, 5,\n",
- " 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 5, 6, 7, 8,\n",
- " 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 5, 6, 7, 8, 9, 10, 11,\n",
- " 12, 13, 14, 15, 16, 17, 18, 6, 7, 8, 9, 10, 11, 12, 13, 14, 9,\n",
- " 10, 11, 12]))]"
- ]
- },
- "execution_count": null,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "res_poly2cell"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "6"
- ]
- },
- "execution_count": null,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "len(res_poly2cell)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Look familiar?\n",
- "\n",
- "These are the 6 states in the shapefile. The values in the array are the indexes of the pixels in the netcdf file that fall within the polygon.\n",
- "Now, within each of these we can aggregate mathematically eg min max mean etc."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "array([[300.87808, 300.36603, 299.68634, ..., 300.49612, 300.53604,\n",
- " 300.56213],\n",
- " [300.89908, 300.4085 , 299.88904, ..., 300.50095, 300.53467,\n",
- " 300.54422],\n",
- " [300.9091 , 300.75906, 300.43503, ..., 300.51334, 300.5268 ,\n",
- " 300.52884],\n",
- " ...,\n",
- " [299.65125, 299.639 , 299.6254 , ..., 298.7499 , 298.93378,\n",
- " 299.17337],\n",
- " [299.4636 , 299.4493 , 299.42276, ..., 298.63046, 298.7523 ,\n",
- " 298.9044 ],\n",
- " [299.2504 , 299.22964, 299.1964 , ..., 298.59427, 298.63202,\n",
- " 298.73044]], shape=(59, 33), dtype=float32)"
- ]
- },
- "execution_count": null,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# the values themselves\n",
- "raster_array"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "stats = []\n",
- "for indices in res_poly2cell:\n",
- " if len(indices[0]) == 0:\n",
- " # no cells found for this polygon\n",
- " stats.append(np.nan)\n",
- " else:\n",
- " cells = raster[indices]\n",
- " if sum(~np.isnan(cells)) == 0:\n",
- " # no valid cells found for this polygon\n",
- " stats.append(np.nan)\n",
- " continue\n",
- " else:\n",
- " # compute MEAN of valid cells\n",
- " # but this stat can be ANYTHING\n",
- " stats.append(np.nanmean(cells))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "[np.float32(296.20612),\n",
- " np.float32(298.52927),\n",
- " np.float32(298.51355),\n",
- " np.float32(299.2492),\n",
- " np.float32(296.9399),\n",
- " np.float32(301.9361)]"
- ]
- },
- "execution_count": null,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "stats"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Looks like it worked!"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "# now we plot it using the shape.geometry to get the shapefile's location for each region\n",
- "gdf = gpd.GeoDataFrame(df, geometry=healthsheds.geometry.values, crs=shape.crs)\n",
- "gdf.plot(column=\"temp_vals\", legend=True)\n",
- "plt.show()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Now that we've demonstrated how this could work, we can substitute the GADM shapefiles for our healthsheds, and put it in a pipeline!!!"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Nepal\n",
- "\n",
- "We've modified the pipeline to now download Nepal as well. We'll test out an aggregation using the aggregation shapefiles we were provided by Dimeji. We probably want to decide on where to centralize data storage for files like this"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "try: from era5_sandbox.core import GoogleDriver, _get_callable, describe\n",
- "except: from core import GoogleDriver, _get_callable, describe\n",
- "\n",
- "try: from era5_sandbox.download import download_raw_era5\n",
- "except: from download import download_raw_era5\n",
- "\n",
- "try: from era5_sandbox.aggregate import resample_netcdf, netcdf_to_tiff, polygon_to_raster_cells, aggregate_to_healthsheds\n",
- "except: from aggregate import resample_netcdf, netcdf_to_tiff, polygon_to_raster_cells, aggregate_to_healthsheds"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-04-22 15:40:17,129 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.\n",
- "2025-04-22 15:40:17,130 WARNING [2024-06-16T00:00:00] CDS API syntax is changed and some keys or parameter names may have also changed. To avoid requests failing, please use the \"Show API request code\" tool on the dataset Download Form to check you are using the correct syntax for your API request.\n",
- "2025-04-22 15:40:21,113 INFO Request ID is ef1937bd-855a-4651-af70-550b72415172\n",
- "2025-04-22 15:40:21,266 INFO status has been updated to accepted\n",
- "2025-04-22 15:40:35,703 INFO status has been updated to successful\n",
- " "
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Done\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\r"
- ]
- }
- ],
- "source": [
- "from hydra import initialize, compose\n",
- "from omegaconf import OmegaConf\n",
- "\n",
- "# unfortunately, we have to use the initialize function to load the config file\n",
- "# this is because the @hydra decorator does not work with Notebooks very well\n",
- "# this is a known issue with Hydra: https://gist.github.com/bdsaglam/586704a98336a0cf0a65a6e7c247d248\n",
- "# \n",
- "# just use the relative path from the notebook to the config dir\n",
- "with initialize(version_base=None, config_path=\"../../conf\"):\n",
- " cfg = compose(config_name='config.yaml')\n",
- "\n",
- "cfg.development_mode = False\n",
- "cfg.query['year'] = 2023\n",
- "cfg.query['month'] = 10\n",
- "cfg.query['day'] = 1\n",
- "cfg.query['time'] = \"00:00\"\n",
- "cfg.query['geography'] = \"nepal\"\n",
- "download_raw_era5(cfg)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Now let's read it in and run the aggregation:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Load the NetCDF file\n",
- "fpath = here() / \"data/input/nepal_2023_10.nc\"\n",
- "ds = xr.open_dataset(fpath)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
"
- ],
- "text/plain": [
- " healthshed mean_temperature \\\n",
- "0 1 292.330078 \n",
- "1 7 286.162109 \n",
- "2 8 282.460938 \n",
- "3 23 293.310547 \n",
- "4 24 295.396484 \n",
- "\n",
- " geometry \n",
- "0 POLYGON ((87.60719 27.37069, 87.60841 27.36969... \n",
- "1 POLYGON ((88.04438 27.4203, 88.04365 27.41925,... \n",
- "2 POLYGON ((88.14528 27.67003, 88.14526 27.66966... \n",
- "3 POLYGON ((88.0766 27.03545, 88.07695 27.03533,... \n",
- "4 POLYGON ((87.76435 26.92431, 87.76435 26.924, ... "
- ]
- },
- "execution_count": null,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "result = aggregate_to_healthsheds(\n",
- " res_poly2cell=res_poly2cell,\n",
- " raster=resampled_tiff,\n",
- " shapes=healthsheds,\n",
- " names_column=\"fid\",\n",
- " aggregation_func=np.nanmean,\n",
- " aggregation_name=\"mean_temperature\"\n",
- ")\n",
- "result.head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "image/png": "",
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "result.plot(column=\"mean_temperature\", legend=True)\n",
- "plt.title(\"Mean Temperature (K) by Health Shed October 2023\")\n",
- "plt.show()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "This should work by slotting right into the pipeline, only changing the function for the names column"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "python3",
- "language": "python",
- "name": "python3"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/notes/sidebar.yml b/notes/sidebar.yml
index c80eda4..caf3166 100644
--- a/notes/sidebar.yml
+++ b/notes/sidebar.yml
@@ -2,5 +2,15 @@ website:
sidebar:
contents:
- index.ipynb
+ - section: "Snakemake Modules"
- 00_core.ipynb
- 01_download_raw_data.ipynb
+ - 02_aggregate.ipynb
+ - 03_publish.ipynb
+ - section: "PyTask Modules"
+ - 20_pytask_config.ipynb
+ - 20_pytask_logger.ipynb
+ - 21_pytask_download.ipynb
+ - 22_pytask_aggregate.ipynb
+ - section: "PyTask Demo"
+ - 10_pytask_demo.ipynb
diff --git a/notes_qmd/00_core.qmd b/notes_qmd/00_core.qmd
new file mode 100644
index 0000000..8159d54
--- /dev/null
+++ b/notes_qmd/00_core.qmd
@@ -0,0 +1,574 @@
+---
+title: "Core Module: Internal functions and testing"
+exec_all: true
+---
+
+## core
+
+> This is a core library for the ERA5 dataset pipeline. It defines a few helpful functions such as an API tester to test your API key and connection.
+
+
+```{python}
+#| default_exp core:
+#
+```
+
+```{python}
+#| hide:
+#
+from nbdev.showdoc import *
+```
+
+```{python}
+#| exports:
+#
+import os
+import cdsapi
+import hydra
+import json
+import tempfile
+import argparse
+import zipfile
+import shutil
+import geopandas as gpd
+from pathlib import Path
+from pydrive2.auth import GoogleAuth
+from pydrive2.drive import GoogleDrive
+from omegaconf import DictConfig, OmegaConf
+from pyprojroot import here
+from importlib import import_module
+```
+
+## Utilities
+
+Some utilities are provided to help you with the ERA5 dataset.
+
+```{python}
+#| exports:
+#
+def describe(
+    cfg: DictConfig=None, # Configuration file
+    )-> None:
+    "Print the Hydra configuration that drives the pipeline"
+
+    # Without a config from the caller, fall back to an empty one.
+    if cfg is None:
+        print("No configuration file provided. Generating default configuration file.")
+        cfg = OmegaConf.create()
+
+    header = "This package fetches ERA5 data. The following is the config file used by Hydra for the pipeline:\n"
+    print(header)
+    rendered = OmegaConf.to_yaml(cfg)
+    print(rendered)
+```
+
+```{python}
+#| exporti:
+#
+def _expand_path(
+    path: str # Path on user's machine
+    )-> str: # Expanded path
+    "Resolve `~`, environment variables and relative segments into an absolute path"
+
+    # Applied in this order: home directory, then $VARS, then absolute path.
+    with_home = os.path.expanduser(path)
+    with_vars = os.path.expandvars(with_home)
+    return os.path.abspath(with_vars)
+```
+
+```{python}
+#| exporti:
+#
+def _get_callable(func_path):
+    """Resolve a dotted path such as `package.module.func` to the object it names."""
+    # Everything before the last dot is the module; the remainder is the attribute.
+    parts = func_path.rsplit(".", 1)
+    module_name, func_name = parts
+    return getattr(import_module(module_name), func_name)
+```
+
+```{python}
+#| exporti:
+# a directory structure creator
+def _create_directory_structure(
+    base_path: str, # The base directory where the structure will be created
+    structure: dict # A (possibly nested) mapping representing the directory structure
+    )->None:
+    """
+    Recursively creates a directory structure from a mapping.
+
+    Every key becomes a directory under `base_path`. When a value is itself a
+    mapping, its keys are created as subdirectories of that directory; any
+    other value is ignored. Existing directories are left untouched.
+
+    Nested levels are detected with `collections.abc.Mapping` rather than
+    `dict`, so mapping types that do not subclass `dict` (for example the
+    OmegaConf config node that `main` passes in) are descended into as well.
+    """
+    from collections.abc import Mapping
+
+    for folder, substructure in structure.items():
+        # Create the current directory
+        current_path = os.path.join(base_path, folder)
+        os.makedirs(current_path, exist_ok=True)
+
+        # Recursively create subdirectories if substructure is a mapping
+        if isinstance(substructure, Mapping):
+            _create_directory_structure(current_path, substructure)
+```
+
+In addition, we've defined 3 private functions to help with path expansion (`_expand_path`), dynamic function importing (`_get_callable`), and directory structure creation (`_create_directory_structure`).
+
+### A Simple Temperature Conversion Function
+
+```{python}
+#| export:
+#
+def kelvin_to_celsius(
+    kelvin: float # Temperature in Kelvin
+    ) -> float: # Temperature in Celsius
+    """
+    Shift a temperature from the Kelvin scale onto the Celsius scale.
+    """
+    # 0 degrees Celsius expressed in Kelvin
+    zero_celsius_in_kelvin = 273.15
+    return kelvin - zero_celsius_in_kelvin
+```
+
+### A Class for Authenticating Google Drive
+
+We're going to use a class to authenticate and interact with google drive. The goal is to have a simple interface to fetch the healthshed files dynamically from google drive in the pipeline.
+
+::: {.callout-important}
+This class was implemented when all of our data
+was stored on a private Google Drive. Since we
+have moved all of our data to FASRC, this will
+likely be deprecated in the near future.
+:::
+
+```{python}
+#| export:
+#
+class GoogleDriver:
+    """
+    Wrapper around a PyDrive2 `GoogleDrive` client that authenticates with a service account.
+
+    The key file comes from `json_key_path` or, when that is not given, from the
+    `GOOGLE_DRIVE_AUTH_JSON` environment variable. Authentication happens once,
+    at construction time, and the resulting client is returned by `get_drive()`.
+    """
+    def __init__(self, json_key_path=None):
+        key_path = json_key_path or os.getenv("GOOGLE_DRIVE_AUTH_JSON")
+        self.json_key_path = key_path
+        # Fail before attempting authentication if the key file is missing.
+        if not (key_path and os.path.isfile(key_path)):
+            raise FileNotFoundError(f"Service account key file not found: {self.json_key_path}")
+        self.drive = self._authenticate()
+
+    def _authenticate(self):
+        # Settings select the "service" backend and point it at the JSON key file.
+        service_config = {"client_json_file_path": self.json_key_path}
+        gauth = GoogleAuth(settings={
+            "client_config_backend": "service",
+            "service_config": service_config,
+        })
+        gauth.ServiceAuth()
+        return GoogleDrive(gauth)
+
+    def get_drive(self):
+        return self.drive
+```
+
+Here's how we use it. The credentials for the data-pipeline service account are
+available in the sandbox folder, and the path to said folder is set in the config:
+
+```{python}
+from hydra import initialize, compose
+from omegaconf import OmegaConf
+```
+
+```{python}
+# unfortunately, we have to use the initialize function to load the config file
+# this is because the @hydra decorator does not work with Notebooks very well
+# this is a known issue with Hydra: https://gist.github.com/bdsaglam/586704a98336a0cf0a65a6e7c247d248
+#
+# just use the relative path from the notebook to the config dir
+try:
+ with initialize(version_base=None, config_path="../conf"):
+ cfg = compose(config_name='config.yaml')
+except Exception as e:
+ print(f"Error initializing Hydra: {e}")
+ with initialize(version_base=None, config_path="conf"):
+ cfg = compose(config_name='config.yaml')
+```
+
+::: {.callout-important}
+If we continue with `pytask`, we will not need to
+use hydra at all, and so the above strategy
+may get deprecated.
+:::
+
+```{python}
+auth = GoogleDriver(json_key_path=here() / cfg.GOOGLE_DRIVE_AUTH_JSON.path)
+drive = auth.get_drive()
+```
+
+Here's how we might check that the healthsheds are accessible in the drive:
+
+```{python}
+# we're using the madagascar healthshed folder as an example
+folder_id = cfg.geographies.madagascar.healthsheds
+folder_name = "healthsheds2022.zip"
+file_list = drive.ListFile({'q': f" title='{folder_name}' and trashed = false "}).GetList()
+
+for file in file_list:
+ print(f"{file['title']} - {file['mimeType']}")
+```
+
+That being said, we can read the healthsheds into geopandas by downloading them to a temp directory. The healthsheds must be packaged as a zipped shapefile, with the files at the root of the zip archive.
+
+```{python}
+with tempfile.TemporaryDirectory() as temp_dir:
+ # Create a temporary directory to store the downloaded file
+ zip_path = os.path.join(temp_dir, folder_name)
+
+ # Download file from Google Drive
+ file_obj = drive.CreateFile({'id': file_list[0]['id']})
+ file_obj.GetContentFile(zip_path)
+
+ # Read shapefile directly from ZIP
+ gdf = gpd.read_file(f"zip://{zip_path}")
+```
+
+That works! So now we can patch the class to include this workflow:
+
+```{python}
+#| export:
+#
+from fastcore.basics import patch
+```
+
+```{python}
+#| export:
+#
+@patch
+def read_healthsheds(self:GoogleDriver, healthshed_zip_name):
+    """
+    Download a zipped shapefile from Google Drive by title and load it with geopandas.
+
+    The first non-trashed Drive file whose title equals `healthshed_zip_name` is
+    downloaded into a temporary directory and read through the `zip://` scheme.
+    Rows without a geometry are dropped and the index is renumbered from 0.
+
+    Raises `FileNotFoundError` when no file with that title is found.
+    """
+    file_list = self.drive.ListFile({'q': f" title='{healthshed_zip_name}' and trashed = false "}).GetList()
+    if not file_list:
+        # Without this check the lookup below would fail with a bare IndexError
+        raise FileNotFoundError(f"No file named '{healthshed_zip_name}' found on Google Drive")
+
+    with tempfile.TemporaryDirectory() as temp_dir:
+        # Local path for the downloaded archive inside the temporary directory
+        zip_path = os.path.join(temp_dir, healthshed_zip_name)
+
+        # Download file from Google Drive
+        file_obj = self.drive.CreateFile({'id': file_list[0]['id']})
+        file_obj.GetContentFile(zip_path)
+
+        # Read shapefile directly from ZIP
+        gdf = gpd.read_file(f"zip://{zip_path}")
+
+    # we need to ensure that the healthsheds only contain valid polygons;
+    # reset_index returns a new frame, so the filtered slice is not modified in place
+    gdf = gdf[gdf.geometry.notnull()].reset_index(drop=True)
+
+    return gdf
+```
+
+And to check that it works:
+
+```{python}
+driver = GoogleDriver(json_key_path=here() / cfg.GOOGLE_DRIVE_AUTH_JSON.path)
+drive = driver.get_drive()
+healthsheds = driver.read_healthsheds("healthsheds2022.zip")
+
+healthsheds.describe()
+```
+
+## CDS File Handler Type
+
+::: {.callout-important}
+This section may also be deprecated. Since adding `swvl1` to the pipeline, we have not needed to use this class. We leave it here for now for reference.
+:::
+
+We're going to make a file handler type to help deal with CDS files. This is to fix [NSAPH-Data-Processing/era5_sandbox#13](https://github.com/NSAPH-Data-Processing/era5_sandbox/issues/13).
+
+Usually, when you download data, it comes out as a simple .nc file that can be opened with xarray. However, the CDS API has a few different file types that are not .nc files. For example, the ERA5 data is stored in a .grib file format. This is a common format for meteorological data, and it is used by the ECMWF. When a query has multiple variables, sometimes they are downloaded as a .zip file to separate the grib from the netcdf.
+
+So, below, we define a class that can handle the file no matter what the type is. It will check the file type and then use the appropriate method to open it. The class will also have a method to check if the file is a .zip file, and if so, it will unzip it and return the path to the unzipped file.
+
+```{python}
+#| export:
+#
+class ClimateDataFileHandler:
+    """
+    A class to handle files downloaded from the Climate Data Store (CDS) API.
+
+    A download is either a plain NetCDF file or a ZIP archive containing several
+    NetCDF files. A plain file is used as-is. A ZIP archive is extracted into a
+    temporary directory and its `*.nc` members are sorted into "instant",
+    "accum" and "other" files based on their names.
+    """
+
+    def __init__(self, input_path: str):
+        """
+        Core initialization. It requires only the path. The flags are
+        then used to do the handling logic.
+        """
+        self.original_path = Path(input_path)
+
+        # major flag here for logic
+        self.is_zip = False
+
+        # the unzipping directory (a tempfile.TemporaryDirectory once extracted)
+        self.unzipped_dir = None
+
+        # the instant data, such as temperature
+        self.instant_file = None
+
+        # the cumulative data, such as precipitation
+        self.accum_file = None
+
+        # any extraneous data
+        self.other_files = []
+
+        # ready to be used
+        self._prepared = False
+
+    def prepare(self):
+        """
+        Inspect the file and prepare the appropriate NetCDF paths.
+
+        Does nothing if the handler is already prepared. Raises
+        `FileNotFoundError` if the input path does not exist.
+        """
+        if self._prepared:
+            return
+
+        if not self.original_path.exists():
+            raise FileNotFoundError(f"{self.original_path} does not exist")
+
+        # Detect ZIP by magic number rather than by file extension, because a
+        # download can be a ZIP archive even when it is named `.nc`
+        with open(self.original_path, "rb") as f:
+            sig = f.read(4)
+            self.is_zip = sig == b'PK\x03\x04'
+
+        if self.is_zip:
+            self._unzip_and_scan()
+        else:
+            # a non-ZIP file is treated as the "instant" dataset
+            self.instant_file = str(self.original_path)
+
+        self._prepared = True
+
+    def _unzip_and_scan(self):
+        """Extract and identify stepType-specific NetCDFs from ZIP."""
+        self.unzipped_dir = tempfile.TemporaryDirectory()
+        with zipfile.ZipFile(self.original_path, 'r') as zip_ref:
+            zip_ref.extractall(self.unzipped_dir.name)
+
+        # only `*.nc` files at the top level of the archive are considered
+        for f in Path(self.unzipped_dir.name).glob("*.nc"):
+            if "stepType-instant" in f.name:
+                self.instant_file = str(f)
+            elif "stepType-accum" in f.name:
+                self.accum_file = str(f)
+            else:
+                self.other_files.append(str(f))
+
+    def get_dataset(self, type: str = "instant") -> str:
+        """
+        Get the appropriate dataset path ('instant', 'accum' or 'any').
+
+        'any' returns the first available of instant, accum, other files, or
+        None when there is none. Any other request that cannot be satisfied
+        raises `ValueError`.
+        """
+        self.prepare()
+
+        if type == "instant" and self.instant_file:
+            return self.instant_file
+        elif type == "accum" and self.accum_file:
+            return self.accum_file
+        elif type == "any":
+            return self.instant_file or self.accum_file or (self.other_files[0] if self.other_files else None)
+        else:
+            raise ValueError(f"No file found for requested type '{type}'")
+
+    def cleanup(self):
+        """
+        Clean up any temporary directories created during unzip.
+
+        The recorded paths point into the deleted directory, so they are
+        cleared and the handler is marked unprepared. The next `prepare()` or
+        `get_dataset()` extracts the archive again instead of returning paths
+        to files that no longer exist.
+        """
+        if self.unzipped_dir is not None:
+            self.unzipped_dir.cleanup()
+            self.unzipped_dir = None
+            self.instant_file = None
+            self.accum_file = None
+            self.other_files = []
+            self._prepared = False
+```
+
+```{python}
+import xarray as xr
+from fastcore.test import test_fail
+```
+
+```{python}
+
+eg_file = here() / "bld/2019_5_madagascar.nc"
+
+# this fails because the nc file downloaded has grib and netcdf in it, so
+# xr cannot handle it.
+def wont_work(multilayer_file):
+
+    ds = xr.open_dataset(multilayer_file)
+
+# `args` must be a tuple: a bare `(eg_file)` is just `eg_file`, and unpacking it
+# would fail before `wont_work` is ever called, making this check pass vacuously.
+test_fail(
+    wont_work,
+    args=(eg_file,)
+)
+
+# equivalent to saying try: wont_work(eg_file) except: some error handling
+
+The above fails because the download contains temperature and precipitation data, which get encoded silently as different formats. Even though it is one file, it contains both grib and netcdf data and is encoded as a .zip file. So we use the class to read it instead:
+
+```{python}
+handler = ClimateDataFileHandler(eg_file)
+handler.prepare()
+ds1 = xr.open_dataset(handler.get_dataset("instant"))
+#ds2 = xr.open_dataset(handler.get_dataset("accum"))
+```
+
+::: {.callout-important}
+The above line for `ds2` is commented out because the example file does not separate accumulation data.
+:::
+
+```{python}
+ds1
+```
+
+```{python}
+#ds2
+```
+
+```{python}
+handler.cleanup()
+```
+
+Great! Let's add a context handler and this can be added to the pipeline,
+so that with the entry and exit methods, we can now use the class in a `with` statement:
+
+
+```{python}
+#| exporti:
+#
+@patch
+def __enter__(self:ClimateDataFileHandler):
+    "Prepare the file on entry so the `with` block receives a ready-to-use handler"
+    self.prepare()
+    return self
+
+@patch
+def __exit__(self:ClimateDataFileHandler, exc_type, exc_val, exc_tb):
+    "Run `cleanup()` on exit; returns None, so exceptions from the `with` body propagate"
+    self.cleanup()
+    return None
+```
+
+
+```{python}
+with ClimateDataFileHandler(eg_file) as handler:
+ ds1 = xr.open_dataset(handler.get_dataset("instant"))
+ #ds2 = xr.open_dataset(handler.get_dataset("accum"))
+
+ print(ds1)
+ #print(ds2)
+```
+
+## Tests and Main
+
+In `nbdev`, our tests are embedded in the notebook. Whenever you export the notebook, all the cells that are specified to run are run, and hence, the tests are executed. The tests are also exported. This is a great way to ensure that your documentation is always up-to-date. For this module, we're using the `testAPI()` function as our main test.
+
+```{python}
+#| exports:
+#
+def testAPI(
+    cfg: DictConfig=None, # Hydra configuration; only `development_mode` is read
+    dataset:str="reanalysis-era5-pressure-levels" # CDS dataset to request the dummy download from
+    )-> bool: # True if the dummy download succeeded, False otherwise
+    """
+    Test the CDS API connection by downloading a small dummy GRIB file.
+
+    The file is written to `data/testing/test_download.grib` under the project
+    root and removed afterwards unless `cfg.development_mode` is truthy. Any
+    exception raised while downloading is reported and turned into `False`.
+    """
+
+    # `cfg` defaults to None: fall back to an empty config, as `describe` does
+    if cfg is None:
+        cfg = OmegaConf.create()
+
+    # parse config; a missing `development_mode` is treated as False
+    testing = cfg.get("development_mode", False)
+    output_path = here("data") / "testing"
+
+    print(OmegaConf.to_yaml(cfg))
+
+    try:
+        client = cdsapi.Client()
+
+        # build request
+        request = {
+            'product_type': ['reanalysis'],
+            'variable': ['geopotential'],
+            'year': ['2024'],
+            'month': ['03'],
+            'day': ['01'],
+            'time': ['13:00'],
+            'pressure_level': ['1000'],
+            'data_format': 'grib',
+        }
+
+        # make sure the download directory exists before writing into it
+        output_path.mkdir(parents=True, exist_ok=True)
+        target = output_path / 'test_download.grib'
+
+        print("Testing API connection by downloading a dummy dataset to {}...".format(output_path))
+
+        client.retrieve(dataset, request, target)
+
+        # keep the file only in development mode
+        if not testing:
+            os.remove(target)
+
+        print("API connection test successful.")
+        return True
+
+    except Exception as e:
+        print("API connection test failed.")
+        print("Did you set up your API key with CDS? If not, please visit https://cds.climate.copernicus.eu/how-to-api#install-the-cds-api-client")
+        print("Error: {}".format(e))
+        return False
+```
+
+We can see that this API tester tool works with Hydra configuration:
+
+```{python}
+from hydra import initialize, compose
+from omegaconf import OmegaConf
+```
+
+```{python}
+# unfortunately, we have to use the initialize function to load the config file
+# this is because the @hydra decorator does not work with Notebooks very well
+# this is a known issue with Hydra: https://gist.github.com/bdsaglam/586704a98336a0cf0a65a6e7c247d248
+#
+# just use the relative path from the notebook to the config dir
+try:
+ with initialize(version_base=None, config_path="../conf"):
+ cfg = compose(config_name='config.yaml')
+except Exception as e:
+ print(f"Error initializing Hydra: {e}")
+ with initialize(version_base=None, config_path="conf"):
+ cfg = compose(config_name='config.yaml')
+
+describe(cfg)
+```
+
+### Importing the Main Function
+
+::: {.callout-important}
+As mentioned, if we continue with `pytask`, we will not need to use hydra at all, and so the main function
+may get deprecated as `pytask` will handle the pipeline execution without `__main__` scripts.
+:::
+
+Important: using `__main__` in nbdev and Hydra is a little bit tricky. We need to define the main function in the module ONLY ONCE, and then, when we export the notebook to a script, we need to guard the call with the `nbdev.imports.IN_NOTEBOOK` variable. This way, the main function is only executed when the exported module is run as a script, and not when the notebook is run or the module is imported.
+
+```python
+from nbdev.imports import IN_NOTEBOOK
+```
+
+You'll see this listed throughout the notebooks.
+
+```{python}
+#| exports:
+#
+@hydra.main(version_base=None, config_path="../../conf", config_name="config")
+def main(cfg: DictConfig) -> None:
+    "Command-line entry point: create the data folders, then test the CDS API connection"
+
+    # The folder layout comes from the `datapaths` section of the config
+    data_root = here() / "data"
+    _create_directory_structure(data_root, cfg.datapaths)
+
+    # test the api
+    testAPI(cfg=cfg)
+```
+
+```{python}
+#| export: null
+#| eval: false
+try: from nbdev.imports import IN_NOTEBOOK
+except: IN_NOTEBOOK=False
+
+if __name__ == "__main__" and not IN_NOTEBOOK:
+ main()
+```
+
+```{python}
+#| hide:
+#
+import nbdev; nbdev.nbdev_export()
+```
\ No newline at end of file
diff --git a/notes_qmd/01_download_raw_data.qmd b/notes_qmd/01_download_raw_data.qmd
new file mode 100644
index 0000000..f55e2ab
--- /dev/null
+++ b/notes_qmd/01_download_raw_data.qmd
@@ -0,0 +1,259 @@
+---
+title: "Download Module: Downloading Raw Data from CDSAPI"
+engine: jupyter
+---
+
+## download
+
+> This module downloads the raw data from CDS and saves it in the local directory
+
+```{python}
+#| default_exp download:
+#
+```
+
+```{python}
+#| hide:
+#
+from nbdev.showdoc import *
+```
+
+We use a similar approach to the one in the tutorial to download the data
+to local storage.
+
+```{python}
+#| export:
+#
+import os
+import hydra
+import cdsapi
+import tempfile
+import zipfile
+import requests
+import geopandas as gpd
+from pathlib import Path
+from pyprojroot import here
+from shapely.geometry import box
+from omegaconf import DictConfig, ListConfig, OmegaConf
+
+try: from era5_sandbox.core import _expand_path
+except: from core import _expand_path
+```
+
+```{python}
+#| exporti:
+#
+def _validate_query(
+    query_body: DictConfig # The `query` section of the configuration
+    )->dict: # The normalized query as a plain Python container
+    '''
+    Check that the query is valid and normalize its date fields.
+
+    All required keys must be present, otherwise `ValueError` is raised.
+    `year` is converted to a string; `month` and `day` are converted to
+    zero-padded two-character strings. Each of these may be a single value or
+    a list of values. The converted values are written back onto `query_body`
+    before it is returned as a plain container.
+
+    ###TODO Not a good idea to overwrite components of the query body because the user may believe something and the function may give something else back
+    Better to just tell them something is wrong
+    '''
+
+    required_keys = ['product_type', 'variable', 'year', 'month', 'day', 'time', 'area', 'data_format', 'download_format']
+    if not all(key in query_body.keys() for key in required_keys):
+        print(f"Missing required key in query. Required keys are {required_keys}")
+        print("Query validation failed")
+        raise ValueError("Invalid query")
+
+    # (field, zero-pad width): months and days are padded to 2 characters,
+    # years are only converted to strings
+    for field, width in (('year', 0), ('month', 2), ('day', 2)):
+        value = query_body[field]
+        if isinstance(value, ListConfig):
+            query_body[field] = [str(x).zfill(width) for x in value]
+        else:
+            query_body[field] = str(value).zfill(width)
+
+    return OmegaConf.to_container(query_body, resolve=True)
+```
+
+The background functionality in this module involves building the
+bounding box of a region of interest, and sending that to the
+CDS API query. As such, we define two helper functions: one to
+fetch the GADM data for a geographic region, and
+another to create the bounding box from the OCHA/HDX shapefiles.
+
+```{python}
+#| export:
+#
+def fetch_GADM(
+    url: str="https://geodata.ucdavis.edu/gadm/gadm4.1/gpkg/gadm41_MDG.gpkg", # URL to fetch the GADM data for Madagascar
+    output_file: str="gadm41_MDG.gpkg" # file path to save the GADM data
+    )-> str: # Expanded path of the GADM file on disk
+    '''
+    Fetch the GADM bounding box for geographic region
+
+    The file is downloaded with `curl` unless it already exists at the
+    expanded `output_file` path. Raises `subprocess.CalledProcessError` if
+    `curl` exits with an error.
+    '''
+    import subprocess
+
+    output_file_path = _expand_path(output_file)
+    if os.path.exists(output_file_path):
+        print("GADM data already exists")
+        return output_file_path
+
+    print("Fetching GADM bounding box data for region")
+    try:
+        # Argument list instead of a shell string: spaces or shell
+        # metacharacters in the URL or path cannot change the command.
+        # --fail makes curl exit non-zero on HTTP errors.
+        subprocess.run(
+            ["curl", "--fail", "--location", "--output", output_file_path, url],
+            check=True,
+        )
+    except subprocess.CalledProcessError:
+        # do not leave a partial file behind: the next call would treat it
+        # as an already-downloaded dataset
+        if os.path.exists(output_file_path):
+            os.remove(output_file_path)
+        raise
+    print("GADM data fetched")
+
+    return output_file_path
+```
+
+```{python}
+#| exports:
+#
+def create_bounding_box(
+    zip_url_or_path: str, # URL or local path to the zipped shapefile.
+    buffer_km: float = 50, # Buffer distance in kilometers to expand the bounding box.
+    round_to: int = 1, # Number of decimal places to round the bounding box coordinates.
+    projected_crs = None # CRS used for buffering (anything `to_crs` accepts, e.g. an EPSG code); None estimates a UTM zone from the data.
+) -> list: # Bounding box in the CDS API area format [North, West, South, East]
+    '''
+    Create a bounding box from OCHA/HDX shapefile data with a buffer.
+
+    The archive is downloaded (when given as a URL) and extracted into a
+    temporary directory; the first `.shp` file found is read. The shapes are
+    buffered in a projected CRS, converted back to EPSG:4326, and the total
+    bounds are returned rounded to `round_to` decimal places.
+
+    Raises `FileNotFoundError` if the archive contains no `.shp` file, and an
+    HTTP error from `requests` if the download fails.
+    '''
+    with tempfile.TemporaryDirectory() as tmpdir:
+        # Download if it's a URL
+        if zip_url_or_path.startswith("http"):
+            response = requests.get(zip_url_or_path)
+            # fail here on an HTTP error instead of writing an error page
+            # to disk and failing later when unzipping it
+            response.raise_for_status()
+            zip_path = os.path.join(tmpdir, "ocha_data.zip")
+            with open(zip_path, "wb") as f:
+                f.write(response.content)
+        else:
+            zip_path = zip_url_or_path
+
+        # Unzip
+        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
+            zip_ref.extractall(tmpdir)
+
+        # Find the .shp file
+        shp_files = list(Path(tmpdir).rglob("*.shp"))
+        if not shp_files:
+            raise FileNotFoundError("No shapefile (.shp) found in the extracted archive.")
+        shp_path = str(shp_files[0]) # Use first found .shp
+
+        # Read shapefile
+        shape = gpd.read_file(shp_path)
+
+        # Reproject to a projected CRS so the buffer can be given in metres.
+        # A fixed zone (previously EPSG:32738) only suits one region, so the
+        # UTM zone is estimated from the data unless the caller supplies one.
+        if projected_crs is None:
+            projected_crs = shape.estimate_utm_crs()
+        shape_proj = shape.to_crs(projected_crs)
+
+        # Apply buffer
+        buffered = shape_proj.geometry.buffer(buffer_km * 1000)
+
+        # Convert back to geographic coordinates
+        buffered_geo = gpd.GeoSeries(buffered, crs=shape_proj.crs).to_crs(epsg=4326)
+
+        # Get bounding box
+        bounds = buffered_geo.total_bounds # [min_x, min_y, max_x, max_y]
+        bbox = [
+            round(bounds[3], round_to), # North
+            round(bounds[0], round_to), # West
+            round(bounds[1], round_to), # South
+            round(bounds[2], round_to) # East
+        ]
+
+        return bbox
+```
+
+The primary function to download the data from CDSAPI is defined below.
+
+```{python}
+#| exports:
+#
+def download_raw_era5(
+    cfg: DictConfig # hydra configuration file
+    )->None:
+    '''
+    Build the CDS request from the config and, outside development mode, download it
+    '''
+
+    # settings read from the config
+    testing = cfg.development_mode # for testing
+    output_dir = here("data/input") # output directory
+    geography = cfg.query.geography
+
+    # destination file: <geography>_<year>_<month>.nc inside the output directory
+    input_root = _expand_path(output_dir)
+    filename = "{}_{}_{}.nc".format(geography, cfg.query['year'], cfg.query['month'])
+    target = os.path.join(input_root, filename)
+
+    client = cdsapi.Client()
+
+    query = _validate_query(cfg.query)
+
+    dataset = cfg.dataset
+    # `geography` is removed before the request is sent
+    # (presumably it is not a CDS request field)
+    query.pop('geography')
+
+    if testing:
+        print(f"Testing mode. Not downloading data. Query is {query}")
+    else:
+        # Restrict the request to the bounding box of the geography's shapefile
+        bounds = create_bounding_box(cfg.geographies[geography]['shapefile'])
+        query['area'] = bounds
+        client.retrieve(dataset, query).download(target)
+
+        print("Downloaded file to: {}".format(target))
+
+    print("Done")
+```
+
+## Tests and Main
+
+Here we define some tests and the main function that will be used to download the data.
+
+```{python}
+#| eval: false
+from hydra import initialize, compose
+from omegaconf import OmegaConf
+
+# unfortunately, we have to use the initialize function to load the config file
+# this is because the @hydra decorator does not work with Notebooks very well
+# this is a known issue with Hydra: https://gist.github.com/bdsaglam/586704a98336a0cf0a65a6e7c247d248
+#
+# just use the relative path from the notebook to the config dir
+try:
+ with initialize(version_base=None, config_path="../conf"):
+ cfg = compose(config_name='config.yaml')
+except Exception as e:
+ print(f"Error initializing Hydra: {e}")
+ with initialize(version_base=None, config_path="conf"):
+ cfg = compose(config_name='config.yaml')
+
+cfg.development_mode = False
+cfg.query['year'] = 2017
+cfg.query['month'] = 11
+#cfg.query['day'] = 1
+#cfg.query['time'] = "00:00"
+cfg.query['geography'] = "nepal"
+download_raw_era5(cfg)
+```
+
+```{python}
+#| exports:
+#
+@hydra.main(config_path="../../conf", config_name="config", version_base=None)
+def main(cfg: DictConfig) -> None:
+    "Hydra entry point that forwards the composed config to `download_raw_era5`"
+    # better approach would be to have the function only use the specific arguments of the config
+    download_raw_era5(cfg=cfg)
+```
+
+```{python}
+#| export:
+#| eval: false
+try: from nbdev.imports import IN_NOTEBOOK
+except: IN_NOTEBOOK=False
+
+if __name__ == "__main__" and not IN_NOTEBOOK:
+ print('Running from __main__ ...')
+
+ main()
+```
+
+```{python}
+#| hide:
+#
+import nbdev; nbdev.nbdev_export()
+```
\ No newline at end of file
diff --git a/notes_qmd/02_aggregate.qmd b/notes_qmd/02_aggregate.qmd
new file mode 100644
index 0000000..29f43fe
--- /dev/null
+++ b/notes_qmd/02_aggregate.qmd
@@ -0,0 +1,621 @@
+---
+title: "Aggregate Module: Spatial Aggregation to Healthsheds"
+execute:
+ freeze: auto
+engine: jupyter
+---
+
+## aggregate
+
+> This module aggregates the downloaded data into the respective output dataframes.
+
+```{python}
+#| default_exp aggregate:
+#
+```
+
+```{python}
+#| hide:
+#
+from nbdev.showdoc import *
+```
+
+We prototyped the code in this module using a Jupyter notebook. The notebook is available in `notes/prototypes/learning_aggregations_w_michelle_20250328.ipynb`. The code in this module is a cleaned-up version of the code in that notebook. The notebook contains additional comments and explanations of the code, which may be helpful for understanding the code in this module.
+
+The basic process is as follows:
+
+1. Load the netCDF data in memory
+2. Statistically aggregate the hourly data to daily data per exposure using resample()
+3. Write out the data to tiff
+4. Read the tiff data back in
+5. Read in the shapefile that defines the healthsheds
+6. Spatially aggregate the exposure data to the healthsheds
+7. Quality check the aggregations
+8. Write out final aggregations to parquet
+
+```{python}
+#| exports:
+#
+import tempfile
+import rasterio
+import hydra
+import argparse
+import os
+
+import pandas as pd
+import geopandas as gpd
+import numpy as np
+import xarray as xr
+import matplotlib.pyplot as plt
+
+from dataclasses import dataclass, field
+from typing import Optional, Tuple
+from pyprojroot import here
+from hydra import initialize, compose
+from omegaconf import OmegaConf, DictConfig
+from tqdm import tqdm
+from math import ceil, floor
+from rasterstats.io import Raster
+from rasterstats.utils import boxify_points, rasterize_geom
+
+try: from era5_sandbox.core import GoogleDriver, _get_callable, describe, ClimateDataFileHandler, kelvin_to_celsius
+except: from core import GoogleDriver, _get_callable, describe, ClimateDataFileHandler, kelvin_to_celsius
+```
+
+```{python}
+try:
+ with initialize(version_base=None, config_path="../conf"):
+ cfg = compose(config_name='config.yaml')
+except Exception as e:
+ print(f"Error initializing Hydra: {e}")
+ with initialize(version_base=None, config_path="conf"):
+ cfg = compose(config_name='config.yaml')
+```
+
+We're going to write a function that aggregates the data for a single exposure from a file. This file should be the single month data we got from the previous step in the pipeline.
+
+```{python}
+eg_file = here() / "bld/2009_01_nepal.nc"
+```
+
+```{python}
+#| export:
+#
+def resample_netcdf(
+ fpath: str, # Path to the netCDF file.
+ resample: str = "1D", # Resampling frequency (e.g., '1H', '1D')
+ agg_func: callable = np.mean, # Aggregation function (e.g., np.mean, np.sum).
+ time_dim: str = "valid_time", # Name of the time dimension in the dataset.
+ **xr_open_kwargs # keywords for python's xarray module
+ ) -> xr.Dataset:
+ """
+ Resample a netCDF file to a specified frequency and aggregation method.
+
+ Args:
+ fpath (str): Path to the netCDF file.
+ resample (str): Resampling frequency (e.g., '1H', '1D').
+ agg_func (callable): Aggregation function (e.g., np.mean, np.sum).
+
+ Returns:
+ xarray.Dataset: Resampled dataset.
+ """
+
+ ds = xr.open_dataset(fpath, **xr_open_kwargs)
+
+ if callable(agg_func):
+ # Use xarray's reduce method with the callable
+ return ds.resample({time_dim: resample}).reduce(agg_func)
+ else:
+ raise TypeError("agg_func must be a callable function like np.mean, np.max, etc.")
+```
+
+We pull the aggregation function from the config file:
+
+```{python}
+var = 'swvl1'
+agg_func = _get_callable(cfg['aggregation']['aggregation'][var]['hourly_to_daily'][0]['function'])
+```
+
+```{python}
+with ClimateDataFileHandler(eg_file) as handler:
+
+ ds_path = handler.get_dataset("instant")
+ resampled_data = resample_netcdf(ds_path, agg_func=agg_func)
+```
+
+I'm going to use a dataclass to represent the tiff data. This will allow us to easily pass around the data and metadata associated with the tiff file. Why? I've never used dataclasses and I'm curious about them — ChatGPT thinks this will make the code cleaner and easier to read.
+
+```{python}
+#| exports:
+#
+@dataclass
+class RasterFile:
+ path: str
+ band: int # note that this is 1-indexed
+ data: Optional[np.ndarray] = field(default=None, init=False)
+ transform: Optional[rasterio.Affine] = field(default=None, init=False)
+ crs: Optional[str] = field(default=None, init=False)
+ nodata: Optional[float] = field(default=None, init=False)
+ bounds: Optional[Tuple[float, float, float, float]] = field(default=None, init=False)
+
+ def load(self):
+ """Load raster data and basic metadata."""
+ with rasterio.open(self.path) as src:
+ self.data = src.read(self.band) # each day gets one rasterfile
+ self.transform = src.transform
+ self.crs = src.crs
+ self.nodata = src.nodata
+ self.bounds = src.bounds
+ return self
+
+ def shape(self) -> Optional[Tuple[int, int]]:
+ """Return the shape of the raster data."""
+ return self.data.shape if self.data is not None else None
+
+ def __str__(self):
+ return f"RasterFile(path='{self.path}', shape={self.shape()}, crs='{self.crs}')"
+```
+
+Next, a function that writes the resampled netCDF data to a GeoTIFF and reads it back in:
+
+```{python}
+#| exports:
+#
+def netcdf_to_tiff(
+ ds: xr.Dataset, # The aggregated xarray dataset to convert.
+ band: int, # The day to rasterise; 1 indexed just like human english
+ variable: str, # The variable name to convert.
+ crs: str = "EPSG:4326", # Coordinate reference system (default is WGS84).
+ ):
+
+ """
+ Convert a netCDF file to a GeoTIFF file.
+ """
+
+ with tempfile.TemporaryDirectory() as tmpdirname:
+
+ # Select the variable and time index
+ variable = ds[variable]
+ ds_ = variable.rio.set_spatial_dims(x_dim="longitude", y_dim="latitude")
+ ds_ = ds_.rio.write_crs(crs)
+ # Save as GeoTIFF
+ ds_.rio.to_raster(f"{tmpdirname}/output.tif")
+ # Load the raster file
+ raster_file = RasterFile(path=f"{tmpdirname}/output.tif", band=band).load()
+
+ return raster_file
+```
+
+Now to test it:
+
+```{python}
+with ClimateDataFileHandler(eg_file) as handler:
+ ds_path = handler.get_dataset("instant")
+ resampled_nc = resample_netcdf(ds_path)
+
+print(resampled_nc)
+resampled_tiff = netcdf_to_tiff(
+ ds=resampled_nc,
+ band=28,
+ variable="swvl1",
+ crs="EPSG:4326"
+)
+```
+
+```{python}
+resampled_tiff.data.shape, resampled_tiff.transform, resampled_tiff.crs, resampled_tiff.bounds
+```
+
+Super cool! The tiff file is created and the data is read back in correctly. Now we can move on to the next step, which is to aggregate the data by healthshed.
+
+## Polygon to Raster Cells
+
+This function was initially shared from a previous NSAPH aggregation pipeline [here](https://github.com/NSAPH-Data-Processing/air_pollution__aqdh/blob/2a8109075fe7a8fbf7c435cc34ffa97b63f5e133/utils/faster_zonal_stats.py#L17). To better understand this, here is a ChatGPT explanation of the code:
+
+> This function, `polygon_to_raster_cells`, is doing a crucial first step in spatial alignment: it determines which raster cells are “touched” by each polygon geometry (e.g., administrative areas, watersheds, etc.).
+Essentially, this function helps figure out which pixels from a raster image fall inside each polygon (like a district, region, or shape). It does this by looking at each polygon one by one, zooming in on just the part of the raster that overlaps with that shape, and marking the pixels that are inside. This is kind of like placing a cookie cutter (the polygon) on a pixelated map (the raster) and seeing which pixels get cut.
+The result is a list where each item tells you the pixel locations that match a specific polygon. You can then use those pixel locations to pull out data from the raster, like temperatures or rainfall, and calculate statistics (like the average) for each shape. This is a key step when you want to summarize raster data within specific regions, like figuring out the average temperature in each county or how much vegetation is in each park.
+
+```{python}
+#| exports:
+#
+def polygon_to_raster_cells(
+ vectors, # list of geometries from a shapefile
+ raster, # the raster data as a numpy array
+ nodata=None, # the nodata value of the raster
+ affine=None, # the affine transform of the raster
+ all_touched=False, # whether to include all touched pixels
+ verbose=False,
+ **kwargs,
+) -> list: # A dictionary mapping vector the ids of geometries to locations (indices) in the raster source.
+ """Returns an index map for each vector geometry to indices in the raster source."""
+
+ cell_map = []
+
+ with Raster(raster, affine, nodata) as rast:
+ # used later to crop raster and find start row and col
+ min_lon, dlon = affine.c, affine.a
+ max_lat, dlat = affine.f, -affine.e
+ H, W = rast.shape
+
+ for geom in tqdm(vectors, disable=(not verbose)):
+ if "Point" in geom.geom_type:
+ geom = boxify_points(geom, rast)
+
+ # find geometry bounds to crop raster
+ # the raster and geometry must be in the same lon/lat coordinate system
+ start_row = max(0, min(H - 1, floor((max_lat - geom.bounds[3]) / dlat)))
+ start_col = min(W - 1, max(0, floor((geom.bounds[0] - min_lon) / dlon)))
+ end_col = max(0, min(W - 1, ceil((geom.bounds[2] - min_lon) / dlon)))
+ end_row = min(H - 1, max(0, ceil((max_lat - geom.bounds[1]) / dlat)))
+ geom_bounds = (
+ min_lon + dlon * start_col, # left
+ max_lat - dlat * end_row - 1e-12, # bottom
+ min_lon + dlon * end_col + 1e-12, # right
+ max_lat - dlat * start_row, # top
+ )
+
+ # crop raster to area of interest and rasterize
+ fsrc = rast.read(bounds=geom_bounds)
+ rv_array = rasterize_geom(geom, like=fsrc, all_touched=all_touched)
+ indices = np.nonzero(rv_array)
+
+ if len(indices[0]) > 0:
+ indices = (indices[0] + start_row, indices[1] + start_col)
+ assert 0 <= indices[0].min() < rast.shape[0]
+ assert 0 <= indices[1].min() < rast.shape[1]
+ else:
+ pass # stop here for debug
+
+ cell_map.append(indices)
+
+ return cell_map
+```
+
+To use this, we must define the polygon and raster data. The polygon data is the healthshed shapefile, and the raster data is the tiff file we created earlier. We can use the `GoogleDriver` class we defined in `core` to read in the shapefile.
+
+```{python}
+try:
+ with initialize(version_base=None, config_path="../conf"):
+ cfg = compose(config_name='config.yaml')
+except Exception as e:
+ print(f"Error initializing Hydra: {e}")
+ with initialize(version_base=None, config_path="conf"):
+ cfg = compose(config_name='config.yaml')
+
+driver = GoogleDriver(json_key_path=here() / cfg.GOOGLE_DRIVE_AUTH_JSON.path)
+drive = driver.get_drive()
+healthsheds = driver.read_healthsheds("Nepal_Healthsheds2024.zip")
+```
+
+```{python}
+res_poly2cell=polygon_to_raster_cells(
+ vectors = healthsheds.geometry.values, # the geometries of the shapefile of the regions
+ raster=resampled_tiff.data, # the raster data above
+ nodata=resampled_tiff.nodata, # any intersections with no data, may have to be np.nan
+ affine=resampled_tiff.transform, # some math thing need to revise
+ all_touched=True,
+ verbose=True
+)
+```
+
+The data below maps which grid entries fall into each of the regions in the shapefile (e.g. which pixel is in which state)
+
+```{python}
+res_poly2cell[:5]
+```
+
+Last but not least, we aggregate these data to the healthshed level by applying an aggregation function (e.g. `np.nanmean`) to the raster cells that `polygon_to_raster_cells` mapped to each polygon.
+
+```{python}
+#| exports:
+#
+def aggregate_to_healthsheds(
+ res_poly2cell: list, # the result of polygon_to_raster_cells
+ raster: RasterFile, # the raster data
+ shapes: gpd.GeoDataFrame, # the shapes of the health sheds
+ names_column: str = "fs_uid", # the unique identifier column name of the health sheds
+ aggregation_func: callable = np.nanmean, # the aggregation function
+ aggregation_name: str = "mean" # the name of the aggregation function
+ ) -> gpd.GeoDataFrame:
+ """
+ Aggregate the raster data to the health sheds.
+ """
+
+ stats = []
+
+ for indices in res_poly2cell:
+ if len(indices[0]) == 0:
+ # no cells found for this polygon
+ stats.append(np.nan)
+ else:
+ cells = raster.data[indices]
+ if sum(~np.isnan(cells)) == 0:
+ # no valid cells found for this polygon
+ stats.append(np.nan)
+ continue
+ else:
+ # compute MEAN of valid cells
+ # but this stat can be ANYTHING
+ stats.append(aggregation_func(cells))
+
+ # clean up the result into a dataframe
+ stats = pd.Series(stats)
+ shapes[aggregation_name] = stats
+ df = pd.DataFrame(
+ {"healthshed": shapes[names_column], aggregation_name: stats}
+ )
+ gdf = gpd.GeoDataFrame(df, geometry=shapes.geometry.values, crs=shapes.crs)
+ return gdf
+```
+
+And now we apply it:
+
+```{python}
+result = aggregate_to_healthsheds(
+ res_poly2cell=res_poly2cell,
+ raster=resampled_tiff,
+ shapes=healthsheds,
+ names_column="fid",
+ aggregation_func=np.nanmean,
+ aggregation_name="mean_soil_moisture"
+)
+result.head()
+```
+
+And plot for QA:
+
+```{python}
+result.plot(column="mean_soil_moisture", legend=True)
+plt.title("Mean Soil Moisture (m^3 m^-3) by Health Shed Nov 2017 day 1")
+plt.show()
+```
+
+That looks great! The data is aggregated to the healthshed level, and we can see the differences in exposure across the healthsheds. We can also see that the data is not uniform across the healthsheds, which is what we expect.
+
+## Tests and Main
+
+Now we can wrap this up in a main function that will simply take in the input file and generate this output. We can also add some tests to make sure the data is aggregated correctly; tests will run automatically in this notebook.
+
+```{python}
+import random
+```
+
+
+```{python}
+#| eval: false
+
+
+# variables = ["t2m", "d2m"]
+# years = ["20{:02d}".format(m) for m in range(9, 24)]
+# months = [str(m) for m in range(1, 13)]
+# aggregations = [
+# ("Mean", np.nanmean),
+# ("Max", np.nanmax),
+# ("Min", np.nanmin)
+# ]
+
+# exposure_variable = random.choice(variables)
+# year = random.choice(years)
+# month = random.choice(months)
+# aggregation_str, agg_func = random.choice(aggregations)
+# input_file = here() / "data/input/{}_{}.nc".format(year, month)
+
+# with initialize(version_base=None, config_path="../conf"):
+# cfg = compose(config_name='config.yaml')
+
+# driver = GoogleDriver(json_key_path=here() / cfg.GOOGLE_DRIVE_AUTH_JSON.path)
+# drive = driver.get_drive()
+# healthsheds = driver.read_healthsheds(cfg.GOOGLE_DRIVE_AUTH_JSON.healthsheds_id)
+
+# with ClimateDataFileHandler(input_file) as handler:
+# ds_path = handler.get_dataset("instant")
+# resampled_nc_file = resample_netcdf(ds_path, agg_func=agg_func)
+
+# days = len(resampled_nc_file.valid_time.values)
+# day = random.choice(range(1, days + 1))
+
+# resampled_tiff = netcdf_to_tiff(
+# ds=resampled_nc_file,
+# band=day, # the day we're aggregating
+# variable=exposure_variable,
+# crs="EPSG:4326"
+# )
+
+# res_poly2cell=polygon_to_raster_cells(
+# vectors = healthsheds.geometry.values, # the geometries of the shapefile of the regions
+# raster=resampled_tiff.data, # the raster data above
+# nodata=resampled_tiff.nodata, # any intersections with no data, may have to be np.nan
+# affine=resampled_tiff.transform, # some math thing need to revise
+# all_touched=True,
+# verbose=True
+# )
+
+# result = aggregate_to_healthsheds(
+# res_poly2cell=res_poly2cell,
+# raster=resampled_tiff,
+# shapes=healthsheds,
+# names_column="fs_uid",
+# aggregation_func=agg_func,
+# aggregation_name=exposure_variable
+# )
+
+# result.plot(column=exposure_variable, legend=True)
+# plt.title("{} {} (K) by Health Shed {}".format(aggregation_str, exposure_variable, input_file.stem))
+# plt.suptitle("Aggregation: {}, Day: {}".format(aggregation_str, str(day)))
+# plt.show()
+```
+
+::: {.callout-note}
+**Note:** The above code is commented out to prevent execution during documentation generation. You can uncomment and run it in an appropriate environment to test the aggregation process.
+:::
+
+3.2 seconds per aggregation is pretty cool!
+
+```{python}
+#| eval: false
+result.to_parquet(here() / "data/testing/test_aggregation.parquet")
+```
+
+```{python}
+#| exports:
+#
+def aggregate_data(
+ cfg: DictConfig, # the hydra config
+ input_file: str, # the input netcdf file
+ output_file: str, # the output parquet file
+ exposure_variable: str # Which variable in the dataset to aggregate
+ ) -> None:
+ '''
+ Aggregate raster data day-by-day and store all days and statistics as separate columns in a single Parquet file.
+ '''
+
+ if cfg.development_mode:
+ describe(cfg)
+ return None
+
+ geography = cfg['query'].geography
+ year = cfg['query']['year']
+ month = cfg['query']['month']
+ daily_aggs = cfg['aggregation']['aggregation'][exposure_variable]['hourly_to_daily']
+ healthshed_aggs = cfg['aggregation']['aggregation'][exposure_variable]['daily_to_healthshed']
+
+ # Load healthsheds
+ driver = GoogleDriver(json_key_path=here() / cfg.GOOGLE_DRIVE_AUTH_JSON.path)
+ drive = driver.get_drive()
+ healthsheds = driver.read_healthsheds(cfg.geographies[geography].healthsheds)
+
+ # Initialize output DataFrame
+ result_df = healthsheds[[cfg.geographies[geography].unique_id, "geometry"]].copy()
+
+ for daily_agg in daily_aggs:
+ print(f"Processing daily aggregation: {daily_agg['name']}...")
+
+ daily_agg_func = _get_callable(daily_agg['function'])
+
+ with ClimateDataFileHandler(input_file) as handler:
+ if exposure_variable in ["t2m", "d2m", "swvl1"]:
+ ds_path = handler.get_dataset("instant")
+ else:
+ ds_path = handler.get_dataset("accum")
+ resampled_nc_file = resample_netcdf(ds_path, agg_func=daily_agg_func)
+
+ for healthshed_agg in healthshed_aggs:
+ print(f"Aggregating to healthshed by: {healthshed_agg['name']}...")
+
+ # Get the number of days in the dataset
+ days = len(resampled_nc_file.valid_time.values)
+
+ # Get the aggregation function for healthshed
+ healthshed_agg_func = _get_callable(healthshed_agg['function'])
+ days = len(resampled_nc_file.valid_time.values)
+
+ for day in range(1, days + 1):
+ print(f"Processing day {day}...")
+
+ day_col = f"day_{day:02d}_daily_{daily_agg['name']}"
+ resampled_tiff = netcdf_to_tiff(
+ ds=resampled_nc_file,
+ band=day,
+ variable=exposure_variable,
+ crs="EPSG:4326"
+ )
+
+ result_poly2cell = polygon_to_raster_cells(
+ vectors=healthsheds.geometry.values,
+ raster=resampled_tiff.data,
+ nodata=resampled_tiff.nodata,
+ affine=resampled_tiff.transform,
+ all_touched=True,
+ verbose=True
+ )
+
+ res = aggregate_to_healthsheds(
+ res_poly2cell=result_poly2cell,
+ raster=resampled_tiff,
+ shapes=healthsheds,
+ names_column=cfg.geographies[geography].unique_id,
+ aggregation_func=healthshed_agg_func,
+ aggregation_name=exposure_variable
+ )
+
+ result_df[day_col] = res[exposure_variable]
+
+ print(f"Saving final monthly parquet file: {output_file}")
+ result_df.to_parquet(output_file, compression="snappy")
+ # return(result_df)
+```
+
+```{python}
+#| eval: false
+try:
+ with initialize(version_base=None, config_path="../conf"):
+ cfg = compose(config_name='config.yaml')
+except Exception as e:
+ print(f"Error initializing Hydra: {e}")
+ with initialize(version_base=None, config_path="conf"):
+ cfg = compose(config_name='config.yaml')
+
+cfg.development_mode = False
+cfg.query['year'] = 2017
+cfg.query['month'] = 11
+cfg.query['geography'] = "nepal"
+
+variable = "swvl1"
+
+aggregate_data(cfg, here() / "bld/2017_11_nepal.nc", here() / "data/testing/test_nepal_aggregation.parquet", exposure_variable=variable)
+```
+
+```{python}
+#| eval: false
+parquet_file = gpd.read_parquet(here() / "data/testing/test_nepal_aggregation.parquet")
+```
+
+```{python}
+#| eval: false
+parquet_file
+```
+
+```{python}
+#| eval: false
+parquet_file.plot(column="day_22_daily_mean", legend=True)
+```
+
+```{python}
+#| exports:
+#
+@hydra.main(version_base=None, config_path="../../conf", config_name="config")
+def main(cfg: DictConfig) -> None:
+ # Parse command-line arguments
+ input_file = str(snakemake.input[0]) # First input file
+ output_file = str(snakemake.output[0])
+ geography = str(snakemake.params.geography)
+ aggregation_variable = str(snakemake.params.variable)
+
+ variables_dict = {
+ "2m_temperature": "t2m",
+ "2m_dewpoint_temperature": "d2m",
+ "volumetric_soil_water_layer_1": "swvl1",
+ "total_precipitation": "tp"
+ }
+
+ cfg['query']['geography'] = geography
+
+ aggregate_data(cfg, input_file=input_file, output_file=output_file, exposure_variable=variables_dict[aggregation_variable])
+```
+
+```{python}
+#| export:
+#| eval: false
+try: from nbdev.imports import IN_NOTEBOOK
+except: IN_NOTEBOOK=False
+
+if __name__ == "__main__" and not IN_NOTEBOOK:
+ main()
+```
+
+```{python}
+#| hide:
+#
+import nbdev; nbdev.nbdev_export()
+```
\ No newline at end of file
diff --git a/notes_qmd/03_publish.qmd b/notes_qmd/03_publish.qmd
new file mode 100644
index 0000000..5fb502c
--- /dev/null
+++ b/notes_qmd/03_publish.qmd
@@ -0,0 +1,456 @@
+---
+title: "Publish: Gather the Aggregated Data and Publish to DataVerse"
+engine: jupyter
+---
+
+## publish
+
+> This is the `publish` module for the ERA5 dataset pipeline. It defines functions that use the `pyDataverse` library and API to publish our outputs to the Harvard Dataverse.
+
+```{python}
+#| default_exp publish:
+#
+```
+
+```{python}
+#| hide:
+#
+from nbdev.showdoc import *
+```
+
+First, we'll test out the API by pinging the Harvard DataVerse
+
+```{python}
+#| exports:
+#
+import hydra
+import yaml
+import json
+from tqdm import tqdm
+from pyprojroot import here
+```
+
+```{python}
+api_token_file = here() / "sandbox/dataverse_api_key.yml"
+with open(api_token_file, "r") as f:
+ config = yaml.load(f, Loader=yaml.BaseLoader)
+```
+
+Now, following the [docs](https://pydataverse.readthedocs.io/en/latest/user/basic-usage.html) for the dataverse tutorial, load a NativeAPI up:
+
+```{python}
+#| exports:
+#
+from pyDataverse.api import NativeApi
+```
+
+The NativeAPI is a catchall API object to be able to do general stuff:
+
+```{python}
+api = NativeApi(config['base_url'], config['api_token'])
+resp=api.get_info_version()
+#resp.text()
+```
+
+```{python}
+resp.json()
+```
+
+Looks good! Now that we know that it works, we can think more
+about how to publish data there.
+
+## Harvard Dataverse
+
+Let's create a dummy dataset with the components we're
+planning to upload, and then upload and promptly delete it.
+
+To do that, we must import the `models` module and create a Dataset object:
+
+```{python}
+from pyDataverse.models import Dataset
+```
+
+```{python}
+ds = Dataset()
+```
+
+This `ds` object is pretty straightforward since it doesn't contain anything yet:
+
+```{python}
+ds.get()
+```
+
+We can populate the object from the dummy data on the github repo:
+
+```{python}
+from pyDataverse.utils import read_file
+from urllib.request import urlretrieve
+import tempfile
+```
+
+```{python}
+# url for dummy data
+url = "https://raw.githubusercontent.com/gdcc/pyDataverse/refs/heads/main/tests/data/user-guide/dataset.json"
+
+
+with tempfile.NamedTemporaryFile(mode='w+') as tmp:
+ urlretrieve(url, tmp.name)
+ ds.from_json(read_file(tmp.name))
+```
+
+We have to validate the JSON correctly:
+
+```{python}
+ds.validate_json()
+```
+
+Modifying it is easy:
+
+```{python}
+ds.set({"title": "Youth from Austria 2005"})
+ds.get()
+```
+
+Now, to create the dataset we use the API:
+
+```{python}
+#| eval: false
+# this is only run in interactive sessions for demo purposes
+resp = api.create_dataset(":root", ds.json())
+```
+
+If you caught the `resp` object, it contains the PID for the newly created dataset.
+
+However, if you didn't, you can use the SearchAPI to find it:
+
+```{python}
+#| exports:
+#
+from pyDataverse.api import SearchApi
+```
+
+```{python}
+search_api = SearchApi(config['base_url'], config['api_token'])
+```
+
+```{python}
+#| eval: false
+#
+
+resp = search_api.search("Youth from Austria", data_type="dataset")
+results = resp.json()['data']['items']
+result = [x for x in results if "Youth from Austria" in x['name']][0]
+result
+```
+
+```{python}
+#| eval: false
+pid = result['global_id']
+```
+
+Now to look at the data we created using the NativeAPI again, and delete the dataset:
+
+```{python}
+#| eval: false
+
+uploaded_ds = api.get_dataset(pid)
+uploaded_ds.json()['data']
+
+resp = api.delete_dataset(pid)
+resp.json()
+```
+
+With that understanding, we can develop a quick module to do the following:
+
+1. Make the dataset LEGO Compatible
+2. Upload and publish the data to dataverse
+
+## LEGO Compatibility
+
+Let's take an example file to use as a model for LEGO compatibility
+
+```{python}
+#| exports:
+#
+import geopandas as gpd
+import pandas as pd
+import re
+import glob
+```
+
+```{python}
+ex = gpd.read_parquet(here() / "bld/2009_06_madagascar_day_swvl1_mean.parquet")
+ex.describe()
+```
+
+We know that the LEGO data model should look like this:
+
+```
+/lego
+├──
+│ ├── __
+│ │ ├── __
+│ │ │ ├── _yyyy.parquet
+```
+
+So, for the above file, we'll end up with the LEGO path `data/environmental/exposures_era5/healthshed_monthly/dewpoint_2024.parquet`. In it, we should have the following columns:
+
+
+```
+healthshed_id year month day stat_1 stat_2 ... stat_n
+```
+
+
+This means we should read in all of the exposures for a single timepoint at once.
+I think the smart thing to do is use a glob string to gather all of the pertinent files.
+This will be the first function we export to the library:
+
+```{python}
+#| exports:
+#
+
+def gather_exposure_geodataframes(
+ glob_string: str, # string for the path to search for the pertinent files
+ polygon_id: str, # the string signifying the healthshed ID of the polygon
+ exposure: str # the exposure name
+ )-> list:
+ "Read in a list of geo dataframes from the same time frame and merge them"
+
+ # first get the initial one so we have the polygon ID and geometry
+ frames = glob.glob(str(glob_string))
+ initial_gdf=gpd.read_parquet(frames[0])
+ merged_df = []
+
+ for f in tqdm(frames, desc="Processing files"):
+ # read in as a regular dataframe by ignoring geometry
+ df = gpd.read_parquet(f).drop(["geometry"], axis=1)
+
+ # get the year and month
+ # Extract year and month
+ search_str = rf'_{exposure}_(\d{{4}})_(\d{{1,2}})\.parquet$'
+ match = re.search(search_str, f)
+
+ if match:
+ year = int(match.group(1))
+ month = int(match.group(2))
+ #print(f"Year: {year}, Month: {month}")
+ else:
+ raise ValueError(f"Could not extract year and month from filename: {search_str} {f}")
+
+ df['exposure'] = exposure
+ df['month'] = month
+ df['year'] = year
+
+ # Step 1: Melt all day columns (leave 'month' and 'year' as identifiers)
+ df_long = df.melt(id_vars=[polygon_id, "exposure", "year", "month"], var_name="day_stat", value_name="value")
+
+ # Step 2: Extract day and stat type from column names
+ # Example column: "day_01_daily_mean"
+ df_long[["day", "stat"]] = df_long["day_stat"].str.extract(r"day_(\d{2})_daily_(mean|max|min|total)")
+
+ # Optional: convert 'day' and month to integer
+ df_long["day"] = df_long["day"].astype(int)
+ df_long["month"] = df_long["month"].astype(int)
+
+ # Drop the original combined column
+ df_long = df_long.drop(columns="day_stat")
+
+ # Reorder columns
+ df_long = df_long[[polygon_id, "exposure", "year", "month", "day", "stat", "value"]]
+
+ df_long = df_long.sort_values(by=["year", "month", "day"])
+ df_clean = df_long.pivot(index=[polygon_id, "exposure", "year", "month", "day"], columns="stat", values="value").reset_index()
+ merged_df.append(df_clean)
+
+ return [pd.concat(merged_df).reset_index(drop=True), initial_gdf[[polygon_id, "geometry"]]]
+```
+
+```{python}
+frames = here() / "data" / "testing" / "*madagascar*"
+
+merged = gather_exposure_geodataframes(frames, "fs_uid", "2m_dewpoint_temperature")
+merged[0].describe()
+```
+
+This returns a list of two objects: a data frame with the statistics and exposures,
+and a GeoDataFrame with all of the geometries.
+
+Now, with this, we can move on. The dataset was created in the UI and is available via search, so we can look it up and test how to upload to it:
+
+```{python}
+resp = search_api.search("ERA5", data_type="dataset")
+
+results = resp.json()['data']['items']
+
+result = [x for x in results if "ERA5" in x['name']][0]
+era5_pid = result['global_id']
+result
+```
+
+```{python}
+#| exports:
+#
+
+from pyDataverse.models import Datafile
+import os
+import pathlib
+```
+
+We'll upload directly from file. In the case of ERA5 vs. LEGO, we
+store the file on disk in the LEGO hierarchy, but upload it to dataverse
+under a flat filename (since creating subdatasets to represent directories might be
+a bit of a hassle).
+
+```{python}
+# assuming the file has a path on disk like:
+f_out = "environmental/exposures_era5/healthshed_daily/dewpoint_2024.parquet"
+os.makedirs(here() / "data" / "testing" / os.path.dirname(f_out), exist_ok=True)
+aggregations, geo = merged
+aggregations.to_parquet(here() / "data" / "testing" / f_out, index=False)
+
+datafile = Datafile()
+datafile.set({
+ # the id of the era5 dataset
+ "pid": era5_pid,
+ # the path to the file on disk goes here
+ "filename": str(here() / "data" / "testing" / f_out),
+ # use the "label" to name the file
+ "label": f_out.replace("/", "-")
+})
+```
+
+```{python}
+#| eval: false
+resp = api.upload_datafile(era5_pid, str(here() / "data" / "testing" / f_out), datafile.json())
+```
+
+Pretty simple!
+
+Now, we just need a main function to upload this data. The final upload is one file per
+exposure per year, so these should be the variables we gather data for.
+
+We should get some functionality to gather the groups of these files automatically, based on
+the hydra config:
+
+```{python}
+#| exports:
+#
+from hydra import initialize, compose
+from omegaconf import OmegaConf, DictConfig
+from tqdm import tqdm
+```
+
+```{python}
+target_dir = here() / "data" / "intermediate"
+
+try:
+ with initialize(version_base=None, config_path="../conf"):
+ cfg = compose(config_name='config.yaml')
+except Exception as e:
+ print(f"Error initializing Hydra: {e}")
+ with initialize(version_base=None, config_path="conf"):
+ cfg = compose(config_name='config.yaml')
+
+cfg.development_mode = False
+#cfg.query['year'] = 2017
+#cfg.query['month'] = 11
+#cfg.query['geography'] = "nepal"
+```
+
+```{python}
+#| exports:
+#
+
+@hydra.main(version_base=None, config_path="../../conf", config_name="config")
+def main(cfg: DictConfig) -> None:
+
+ variables_dict = {
+ "2m_temperature": "t2m",
+ "2m_dewpoint_temperature": "d2m",
+ "volumetric_soil_water_layer_1": "swvl1",
+ "total_precipitation": "tp"
+ }
+
+ print(OmegaConf.to_yaml(cfg))
+
+ #prep dataverse
+ api_token_file = here() / "sandbox/dataverse_api_key.yml"
+ with open(api_token_file, "r") as f:
+ apiconfig = yaml.load(f, Loader=yaml.BaseLoader)
+ api = NativeApi(apiconfig['base_url'], apiconfig['api_token'])
+ search_api = SearchApi(apiconfig['base_url'], apiconfig['api_token'])
+ resp = search_api.search("ERA5", data_type="dataset")
+
+ results = resp.json()['data']['items']
+
+ result = [x for x in results if "ERA5" in x['name']][0]
+ era5_pid = result['global_id']
+
+ for geography in cfg.geographies:
+ for year in cfg.query['year']:
+ for variable, v in variables_dict.items():
+
+ print(f"Processing {geography} for {variable} in {year}")
+ glob_string = here() / "data" / "intermediate" / f"*{geography}*{variable}*{year}*"
+ print(f"Glob: {glob_string}")
+ polygon_id = cfg.geographies[geography]['unique_id']
+ print(f"polygon_id: {polygon_id}")
+ merged = gather_exposure_geodataframes(glob_string, polygon_id, variable)
+ print(merged[0].head())
+ print(merged[1].head())
+
+ output_dir = here() / "data" / "output"
+
+ f_out = f"environmental/exposures_era5/healthshed_daily/{geography}_{v}_{year}.parquet"
+ os.makedirs(output_dir / os.path.dirname(f_out), exist_ok=True)
+ output_path = output_dir / f_out
+
+ print(f"Writing to {output_path}")
+ merged[0].to_parquet(output_path, index=False)
+
+
+ print(f"Uploading {f_out.replace('/', '-')} to Dataverse...")
+ # upload to dataverse
+ datafile = Datafile()
+ datafile.set({
+ "pid": era5_pid,
+ "filename": str(output_path),
+ "label": f_out.replace("/", "-")
+ })
+
+ resp = api.upload_datafile(era5_pid, output_path, datafile.json())
+ assert resp.json()['status'] == "OK", f"Failed to upload datafile: {resp.text}"
+
+ # also save the geometry for the region
+ merged[1].to_parquet(output_path.parent / f"{geography}_geometry.parquet", index=False)
+
+ # and upload it to dataverse
+ datafile = Datafile()
+ datafile.set({
+ "pid": era5_pid,
+ "filename": str(output_path.parent / f"{geography}_geometry.parquet"),
+ "label": f"{geography}_geometry.parquet"
+ })
+
+ resp = api.upload_datafile(era5_pid, output_path.parent / f"{geography}_geometry.parquet", datafile.json())
+ assert resp.json()['status'] == "OK", f"Failed to upload geometry datafile: {resp.text}"
+
+ print("All files processed and uploaded successfully.")
+
+```
+
+```{python}
+#| export:
+#| eval: false
+try: from nbdev.imports import IN_NOTEBOOK
+except: IN_NOTEBOOK=False
+
+if __name__ == "__main__" and not IN_NOTEBOOK:
+ main()
+```
+
+```{python}
+#| hide:
+#
+import nbdev; nbdev.nbdev_export()
+```
\ No newline at end of file
diff --git a/notes_qmd/10_pytask_demo.qmd b/notes_qmd/10_pytask_demo.qmd
new file mode 100644
index 0000000..1757558
--- /dev/null
+++ b/notes_qmd/10_pytask_demo.qmd
@@ -0,0 +1,279 @@
+---
+title: "Demo: How to Create Pipelines with `pytask`"
+engine: jupyter
+---
+
+## Data Preparation Demo
+
+> Data preparation task for `pytask` demo
+
+In this notebook, we are demonstrating how to convert our snakemake workflow into a `pytask` workflow. We use the basic tutorial to demonstrate this, but continue
+to use nbdev for development of functions in notebooks.
+
+`pytask` is a task management system that allows you to define tasks and their dependencies, similar to `Snakemake`. It is particularly useful for data science workflows.
+
+There are a number of reasons to use `pytask` over `snakemake`:
+- **Pythonic**: `pytask` is designed to be purely Pythonic by default, allowing you to write tasks and entire pipelines as Python functions.
+- **Flexibility**: `pytask` allows you to define tasks and their dependencies in a more flexible way, using Python functions and decorators, as opposed to orchestrating numerous scripts.
+- **Integration**: `pytask` integrates well with other Python libraries, such as `nbdev` here, or `hydra` configurations if you need, allowing you to use your existing code, notebooks, or configs in your workflows.
+- **Parallelism**: `pytask` supports parallel execution of tasks with `pytask-parallel`, which can speed up your workflows significantly, especially for data processing tasks.
+
+We'll use nbdev to define the task functions, and then export them to the `src` directory. `pytask` is then invoked at the command line to run the tasks.
+
+```{python}
+#| default_exp task_data_preparation:
+#
+```
+
+This demo task is taken from the tutorial in the [pytask documentation](https://pytask-dev.readthedocs.io/en/stable/tutorials/write_a_task.html). At minimum, your project needs the following layout, including a `config.py` module inside the package:
+
+```md
+my_project
+│
+├───.pytask
+│
+├───bld
+│ └────...
+│
+├───src
+│ └───my_project
+│ ├────__init__.py
+│ ├────config.py
+│ └────...
+│
+└───pyproject.toml
+```
+
+```python
+#contents of `era5_sandbox.config` module
+from pathlib import Path
+
+
+SRC = Path(__file__).parent.resolve()
+BLD = SRC.joinpath("..", "..", "bld").resolve()
+```
+
+Additionally, your pyproject.toml file should contain the following at minimum:
+
+```toml
+[tool.pytask.ini_options]
+paths = ["src/era5_sandbox"]
+```
+
+The former tells Python where to find the source code and build directory for `pytask` objects and shims, while the latter tells `pytask` where to find the task definitions and dependency DAG.
+
+```{python}
+#| exports:
+#
+import os
+from pathlib import Path
+from typing import Annotated
+
+import numpy as np
+import matplotlib.pyplot as plt
+import pandas as pd
+from era5_sandbox.config import BLD
+from era5_sandbox.config import data_catalog, demo_catalog
+
+from pytask import PickleNode
+from pytask import Product
+```
+
+
+### Defining Tasks
+
+To define a task, simply use the `task_` prefix in the function name (or, if you are familiar and comfortable with decorators, use `@pytask.mark.task`). Be verbose and expressive in your use of type hints to specify the input and output data, so that `pytask` can automatically detect and handle the dependencies between tasks.
+
+### Defining Tracked Outputs
+
+To define something as a tracked output, you can annotate the input of the task with `Annotated[Path, Product]`, where `Product` is imported from `pytask`. This tells `pytask` that this is a product of the task and should be saved in the build directory.
+
+In this example, we're generating random data into a data frame and saving the object as a pickle in the `bld` directory (`bld` is the default build directory for `pytask`'s intermediate data). To get that directory, we use the `BLD` variable from the `era5_sandbox.config` module as above. This module itself could also be generated using `nbdev` if you want to keep your configuration in notebooks.
+
+Using `nbdev`, we can also include all of the bells and whistles of function documentation.
+
+```{python}
+#| exports:
+#
+
+def task_create_random_data(
+ seed: Annotated[int, 42], # Default seed for reproducibility
+ path_to_data: Annotated[Path, Product] = BLD / "data.pkl" # Path to the object in the build directory
+ ) -> None:
+ "Create a random data set and save it as a pickle file. Return the path to the saved file."
+ rng = np.random.default_rng(seed)
+ beta = 2
+
+ x = rng.normal(loc=5, scale=10, size=1_000)
+ epsilon = rng.standard_normal(1_000)
+
+ y = beta * x + epsilon
+
+ df = pd.DataFrame({"x": x, "y": y})
+
+ # this is a tracked output, so we annotate the return value with `Annotated[Path, Product]`
+ df.to_pickle(path_to_data)
+```
+
+We can test the function directly in the notebook:
+
+```{python}
+task_create_random_data(42)
+```
+
+Once this module and function are exported with `nbdev_export`, the functions are in a python package. We can then use the command line to look at the registered tasks:
+
+```{python}
+#| eval: false
+
+%%sh
+pytask collect
+```
+
+Let's add another task in the same module. This task plots the data we generated. To link the previous task to this one as a dependency, we can list the output of the previous task as an input to this one. This way, `pytask` will know that it needs to run the first task before this one.
+
+```{python}
+#| exports:
+#
+
+def task_plot_data(
+ path_to_data: Annotated[Path, BLD / "data.pkl"], # Path to the data file created by the previous task
+ path_to_plot: Annotated[Path, Product] = BLD / "plot.png" # Path to the build directory for the plot
+) -> None:
+ """
+ Plot the data from the pickle file and save the plot. Note that this task:
+ 1. depends on the data.pkl file created by the previous task,
+ 2. does not return any value, but saves a plot to the build directory. So the side effect of the task is what we are interested in here (though this is probably bad practice).
+ """
+
+ df = pd.read_pickle(path_to_data)
+
+ _, ax = plt.subplots()
+ df.plot(x="x", y="y", ax=ax, kind="scatter")
+
+ plt.savefig(path_to_plot)
+ plt.close()
+```
+
+We now have a DAG of tasks that `pytask` can execute. To see the tasks, we can use the command line to create a pygraphviz graph of the tasks:
+
+```bash
+pytask dag
+```
+
+The DAG is saved as a pdf file, and you can view it using any viewer. Now, to run the pipeline, just invoke `pytask` at the command line:
+
+```bash
+pytask
+```
+
+In Jupyter or iPython, you can interact with the task outputs directly:
+
+```{python}
+#| eval: false
+
+# list all the files in the build directory
+for file in os.listdir(BLD):
+ print(file)
+```
+
+We can use these to build subsequent tasks later.
+
+## More Complex Tasks & The Data Catalog
+
+As we define more complex tasks, we can use the `pytask` data catalog to manage the inputs and outputs of our tasks. The data catalog allows us to imperatively name the data and their formats, making it easier to manage the data flow in our tasks. Importantly, we can define the data pythonically, which allows us to use the full power of Python to manipulate and transform our data. This is particularly more useful than snakemake's approach, which requires you to define the data in a more static way using paths and a separate pseudo-language.
+
+The content of the `era5_sandbox.config` module can be extended to include a data catalog:
+
+```python
+from pathlib import Path
+from pytask import DataCatalog, Product
+
+SRC = Path(__file__).parent.resolve()
+BLD = SRC.joinpath("..", "..", "bld").resolve()
+
+demo_catalog = DataCatalog()
+```
+
+With just this definition, we're now able to refer directly to data by name in our tasks, and `pytask` will handle the paths and formats for us. This allows us to focus on the logic of our tasks rather than the details of data management.
+
+:::{.callout-note}
+This is a major advantage of `pytask` over `snakemake`, as it allows you to define the data in a more flexible and Pythonic way, while still maintaining the benefits of a task management system. It is a similar approach to building pipelines in R with targets, which allows you to define the data in a more flexible way.
+:::
+
+Let's create a task that modifies the data frame by adding a new column. This task will depend on the previous task's output, and we will use the data catalog to define the input and output data.
+
+```{python}
+#| exports:
+#
+
+def task_add_one(
+ path_to_data: Annotated[Path, BLD / "data.pkl"], # Path to the data file created by the previous task
+ node: Annotated[PickleNode, Product] = demo_catalog["mydata"]
+) -> None:
+ """
+ Add one to the 'y' column of the data frame and save it as a new pickle file.
+ """
+ df = pd.read_pickle(path_to_data)
+ df['z'] = df['y'] + 1
+
+ node.save(df)
+```
+
+In this function, we've defined that the task relies on the output of the first task being there, the `data.pkl` file. But importantly, we've also defined our product as a `node` of type `PickleNode`. This will allow `pytask` to handle the serialization and deserialization of the data frame automatically, so we don't have to worry about the details of how the data is stored. We create the data catalog in our config file, and then tell this task to create a node in that catalog called `mydata`. Whatever we save with the `node.save()` method will be saved in the build directory, but more importantly _will be indexed and hashed by `pytask`_. This means that if the data changes, `pytask` will know to rerun the task.
+
+To make this even more pythonic, we can modify the format of our task function so that the return type annotation is used as a node in the data catalog. This allows us to define the output of the task as a `PickleNode`, which will automatically handle the serialization and deserialization of the data frame.
+
+:::{.callout-note}
+This is another trick I'm deriving from {targets}. By formatting tasks as pure functions where inputs are parameters and targets are return type annotations, we can define the output of the task as a `PickleNode`, which will automatically handle the serialization and deserialization of the data frame. This again allows us to focus on the logic of our tasks rather than the details of data management.
+:::
+
+So below, we're using the `demo_catalog` to get the `mydata` node, and then modifying its data frame by adding a new column. It _feels_ like we are doing this in place, such as in an iPython session, because we are allowing `pytask` to handle the serialization of the file on disk for us.
+
+```{python}
+#| exports:
+#
+
+def task_add_another_column(
+ df: Annotated[pd.DataFrame, demo_catalog["mydata"]] # which object in the catalog to fetch from the catalog with node.load()
+) -> Annotated[pd.DataFrame, demo_catalog["mydata2"]]: # which object in the catalog to save the return value to
+ """
+ Add another column to the data frame stored in the PickleNode.
+ """
+
+ # use the datacatalog directly to access the node
+ # this is a bit like accessing the node in an iPython session, but pytask
+ # will handle the serialization and deserialization for us
+ df['w'] = df['z'] * df['y']
+
+ return df
+```
+
+To test this interactively, we have to load the input data frame from the data catalog ourselves:
+
+```{python}
+df = demo_catalog["mydata"].load() # load the data frame from the PickleNode
+result = task_add_another_column(df) # call the task function with the loaded data frame
+```
+
+```{python}
+result
+```
+
+Now that we know it will work, we can invoke pytask:
+
+```{python}
+#| eval: false
+%%sh
+pytask
+```
+
+Notice that the outputs are cached and not recomputed unless the inputs change. This is a key feature of `pytask` and other DAGs, allowing you to efficiently manage your data processing tasks without unnecessary recomputation.
+
+## Conclusion
+
+The takeaway here is that with `pytask`, you can define pure functions that take inputs and return outputs, and build a DAG of tasks that can be executed in a flexible and efficient way. This allows you to focus on the logic of your tasks rather than the details of data management, while still maintaining the benefits of a task management system. The key elements are:
+
+- **Task annotation**: You define your tasks by creating pure functions that take inputs and return outputs, and use decorators or naming conventions to mark them as "tasks" in a DAG.
+- **Input and output annotation**: You define the inputs and outputs of your tasks using type hints, and allow `pytask` to automatically detect and handle the dependencies between tasks.
+- **Data catalog**: You define your data in a Pythonic object in your config called `data_catalog`. As you iteratively develop your DAG, you add objects to the data catalog, which are called nodes. As long as a node is a pythonic object and has a pickle method, `pytask` will handle the serialization and deserialization of the data for you.
\ No newline at end of file
diff --git a/notes_qmd/20_pytask_config.qmd b/notes_qmd/20_pytask_config.qmd
new file mode 100644
index 0000000..0aff0fb
--- /dev/null
+++ b/notes_qmd/20_pytask_config.qmd
@@ -0,0 +1,332 @@
+---
+title: "`pytask` Config: Defining the Pipeline Internals in `pytask`"
+engine: jupyter
+---
+
+## config
+
+> This is the config module for the `pytask` pipeline.
+This module defines the data catalog(s) and any hard-coded parameters that are used throughout the pipeline.
+
+```{python}
+#| default_exp config:
+#
+```
+
+```{python}
+#| hide:
+#
+from nbdev.showdoc import *
+```
+
+```{python}
+#| exports:
+#
+
+import pandas as pd
+
+from pathlib import Path
+from pyprojroot import here
+from pytask import DataCatalog
+
+
+SRC = here() / "src" / "era5_sandbox"
+BLD = here() / "bld"
+
+demo_catalog = DataCatalog()
+```
+
+## `DEV_MODE`: A Quick Development Flag
+
+I'm adding a flag to the config that can be used for quick development.
+If you import this boolean variable, it can be used to skip tasks,
+setup samples, etc. on the fly by `marking` a task with the `pytask.mark.skipif`
+decorator. Change this to `False` when you're ready to run the full pipeline.
+
+```{python}
+#| exports:
+#
+DEV_MODE=True
+```
+
+## The Data Catalog
+
+To manage our pipeline, we're going to use a nested data catalog structure.
+This way, we can easily return specific entries to specific tasks
+without having to manage multiple different data catalogs. Specifically,
+we'll have a data catalog for each stage of the pipeline, and each catalog
+will have entries for the inputs, outputs, and any other parameters needed
+for that stage. This is similar to how we used Hydra configs, but
+using the `pytask` data catalog, we can more easily gather the data
+for a specific task in a structured manner entirely in Python.
+
+```{python}
+#| exports:
+#
+
+stages = ["mydata", 'mydata2', # from the demo, ignore
+ "download", # download task
+ "aggregate", # aggregation task
+ "publish", # publishing task
+ "viz"] # visualization task
+
+buckets = [
+ "inputs", # any specific inputs, eg for carrying over between tasks
+ "outputs", # specific output task returns
+ "jobs", # job parameters as a dataframe
+ "params" # any lingering hardcoded parameters
+ ]
+
+data_catalog = {
+
+ stage: {bucket: DataCatalog(name=f"{stage}_{bucket}") for bucket in buckets}
+ for stage in stages
+}
+```
+
+
+## The Download Task
+
+A good strategy may be to set pipeline stage parameters in the config file,
+and then use the `pytask` data catalog to manage the data. This way, we can
+easily change the parameters without having to modify the code. This is especially
+useful for the API query, where we need to be able to set the parameter grid for
+the years and data types we want to download data for. So, let's create an entry in the data catalog specifically for the download task.
+
+A good strategy I thought about for grid parameter comprehension is to create a dataframe that expands all the combinations of
+parameters, and then uses each combination to create the tasks which are then
+easily added to the data catalog. This way, we can still easily inspect the
+pipeline and see what tasks are being run, while also being able to easily
+change the parameters in the config file without too much hassle.
+
+An important framework decision I'm making here is that each ROW of the dataframe corresponds to a single task, so that we can quickly understand at a glance what the task is doing, and also easily develop the code for the task itself. This is different from the hydra approach where a job is first specified by a default config, and then the parameters are swept over in multiple config files. This is a more flexible approach, IMO, because:
+
+1. each row defines a single task run, so it's easy to understand what the run is doing
+2. it's easy to add or remove runs by simply expanding the list of parameters and using dataframe filters to remove irrelevant parameter combinations
+3. we don't have to independently inspect and manage multiple different/overriding config files
+4. it's all in Python, so we can use the full power of the language to define
+   the parameters and the tasks in a single sweep, without needing the
+   multi-stage, multi-language hydra+snakemake config system
+
+So, to do this, we define one job as a query to the CDS API that must contain:
+- The dataset (re-analysis)
+- The year
+- The month
+- All days in the month
+- All times of day (hour)
+- The geography (region), which will need:
+ - The URL to the shapefile to calculate the bounding box
+
+Given one combination of all of these, a single SLURM job can complete the first "task" in parallel by having a run assigned to each row of the dataframe.
+
+```{python}
+#| exports:
+
+# a dataframe for the query parameters, with nested entries for days, times, and variables
+# Dimensions
+years = [str(x) for x in range(2009, 2025)] # 16 years
+months = [str(x).zfill(2) for x in range(1, 13)] # 12 months
+geographies = ["madagascar", "nepal"] # 2 geographies
+
+# nested values; we want ALL days, times, and variables for each job
+days = [str(x).zfill(2) for x in range(1, 32)]
+times = [f"{x:02d}:00" for x in range(24)]
+variables = ["2m_dewpoint_temperature", "2m_temperature", "total_precipitation", "volumetric_soil_water_layer_1"]
+
+product_type = "reanalysis"
+
+# Map shapefiles to geography
+shapefiles = {
+ "madagascar": "https://data.humdata.org/dataset/26fa506b-0727-4d9d-a590-d2abee21ee22/resource/ed94d52e-349e-41be-80cb-62dc0435bd34/download/mdg_adm_bngrc_ocha_20181031_shp.zip",
+ "nepal": "https://data.humdata.org/dataset/07db728a-4f0f-4e98-8eb0-8fa9df61f01c/resource/2eb4c47f-fd6e-425d-b623-d35be1a7640e/download/npl_adm_nd_20240314_ab_shp.zip"
+}
+
+# Build row-wise combinations of (year, month, geography)
+rows = []
+for year in years:
+ for month in months:
+ for geo in geographies:
+ rows.append({
+ "year": year,
+ "month": month,
+ "geography": geo,
+ "shapefile": shapefiles[geo],
+ "product_type": product_type,
+ "day": days,
+ "time": times,
+ "variables": variables,
+ "output": f"{year}_{month}_{geo}"
+ })
+
+# Create dataframe
+query_df = pd.DataFrame(rows)
+```
+
+```{python}
+query_df
+```
+
+```{python}
+print(f"Number of estimated jobs: {query_df.shape[0]}. Examples...")
+
+for i, row in query_df.sample(3).iterrows():
+ print(f"Year: {row['year']}, Month: {row['month']}, Geography: {row['geography']}, Link: {row['shapefile']}, Variables: {row['variables']}")
+```
+
+Now add them to the catalog. We're going to use a dictionary to
+nest data catalogs so that we can return specific task products to
+named data catalog nodes.
+
+```{python}
+#| export:
+# set up catalog
+# Register the query dataframe as the "queries_df" entry of the download stage's "jobs" catalog.
+
+data_catalog['download']['jobs'].add("queries_df", query_df)
+```
+
+Our data catalog now has a `download|jobs` node with a `queries_df` entry that contains the dataframe of all the jobs to be run in this task.
+
+```{python}
+data_catalog['download']['jobs']['queries_df'].load().head()
+```
+
+## The Aggregation Task
+
+To carry out the aggregation, we will follow similar logic to the original pipeline and use xarray to aggregate data into spatial and temporal averages. The aggregation task will take the downloaded data and compute the mean over the specified time period and spatial region. However, in this case, we want to aggregate the data diurnally, so we will need to fetch the sundown and sunrise times for the region and use them to compute the diurnal averages.
+
+Once again, we will use a dataframe to define the parameters for the aggregation task.
+
+Here we will use a dataframe with the jobs as rows;
+the first column is "input" which is the list of query names from
+the download task, and the last column is the output object name. Columns
+in between can be the parameters needed for the aggregation task, which
+then get expanded to the full list of jobs with `itertools.product`, `explode` or similar,
+and filtered as necessary.
+
+For explanations of the parameters, see the Aggregation Task notebook's final `task_aggregate_data_diurnal` function.
+
+```{python}
+#| exports:
+
+# aggregate task parameters
+
+inputs = query_df["output"].tolist()
+outputs = [f"{i}_agg" for i in inputs]
+
+variable_dict = {
+ "2m_dewpoint_temperature": "d2m",
+ "2m_temperature": "t2m",
+ "total_precipitation": "tp",
+ "volumetric_soil_water_layer_1": "swvl1"
+}
+
+# list of params that get fed into the task functions
+agg_params = {
+ "time": ["day", "night"],
+ "solar_classification": ["before"],
+ "variables": variables,
+ "variables_short": [variable_dict[x] for x in variables],
+ "aggregation_name": ["mean", "sum", "max", "min"]
+}
+
+from itertools import product
+import pandas as pd
+
+# expand all the params
+agg_params = pd.DataFrame(list(product(*agg_params.values())), columns=agg_params.keys())
+```
+
+Inspecting it:
+
+```{python}
+agg_params
+```
+
+Let's keep only rows where the variables and variables_short match
+
+```{python}
+#| exports:
+# quick filter to keep only matching rows
+
+agg_params = agg_params[agg_params.apply(lambda x: variable_dict[x['variables']] == x['variables_short'], axis=1)]
+```
+
+```{python}
+agg_params
+```
+
+Great, and now keeping `sum` only for total precipitation (we don't need mean, max, min for that variable), and removing `sum` for all other variables (we don't need sum for temperature or soil moisture):
+
+```{python}
+#| exports:
+# remove rows where tp aggregation is not sum
+mask = (agg_params['variables_short'] == "tp") & (agg_params['aggregation_name'] != "sum")
+agg_params = agg_params[~mask]
+
+# remove rows where non-tp aggregation is sum
+mask = (agg_params['variables_short'] != "tp") & (agg_params['aggregation_name'] == "sum")
+agg_params = agg_params[~mask]
+```
+
+```{python}
+agg_params
+```
+
+Now we add the input and output columns by joining:
+
+```{python}
+#| exports:
+# set up inputs and parameters
+inputs = pd.DataFrame({"input": inputs})
+aggregate_jobs = inputs.merge(agg_params, how="cross")
+```
+
+This result gives us the full list of jobs for the aggregation task. 20 rows for the parameters,
+and 384 inputs/outputs, giving a total of 7680 jobs:
+
+```{python}
+assert aggregate_jobs.shape[0] == 20 * len(inputs)
+aggregate_jobs
+```
+
+A few more configuration items need to be added, like
+the local timezone for each geography, the healthshed filename,
+the healthshed unique ID variable name in the shapefile,
+and whether the variable is instantaneous or accumulated:
+
+```{python}
+#| exports:
+# add a few more columns
+aggregate_jobs['local_tz'] = aggregate_jobs['input'].apply(
+ lambda x: "Asia/Kathmandu" if "nepal" in x else "Indian/Antananarivo"
+)
+aggregate_jobs['shapefile'] = aggregate_jobs['input'].apply(
+ lambda x: "Nepal_Healthsheds2024.zip" if "nepal" in x else "healthsheds2022.zip"
+)
+
+aggregate_jobs['hshd_unique_id'] = aggregate_jobs['input'].apply(
+ lambda x: "fid" if "nepal" in x else "fs_uid"
+)
+
+aggregate_jobs['climate_handler_var'] = aggregate_jobs['variables_short'].apply(
+ lambda x: "accum" if x == "tp" else "instant"
+)
+```
+
+```{python}
+aggregate_jobs
+```
+
+Now we add this to the data catalog:
+
+```{python}
+#| exports:
+# update catalog
+# Register the aggregation jobs dataframe as the "jobs_df" entry of the aggregate stage's "jobs" catalog.
+data_catalog['aggregate']['jobs'].add("jobs_df", aggregate_jobs)
+```
+
+Our data catalog now has an `aggregate|jobs` node with a `jobs_df` entry that contains the dataframe of all the jobs to be run in this task.
+
+```{python}
+data_catalog['aggregate']['jobs']['jobs_df'].load().head()
+```
\ No newline at end of file
diff --git a/notes_qmd/20_pytask_logger.qmd b/notes_qmd/20_pytask_logger.qmd
new file mode 100644
index 0000000..430715f
--- /dev/null
+++ b/notes_qmd/20_pytask_logger.qmd
@@ -0,0 +1,57 @@
+---
+title: "Logging: A simple logger to inject into `pytask` jobs"
+engine: jupyter
+---
+
+## logger
+
+> A simple logger module for the pytask tasks
+
+```{python}
+#| default_exp pytask_logger:
+#|
+```
+
+```{python}
+#| hide:
+# showdoc
+from nbdev.showdoc import *
+```
+
+```{python}
+#| exports:
+# imports
+
+import logging
+from pathlib import Path
+from pyprojroot import here
+from datetime import datetime
+
+LOG_DIR = here("logs")
+# get the date & time for the log file name
+log_date = datetime.now().strftime("%Y-%m-%d")
+log_time = datetime.now().strftime("%H-%M-%S")
+LOG_DIR = here("logs") / log_date / log_time
+
+```
+
+```{python}
+#| exports:
+# main function to setup a logger
+
+
+
+def setup_logger(name: str, log_path: Path=LOG_DIR, level=logging.INFO) -> logging.Logger:
+ log_path.mkdir(parents=True, exist_ok=True)
+ formatter = logging.Formatter('%(asctime)s — %(name)s — %(levelname)s — %(message)s')
+
+ handler = logging.FileHandler(log_path / f"{name}.log", mode='a')
+ handler.setFormatter(formatter)
+
+ logger = logging.getLogger(name)
+ logger.setLevel(level)
+ logger.addHandler(handler)
+ logger.propagate = False
+
+ return logger
+```
\ No newline at end of file
diff --git a/notes_qmd/21_pytask_download.qmd b/notes_qmd/21_pytask_download.qmd
new file mode 100644
index 0000000..85f2e20
--- /dev/null
+++ b/notes_qmd/21_pytask_download.qmd
@@ -0,0 +1,170 @@
+---
+title: "Download: `download` Module as a `pytask` Task"
+engine: jupyter
+---
+
+## task_download
+
+> This module downloads the raw era5 data from the CDS API. It is similar to the original script, refactored for `pytask`.
+
+```{python}
+#| default_exp task_download:
+#|
+```
+
+```{python}
+#| hide:
+# showdoc
+from nbdev.showdoc import *
+```
+
+We're going to quickly refactor the pipeline to use pytask instead of hydra and snakemake. This will hopefully demonstrate a simpler and more flexible way to manage data pipelines in Python.
+
+To start off, we need to create a function that queries the CDS API with one job. This function will be used to download the data for each query in the range specified in the data catalog in the config file.
+
+Let's take a look at the data catalog we created in the config module:
+
+```{python}
+#| export:
+# necessary imports
+import cdsapi
+import pytask
+import os
+from pytask import task, Product
+from pathlib import Path
+from typing import Annotated
+from pandas import Series
+
+from era5_sandbox.config import data_catalog
+from era5_sandbox.config import BLD
+from era5_sandbox.config import DEV_MODE
+from era5_sandbox.pytask_logger import setup_logger
+from era5_sandbox.download import fetch_GADM, create_bounding_box
+
+```
+
+You can see the queries entry we created in the data catalog. Each query is a row of a dataframe that contains the parameters for the CDS API query.
+
+```{python}
+queries = data_catalog['download']['jobs']['queries_df'].load()
+queries
+```
+
+We can test this query like we did in the original work:
+
+
+```{python}
+example_query = queries.iloc[0]
+
+create_bounding_box(example_query['shapefile'])
+```
+
+In this way, we have a similar approach to Hydra configs, but, using the `pytask` data catalog, we can more easily gather the data for a specific task in a structured manner entirely in Python.
+
+```{python}
+#| eval: false
+
+client = cdsapi.Client()
+
+ex_bounding_box = create_bounding_box(example_query['shapefile'])
+
+request = {
+ "product_type": example_query['product_type'],
+ "variable": example_query['variables'],
+ "year": str(example_query['year']),
+ "month": str(example_query['month']),
+ "day": example_query['day'],
+ "time": example_query['time'],
+ "data_format": "netcdf",
+ "download_format": "unarchived",
+ "area": ex_bounding_box
+ }
+
+target = f"{example_query['output']}.nc"
+
+client.retrieve("reanalysis-era5-single-levels", request).download(target)
+```
+
+This works! So now we just need to create a `task_` function that `pytask` will recognise, so that the downloads can be parallelised over the queries:
+
+```{python}
+#| export:
+# define the download task
+
+queries = data_catalog['download']['jobs']['queries_df'].load()
+
+for i, job in queries.iterrows():
+
+ @task(id=job['output'], name=f"Download {job['output']}")
+ def task_download_raw_data(
+ _query: Series = job # The query object from the data catalog
+ )-> Annotated[Path, data_catalog['download']['outputs'][job['output']]]:
+
+ logger = setup_logger(_query['output'])
+ output_path = BLD / f"{_query['output']}.nc"
+ logger.info(f"Starting download for {_query['output']} to {output_path}")
+
+ # check if string file path exists
+ if os.path.exists(output_path):
+ logger.info(f"File {output_path} already exists. Skipping download.")
+ return output_path
+
+ client = cdsapi.Client()
+ bounding_box = create_bounding_box(_query['shapefile'])
+
+ request = {
+ "product_type": _query['product_type'],
+ "variable": _query['variables'],
+ "year": _query['year'],
+ "month": _query['month'],
+ "day": _query['day'],
+ "time": _query['time'],
+ "data_format": "netcdf",
+ "download_format": "unarchived",
+ "area": bounding_box
+ }
+
+ client.retrieve("reanalysis-era5-land", request).download(output_path)
+ logger.info(f"Downloaded data for {_query['output']} to {output_path}")
+
+ return output_path
+```
+
+### How this works (with some help from GPT):
+
+#### 🧠 How pytask Discovers and Executes Tasks
+
+When you run pytask, it automatically scans your project for Python files named `task_*.py`. In these files, it looks for:
+- Functions decorated with `@task`, or
+- Functions prefixed with `task_`
+
+These functions are not executed immediately. Instead, `pytask`:
+1. Imports each task_*.py module (just like Python would)
+2. Registers any matching task functions as nodes in a directed acyclic graph (DAG)
+3. Resolves dependencies by analyzing:
+   - Input annotations and default values (e.g., `Annotated[Path, BLD / "data.pkl"]`)
+ - Output declarations (e.g., `return` values or `Product` annotations)
+4. Builds the DAG, where each task function is a node
+5. Executes the tasks, respecting dependency order and skipping up-to-date nodes
+
+So even though the task functions aren’t explicitly “run” in the Python code itself, pytask knows how and when to execute them — based on their position in the DAG.
+
+#### 🔄 How This Differs from Snakemake
+
+In `snakemake`, you’re expected to define a series of explicitly executable rules, often using shell commands or Python scripts. You “stitch together” rules using filenames and wildcard matching.
+
+In contrast:
+- 🐍 pytask is Python-native — tasks are just regular Python functions
+- ⚙️ It builds a DAG from those functions and tracks inputs/outputs automatically
+- 🧱 You are declaring nodes, not scripting execution
+
+Think of your Python files not as scripts to run, but as a way to define and wire together declarative tasks that will be executed by the pytask engine.
+
+---
+
+Because we defined this task in a function and loop, we can easily debug a node in the DAG by simply calling it:
+
+```{python}
+#| eval: false
+task_download_raw_data()
+```
diff --git a/notes_qmd/22_pytask_aggregate.qmd b/notes_qmd/22_pytask_aggregate.qmd
new file mode 100644
index 0000000..3868c8e
--- /dev/null
+++ b/notes_qmd/22_pytask_aggregate.qmd
@@ -0,0 +1,638 @@
+---
+title: "Aggregation: The `aggregation` Module as a `pytask` Task"
+format: html
+engine: jupyter
+---
+
+# task_aggregate
+
+> This task aggregates the downloaded data into spatial and temporal averages. It uses xarray to compute summary statistics over the specified time period and spatial region. The aggregation is done diurnally, so we will fetch the sundown and sunrise times for the region and use them to compute the diurnal averages.
+
+```{python}
+#| default_exp task_aggregate:
+#
+```
+
+```{python}
+#| hide:
+# showdoc
+
+from nbdev.showdoc import *
+
+```
+
+```{python}
+#| export:
+#
+
+import os
+import tempfile
+import rasterio
+import yaml
+import xarray as xr
+from pyprojroot import here
+from typing import Literal
+from pytask import task, Product
+from pathlib import Path
+from typing import Annotated
+from rasterstats.io import Raster
+
+from era5_sandbox.config import BLD, data_catalog
+from era5_sandbox.pytask_logger import setup_logger
+
+from era5_sandbox.core import GoogleDriver, _get_callable, describe, ClimateDataFileHandler, kelvin_to_celsius
+
+from era5_sandbox.aggregate import polygon_to_raster_cells, aggregate_to_healthsheds, RasterFile, netcdf_to_tiff
+
+```
+
+## Diurnal Classification Based on Sun Position
+
+To do diurnal classification, we will need to fetch the sundown and sunrise times for the region and use them to compute the diurnal averages. We will use the [astral library](https://astral.readthedocs.io/en/latest/) to get the sunrise and sunset times for the specified latitude and longitude. The aggregation will be done using xarray, which allows us to compute the mean over the specified time period and spatial region.
+
+Here's our example file:
+
+```{python}
+
+eg_file = data_catalog['download']['outputs']['2009_01_nepal'].load()
+with ClimateDataFileHandler(eg_file) as handler:
+
+ ds = xr.open_dataset(handler.get_dataset("instant"))
+ #ds = xr.open_dataset(handler.get_dataset("accum"))
+
+ds
+
+```
+
+We can see the astral library in action below:
+
+```{python}
+#| exports:
+#
+from astral import Observer, sun
+import pandas as pd
+import numpy as np
+from tqdm import tqdm
+import random
+import datetime
+from pytz import UTC
+
+```
+
+```{python}
+# get the location of a datapoint in the dataset
+lat, long = ds.coords["latitude"].values[0], ds.coords["longitude"].values[0]
+time = ds['valid_time'].values[0]
+dt = pd.to_datetime(time, utc=True)
+
+```
+
+```{python}
+dt
+```
+
+```{python}
+observer = Observer(latitude=lat, longitude=long, elevation=0)
+sun_info = sun.sun(observer, date=dt)
+sun_info
+
+```
+
+Astral is very fast:
+```{python}
+%%timeit
+
+#fetch a random time from valid_time
+options = ds['valid_time'].values
+
+random_time = random.choice(options)
+dt = pd.to_datetime(random_time, utc=True)
+sun_info = sun.sun(observer, date=dt)
+if dt < sun_info['sunrise']:
+ print(f"Randomly selected time: {dt} is pre_dawn")
+elif dt >= sun_info['sunrise'] and dt < sun_info['sunset']:
+ print(f"Randomly selected time: {dt} is day")
+else:
+ print(f"Randomly selected time: {dt} is post_dusk")
+
+
+```
+
+This tells us that we can use the valid time for the specific location of each data point in the query and know based on the sun whether it was daytime or nighttime. The runtime will be limited only by the looping.
+Let's put this in a function so that we can use the resampling in `xarray`.
+
+The resampling approach will be a single function that can resample in three ways:
+
+- By calendar date, default (1 value per calendar date)
+- By diurnal class by calendar date (3 values, pre-dawn, day, post-dusk)
+- By solar date (2 values per calendar date, with night classified as "before" or "after")
+
+Therefore, we'll need 2 internal functions; one to do diurnal, and one to do solar date bins.
+
+Essentially, we are going to create an array-shaped index/mask, (time, latitude, longitude). As a
+demonstration, this loop goes through the first 24 time points in the dataset,
+and calculates the sun info for each latitude and longitude, assigning the values to an array:
+
+```{python}
+%%time
+times = ds['valid_time'].values[:24]
+lats = ds.coords['latitude'].values
+lons = ds.coords['longitude'].values
+
+result = np.full((len(times), len(lats), len(lons)), "", dtype=object)
+
+for i, dt in enumerate(times):
+
+ for j, lat in enumerate(lats):
+
+ for k, lon in enumerate(lons):
+
+ # set the geographical position
+ observer = Observer(latitude=lat, longitude=lon, elevation=0)
+
+ # use the time
+ dt = pd.to_datetime(dt, utc=True)
+
+ # where/when is the sun at this time for this position
+ sun_info = sun.sun(observer, date=dt)
+ result[i, j, k] = sun_info
+```
+
+So we know that in the first hour, the sun goes up and comes down at slightly different
+times based on latitude and longitude. Take the first hour, for example:
+
+```{python}
+print(result.shape)
+hour_1 = 0 # 0th index of the results
+
+min_lat = 0
+min_lon = 0
+max_lat = 48
+max_lon = 90
+print(f"Even though the reading came from the first HOUR of data UTC, the sun info at the minimum latitude/longitude is: {result[hour_1, min_lat, min_lon]}")
+
+print(f"this is different from the sun info at the maximum latitude/longitude is: {result[hour_1, max_lat, max_lon]}")
+```
+
+```{python}
+#| export:
+# define the basic diurnal classification function
+
+def compute_diurnal_class_bins(
+ ds: xr.Dataset
+ )-> np.ndarray:
+ """
+ Compute the diurnal value for each data point in the dataset.
+ This function iterates over each data point in the dataset,
+ calculates the sunrise and sunset times for the given time, latitude and longitude,
+ and returns whether or not that data point is before dawn, during the day, or after dusk.
+ """
+
+ times = ds['valid_time'].values
+ lats = ds.coords['latitude'].values
+ lons = ds.coords['longitude'].values
+
+ result = np.full((len(times), len(lats), len(lons)), "", dtype=object)
+
+ for i, dt in enumerate(tqdm(times, desc="Classifying data points by sun position")):
+ # use the time
+ dt = pd.to_datetime(dt, utc=True)
+
+ for j, lat in enumerate(lats):
+
+ for k, lon in enumerate(lons):
+
+ # set the geographical position
+ observer = Observer(latitude=lat, longitude=lon, elevation=0)
+
+ # where/when is the sun at this time for this position
+ sun_info = sun.sun(observer, date=dt)
+
+ if dt < sun_info['sunrise']:
+ result[i, j, k] = "pre_dawn"
+ elif dt >= sun_info['sunrise'] and dt < sun_info['sunset']:
+ result[i, j, k] = "day"
+ else:
+ result[i, j, k] = "post_dusk"
+
+ return result
+```
+
+```{python}
+ex=compute_diurnal_class_bins(ds)
+```
+
+So, for our 720 time points, we should find that
+if we take the `set()` of all the classifications within that slice,
+there should be a few of them with 2 classes.
+In other words, at any given hour, almost all of
+the readings are "day", because it is daytime across all
+of Nepal, _but_ at certain timepoints, the sun is rising
+or setting in the northern part of the country and so some
+portion of the slice is classified differently:
+
+
+
+```{python}
+for x in range(720):
+ print(set(ex[x].flatten()))
+```
+
+This works! Now we can do a similar, but slightly more
+complicated function to define "night" and "day",
+where "night" includes all of the values after the sun goes down.
+
+```{python}
+#| exports:
+#
+
+def compute_solar_day_night_class_bins(
+ ds: xr.Dataset,
+ night_direction: Literal["before", "after"],
+ )-> list:
+ """
+ Compute the diurnal value for each data point in the dataset.
+ This function iterates over each data point in the dataset,
+ calculates the sunrise and sunset times for the given time, latitude and longitude,
+ and returns whether or not that data point is daytime or nighttime.
+ The definition of "nighttime" can be set to be all the darkness before the sun
+ came up (before), or all the darkness after it went down (after).
+ """
+
+ times = ds['valid_time'].values
+ lats = ds.coords['latitude'].values
+ lons = ds.coords['longitude'].values
+
+ result = np.full((len(times), len(lats), len(lons)), "", dtype=object)
+ datetimes = np.full((len(times), len(lats), len(lons)), "", dtype=object)
+
+ for i, dt in enumerate(tqdm(times, desc="Classifying data points by sun position")):
+ # use the time
+ dt = pd.to_datetime(dt, utc=True)
+
+ for j, lat in enumerate(lats):
+
+ for k, lon in enumerate(lons):
+
+ # set the geographical position
+ observer = Observer(latitude=lat, longitude=lon, elevation=0)
+ if night_direction == "before":
+ # Night is from previous sunset to today's sunrise
+ sun_today = sun.sun(observer, date=dt.date())
+ sun_prev = sun.sun(observer, date=(dt - pd.Timedelta(days=1)).date())
+ night_start = sun_prev["sunset"].astimezone(pd.Timestamp.utcnow().tz)
+ night_end = sun_today["sunrise"].astimezone(pd.Timestamp.utcnow().tz)
+
+ # the reading is from yesterday's nighttime
+ if night_start <= dt < night_end:
+ result[i, j, k] = "night"
+ # the date counts as today
+ datetimes[i, j, k] = dt.date()
+
+ # the reading is from daytime
+ elif sun_today["sunrise"] <= dt < sun_today["sunset"]:
+ result[i, j, k] = "day"
+ # the date counts as today
+ datetimes[i, j, k] = dt.date()
+
+ # the reading is from today's nighttime, but counts as tomorrow's night
+ else:
+ result[i, j, k] = "night"
+ # the date is tomorrow
+ datetimes[i, j, k] = (dt + pd.Timedelta(days=1)).date()
+
+ elif night_direction == "after":
+ # Night is from today's sunset to next sunrise
+ sun_today = sun.sun(observer, date=dt.date())
+ sun_next = sun.sun(observer, date=(dt + pd.Timedelta(days=1)).date())
+ night_start = sun_today["sunset"].astimezone(pd.Timestamp.utcnow().tz)
+ night_end = sun_next["sunrise"].astimezone(pd.Timestamp.utcnow().tz)
+
+ # the reading is from daytime
+ if sun_today["sunrise"] <= dt < sun_today["sunset"]:
+ result[i, j, k] = "day"
+ # the date counts as today
+ datetimes[i, j, k] = dt.date()
+ # the reading is from tonight
+ elif night_start <= dt < night_end:
+ result[i, j, k] = "night"
+ # the date counts as today
+ datetimes[i, j, k] = dt.date()
+
+ # the reading is from yesterday night
+ else:
+ # the date counts as yesterday
+ result[i, j, k] = "day"
+ datetimes[i, j, k] = (dt - pd.Timedelta(days=1)).date()
+ else:
+ raise ValueError(f"Invalid night_direction: {night_direction}")
+
+ return result, datetimes
+
+```
+
+```{python}
+%%time
+ex_class, ex_dt = compute_solar_day_night_class_bins(ds, "before")
+```
+
+```{python}
+ex_class
+```
+
+As before, we should see that most slices are homogenous,
+meaning most of the time, all the readings are from the day,
+but some slices should have day and night values:
+
+```{python}
+for slice_ in range(720):
+ print(set(ex_class[slice_].flatten()))
+```
+
+The returned array can serve as new "variable indexes" for the dataset:
+
+```{python}
+ds_masked = ds.copy()
+ds_masked['solar_class'] = (('valid_time', 'latitude', 'longitude'), ex_class)
+ds_masked["solar_date"] = (("valid_time", "latitude", "longitude"), ex_dt)
+```
+
+## Diurnal Resampling
+
+Now let's see whether the data can be resampled by both solar day and diurnal class. We'll try masking and making copies with NaN in the masked values:
+
+```{python}
+ds_day = ds_masked.where(ds_masked["solar_class"] == "day").drop_vars(["solar_class", "solar_date"])
+ds_night = ds_masked.where(ds_masked["solar_class"] == "night").drop_vars(["solar_class", "solar_date"])
+```
+
+Next, we set the time zone for Nepal since, to resample by day and night,
+we should observe the local time:
+
+```{python}
+ds_day = ds_day.assign_coords(valid_time=pd.to_datetime(ds["valid_time"].values).tz_localize("UTC").tz_convert("Asia/Kathmandu"))
+ds_night = ds_night.assign_coords(valid_time=pd.to_datetime(ds["valid_time"].values).tz_localize("UTC").tz_convert("Asia/Kathmandu"))
+```
+
+Now if we can resample by day...
+
+```{python}
+ds_day_rs = ds_day.resample(valid_time="1D").reduce(np.nanmean)
+ds_night_rs = ds_night.resample(valid_time="1D").reduce(np.nanmean)
+ds_day_rs
+```
+
+Can we successfully convert this to a tiff?
+
+```{python}
+from era5_sandbox.aggregate import netcdf_to_tiff
+```
+
+```{python}
+raster_day = netcdf_to_tiff(ds_day_rs, band=1, variable="d2m")
+raster_night = netcdf_to_tiff(ds_night_rs, band=1, variable="d2m")
+```
+
+Looks great! These two rasters represent one calendar day of daytime and nighttime values.
+
+### Testing Polygon to Raster Cells & Healthshed Aggregation
+
+The penultimate step of the aggregate pipeline in the original version is
+assigning each datapoint to the respective healthshed. The `vectors` argument
+comes from the healthshed, and represents each geographic polygon on the ground
+that we want to aggregate data to.
+
+```{python}
+from hydra import initialize, compose
+```
+
+```{python}
+try:
+ with initialize(version_base=None, config_path="../conf"):
+ cfg = compose(config_name='config.yaml')
+except Exception as e:
+ print(f"Error initializing Hydra: {e}")
+ with initialize(version_base=None, config_path="conf"):
+ cfg = compose(config_name='config.yaml')
+
+driver = GoogleDriver(json_key_path=here() / cfg.GOOGLE_DRIVE_AUTH_JSON.path)
+drive = driver.get_drive()
+healthsheds = driver.read_healthsheds("Nepal_Healthsheds2024.zip")
+```
+
+```{python}
+res_poly2cell=polygon_to_raster_cells(
+ vectors = healthsheds.geometry.values, # the geometries of the shapefile of the regions
+ raster=raster_day.data, # the raster data above
+ nodata=np.nan, # any intersections with no data, may have to be np.nan
+ affine=raster_day.transform, # some math thing need to revise
+ all_touched=True,
+ verbose=True
+)
+```
+
+This works fine. Finally, we aggregate to healthsheds:
+
+```{python}
+from era5_sandbox.aggregate import aggregate_to_healthsheds
+
+```
+
+```{python}
+result_day = aggregate_to_healthsheds(
+ res_poly2cell=res_poly2cell,
+ raster=raster_day,
+ shapes=healthsheds,
+ names_column="fid",
+ aggregation_func=np.nanmean,
+ aggregation_name="mean_dewpoint_day"
+)
+
+result_night = aggregate_to_healthsheds(
+ res_poly2cell=res_poly2cell,
+ raster=raster_night,
+ shapes=healthsheds,
+ names_column="fid",
+ aggregation_func=np.nanmean,
+ aggregation_name="mean_dewpoint_night"
+)
+```
+
+Below shows the result of aggregating the daytime dewpoint temperature to the healthshed level:
+
+```{python}
+result_day
+```
+
+```{python}
+result_night
+```
+
+So from one input, we will have two outputs, one for daytime and one for nighttime, and this will have to loop over the bands (ie each day in the month).
+
+# Putting it all together in a `pytask` task
+
+Below we define our `pytask` task to aggregate data to the healthshed level.
+
+```{python}
+#| exports:
+#
+
+job_rows = data_catalog['aggregate']['jobs']['jobs_df'].load()
+
+aggregation_funcs = {
+ "mean": np.nanmean,
+ "sum": np.nansum,
+ "max": np.nanmax,
+ "min": np.nanmin
+}
+
+for i, job in job_rows.iterrows():
+ #print(f"Job {i+1}: variable={job['variables']}, time={job['time']}, aggregation={job['aggregation_name']}")
+
+ # parse the row into function parameters
+ input_file = data_catalog['download']['outputs'][job['input']]
+ solar_classification = job['solar_classification']
+ variable = job['variables_short']
+ time = job['time']
+ aggregation_func = aggregation_funcs[job['aggregation_name']]
+ aggregation_name = job['aggregation_name']
+
+ climate_handler_var = job['climate_handler_var']
+ local_tz = job['local_tz']
+
+ shapefile = job['shapefile']
+ hshd_unique_id = job['hshd_unique_id']
+
+ output_file = job['input'] + "_" + job['time'] + "_" + job['variables_short'] + "_" + job['aggregation_name'] + ".parquet"
+
+ @task(id=output_file, name=f"Aggregate {output_file}", after="task_download_raw_data")
+ def task_aggregate_data_diurnal(
+ input_file: Path = data_catalog['download']['outputs'][job['input']], # input data Path from the download task
+ aggregation_func: callable = aggregation_func, # the aggregation function
+ aggregation_name: str = aggregation_name, # the name of the aggregation function
+ time: Literal["day", "night"] = time, # whether to aggregate by day or night
+ night_direction: Literal["before", "after"] = solar_classification, # how to define night
+ variable: str = variable, # the variable to aggregate,
+ climate_handler_var: Literal["instant", "accum"] = climate_handler_var, # whether the variable is instant or accum,
+ local_tz: str = local_tz, # the local timezone for resampling
+ shapefile: str = shapefile, # the shapefile for the healthsheds,
+ hshd_unique_id: str = hshd_unique_id, # the unique id column in the shapefile,
+ output_file: str = output_file # the output file name
+ ) -> Annotated[Path, data_catalog['aggregate']['outputs'][output_file]]:
+ """
+ Task to aggregate data from a CDSAPI Query to the healthshed
+ level. Returns path to parquet file with aggregated data.
+ """
+
+ logger = setup_logger(output_file)
+
+ logger.info(f"Aggregating: {output_file}")
+
+ # check if the string path exists
+ # if os.path.exists(output_file):
+ # logger.info(f"File {output_file} already exists. Skipping aggregation.")
+ # return output_file
+
+ # get input data
+ logger.info("Reading input data...")
+ with ClimateDataFileHandler(input_file) as handler:
+ ds = xr.open_dataset(handler.get_dataset('instant'))
+
+ #get the healthshed shapefile
+ logger.info(f"Reading healthshed shapefile from yaml {here()}...")
+ with open(here() / "conf" / "config.yaml") as f:
+ healthshed_config = yaml.safe_load(f)
+
+ key_path = here() / healthshed_config['GOOGLE_DRIVE_AUTH_JSON']['path']
+
+ driver = GoogleDriver(json_key_path=key_path)
+ drive = driver.get_drive()
+ healthsheds = driver.read_healthsheds(shapefile)
+
+ # compute the diurnal classification bins
+ logger.info("Computing diurnal classification bins...")
+ class_bins, class_dts = compute_solar_day_night_class_bins(ds, night_direction)
+
+ ds_masked = ds.copy()
+
+ # assign classifications
+ logger.info("Assigning classification bins to dataset...")
+ ds['solar_class'] = (('valid_time', 'latitude', 'longitude'), class_bins)
+ ds["solar_date"] = (("valid_time", "latitude", "longitude"), class_dts)
+
+ # mask the dataset to the requested time
+ mask = ds["solar_class"] == time
+ ds_masked = ds_masked.where(mask)
+
+ # set the local timezone
+ ds_masked = ds_masked.assign_coords(valid_time=pd.to_datetime(ds["valid_time"].values).tz_localize("UTC").tz_convert(local_tz))
+
+ # resample by local date
+ logger.info("Resampling by local date...")
+ ds_rs = ds_masked.resample(valid_time="1D").reduce(aggregation_func)
+
+ # convert to tiff
+ logger.info("Rasterizing resampled data...")
+ n_bands = ds_rs.dims['valid_time']
+
+ # polygon to raster cells for the first band
+ logger.info("Converting polygons to raster cells...")
+ raster = netcdf_to_tiff(ds_rs, band=1, variable=variable)
+ res_poly2cell=polygon_to_raster_cells(
+ vectors = healthsheds.geometry.values, # the geometries of the shapefile of the regions
+ raster=raster.data, # the raster data above
+ nodata=np.nan, # any intersections with no data, may have to be np.nan
+ affine=raster.transform, # some math thing need to revise
+ all_touched=True,
+ verbose=True
+ )
+
+ result_df = healthsheds[[hshd_unique_id, "geometry"]].copy()
+
+ # loop over bands and aggregate to healthsheds
+ for band in tqdm(range(1, n_bands + 1)):
+ logger.info(f"Processing band {band} of {n_bands}...")
+
+ day = band # band is 1-indexed
+
+ day_col = f"day_{day:02d}"
+
+ # calculate raster for this band
+ raster = netcdf_to_tiff(ds_rs, band=band, variable=variable)
+
+ # aggregate to healthsheds
+ result = aggregate_to_healthsheds(
+ res_poly2cell=res_poly2cell,
+ raster=raster,
+ shapes=healthsheds,
+ names_column=hshd_unique_id,
+ aggregation_func=aggregation_func,
+ aggregation_name=variable
+ )
+
+ # add band to result dataframe
+ result_df[day_col] = result[variable]
+
+ # save to parquet
+ result_df.to_parquet(f"{BLD}/{output_file}")
+
+ logger.info("Aggregation complete.")
+
+ return Path(f"{BLD}/{output_file}")
+
+```
+
+That should wrap it up! To test, we can run a single job:
+
+```{python}
+#| eval: false
+# runs the last defined job only
+task_aggregate_data_diurnal()
+```
+
+Or we can run the task in `pytask`:
+
+```bash
+pytask build -k "nepal and 2009" --dry-run
+```
\ No newline at end of file
diff --git a/notes_qmd/IMG_740012467778-1.jpeg b/notes_qmd/IMG_740012467778-1.jpeg
new file mode 100644
index 0000000..52886eb
Binary files /dev/null and b/notes_qmd/IMG_740012467778-1.jpeg differ
diff --git a/notes_qmd/index.qmd b/notes_qmd/index.qmd
new file mode 100644
index 0000000..bc971cc
--- /dev/null
+++ b/notes_qmd/index.qmd
@@ -0,0 +1,157 @@
+---
+title: "The ERA5 Spatial Aggregation Pipeline"
+exec_all: true
+---
+
+```{python}
+#| hide: null
+from era5_sandbox.core import *
+```
+
+## era5_sandbox
+
+> Sandbox environment for era5 development
+
+This package documents the development and implementation of functions and code for the Madagascar ERA5 dataset project. The goal is for exposure data to be made available at the daily resolution when possible. Finer resolutions shouldn’t ever be needed for our purposes, and it should then be relatively easy to aggregate at coarser resolutions, such as weekly or monthly. Additionally, we've extended this work to Nepal as well.
+
+Variables should generally be made available from 2010 onward, as that’s where our clinic data starts.
+
+All data are ideally made available at the “healthshed” geographical level. Healthsheds are defined as geographical areas where people who live all go to the same clinic. There are a total of ~2700 public clinics in Madagascar, hence ~2700 healthsheds, with each healthshed containing ~10000 people on average.
+
+Preliminary list of environmental variables
+
+- [x] 2-m air temperature from ERA5: daily min, max, mean
+
+- [x] 2-m air dew point temperature from ERA5: daily min, max, mean
+
+- [x] Precipitation: daily total (ERA5)
+
+- [x] Soil moisture: daily average (ERA5)
+
+Variables from other sources:
+
+- [ ] Sea surface temperature: daily average and maximum in the nearest neighbor for each healthshed.
+
+- [ ] Precipitation: daily total (CHIRPS)
+
+- [ ] Chlorophyll-A (Giacomo)
+
+- [ ] Wealth index: Available from Giacomo
+
+- [ ] NDVI
+
+- [ ] Tropical storm
+
+- [ ] Flooding
+
+- [ ] Deforestation
+
+- [ ] Linking/segmenting healthsheds into climate zones and other
+
+- [ ] Relative humidity: daily average (lower priority)
+
+Those from the ERA5 dataset will be housed here, but we may likely develop a separate repository for the other datasets.
+
+## Developer Guide
+
+This package is built and maintained with `nbdev`. If you are new to using `nbdev` here are some useful pointers to get you started.
+
+### Install era5_sandbox in Development mode
+
+```sh
+# make sure era5_sandbox package is installed in development mode
+$ pip install -e .
+```
+
+To make changes, go to the "notes" directory and edit the notebooks as necessary.
+Each notebook refers to a module in the era5_sandbox package. Cells are exported to the module
+when the notebook is saved and you run the following command:
+
+```sh
+$ nbdev_export
+```
+
+For example, to change the functionality of the `testAPI()` function in the testAPI Hydra rule, you would edit the `testAPI` notebook in the `notes` directory `notes/testAPI.ipynb`, and then save that notebook and run `nbdev_export` to update the `core` module in the package.
+
+### How to Run the Pipeline
+
+The pipeline downloads ERA5 variables for a given date range and geographical bounding box. You can learn how each of these steps was developed by following the notebooks in `notes` in numerical order.
+
+::: {.callout-important}
+The pipeline has two implementations: one using `snakemake` and `hydra`, and another using `pytask`. The `pytask` implementation is the more recent one, and is recommended for future use. The `snakemake` implementation is left here for reference to legacy code.
+:::
+
+#### Using `pytask`
+
+To run the pipeline, the `pytask` config at `notes_qmd/20_pytask_config.qmd` should be reviewed
+and updated if necessary. The pipeline can then be run with the following command:
+
+```sh
+$ sbatch pytask.sbatch
+```
+
+#### Using `snakemake` and `hydra`
+
+To run the pipeline, the config at `conf/config.yaml` should be updated with the desired date range and geographical bounding box. The pipeline can then be run with the following command:
+
+```sh
+sbatch snakemake.sbatch
+```
+
+### What Does the Pipeline Produce?
+
+Using `pytask`'s data catalog, you can investigate the downloaded raw data with python, eg.:
+
+```{python}
+#| exec_doc:
+#
+import xarray as xr
+from era5_sandbox.config import data_catalog
+from era5_sandbox.core import ClimateDataFileHandler
+
+ex_nc = list(data_catalog['download']['outputs']._entries).pop()
+ex_nc_path = data_catalog['download']['outputs'][ex_nc].load()
+
+with ClimateDataFileHandler(ex_nc_path) as handler:
+ ds = xr.open_dataset(handler.get_dataset("instant"))
+
+ds
+```
+
+And plot it with cartopy, eg.:
+
+```{python}
+#| exec_doc:
+#
+import matplotlib.pyplot as plt
+import cartopy.crs as ccrs
+import cartopy.feature as cfeature
+
+temperature = ds["t2m"]
+
+# Select a specific time step
+temperature_at_time = temperature.isel(valid_time=0)
+
+# Plot the data on a map
+plt.figure(figsize=(12, 8))
+ax = plt.axes(projection=ccrs.PlateCarree())
+temperature_at_time.plot(ax=ax, cmap="coolwarm", transform=ccrs.PlateCarree(), cbar_kwargs={"label": "Temperature (K)"})
+ax.coastlines()
+ax.add_feature(cfeature.BORDERS, linestyle=":")
+ax.set_title("Temperature at Time Step 0")
+plt.show()
+```
+
+You can also load the aggregated data:
+
+```{python}
+#| exec_doc:
+#
+import pandas as pd
+import geopandas as gpd
+from era5_sandbox.config import data_catalog
+
+ex_agg_path = data_catalog['aggregate']['outputs']['2019_08_madagascar_night_d2m_max.parquet'].load()
+
+gpd.read_parquet(ex_agg_path).describe()
+```
\ No newline at end of file
diff --git a/notes_qmd/nbdev.yml b/notes_qmd/nbdev.yml
new file mode 100644
index 0000000..d8c5049
--- /dev/null
+++ b/notes_qmd/nbdev.yml
@@ -0,0 +1,9 @@
+project:
+ output-dir: _docs
+
+website:
+ title: "era5_sandbox"
+ site-url: "https://TinasheMTapera.github.io/era5_sandbox"
+ description: "Sandbox environment for era5 development"
+ repo-branch: main
+ repo-url: "https://github.com/TinasheMTapera/era5_sandbox"
diff --git a/notes_qmd/sidebar.yml b/notes_qmd/sidebar.yml
new file mode 100644
index 0000000..caf3166
--- /dev/null
+++ b/notes_qmd/sidebar.yml
@@ -0,0 +1,16 @@
+website:
+ sidebar:
+ contents:
+ - index.ipynb
+ - section: "Snakemake Modules"
+ - 00_core.ipynb
+ - 01_download_raw_data.ipynb
+ - 02_aggregate.ipynb
+ - 03_publish.ipynb
+ - section: "PyTask Modules"
+ - 20_pytask_config.ipynb
+ - 20_pytask_logger.ipynb
+ - 21_pytask_download.ipynb
+ - 22_pytask_aggregate.ipynb
+ - section: "PyTask Demo"
+ - 10_pytask_demo.ipynb
diff --git a/notes_qmd/styles.css b/notes_qmd/styles.css
new file mode 100644
index 0000000..66ccc49
--- /dev/null
+++ b/notes_qmd/styles.css
@@ -0,0 +1,37 @@
+.cell {
+ margin-bottom: 1rem;
+}
+
+.cell > .sourceCode {
+ margin-bottom: 0;
+}
+
+.cell-output > pre {
+ margin-bottom: 0;
+}
+
+.cell-output > pre, .cell-output > .sourceCode > pre, .cell-output-stdout > pre {
+ margin-left: 0.8rem;
+ margin-top: 0;
+ background: none;
+ border-left: 2px solid lightsalmon;
+ border-top-left-radius: 0;
+ border-top-right-radius: 0;
+}
+
+.cell-output > .sourceCode {
+ border: none;
+}
+
+.cell-output > .sourceCode {
+ background: none;
+ margin-top: 0;
+}
+
+div.description {
+ padding-left: 2px;
+ padding-top: 5px;
+ font-style: italic;
+ font-size: 135%;
+ opacity: 70%;
+}
diff --git a/pyproject.toml b/pyproject.toml
index f2c07bf..c9d6b9e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,3 +1,7 @@
[build-system]
requires = ["setuptools>=64.0"]
build-backend = "setuptools.build_meta"
+
+[tool.pytask.ini_options]
+paths = ["src/era5_sandbox"]
+editor_url_scheme = "vscode"
\ No newline at end of file
diff --git a/pytask.sbatch b/pytask.sbatch
new file mode 100644
index 0000000..3063b72
--- /dev/null
+++ b/pytask.sbatch
@@ -0,0 +1,15 @@
+#!/bin/bash
+#
+#SBATCH -p intermediate # partition (queue)
+# -c is the short form of --cpus-per-task, so the CPU count is requested once, below
+#SBATCH --cpus-per-task=36 # request >= number of workers you want
+#SBATCH --mem 100GB # memory
+#SBATCH -t 1-12:00 # time (D-HH:MM)
+
+#SBATCH --mail-type=BEGIN,END,TIME_LIMIT_80
+#SBATCH --mail-user=ttapera@hsph.harvard.edu
+
+# This is a test to see if we can use pytask to download ERA5 data in parallel
+
+#pytask --dry-run
+pytask --parallel-backend loky -n 36
\ No newline at end of file
diff --git a/pytask_collect.txt b/pytask_collect.txt
new file mode 100644
index 0000000..efaaea3
--- /dev/null
+++ b/pytask_collect.txt
@@ -0,0 +1,135193 @@
+─────────────────────────────────────────────────────────────────────────────────────────────────────── Start pytask session ────────────────────────────────────────────────────────────────────────────────────────────────────────
+Platform: linux -- Python 3.11.11, pytask 0.5.5, pluggy 1.5.0
+Root: /net/rcstorenfs02/ifs/rc_labs/dominici_lab/lab/data_processing/csph-era5_sandbox
+Configuration: /net/rcstorenfs02/ifs/rc_labs/dominici_lab/lab/data_processing/csph-era5_sandbox/pyproject.toml
+Plugins: pytask_parallel-0.5.1, vscode-0.0.2
+Collected 8068 tasks.
+
+Collected tasks:
+├── 🐍
+│ ├── 📝
+│ │ ├── 📄
+│ │ ├── 📄
+│ │ ├── 📄
+│ │ ├── 📄
+│ │ ├── 📄
+│ │ ├── 📄
+│ │ ├── 📄