diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index eeab6d0da1ca657ee271f322b238dc0aacc41e92..fee7ff79e851dcce3b6c60a1f68263d733270c97 100755 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -20,7 +20,7 @@ prepare: test_python2: stage: test script: - - conda env update -f environment.yml -n hermesv3_gr python=2.7 + - conda env update -f environment.yml -n hermesv3_gr python=3.6 - source activate hermesv3_gr - python run_test.py # - pip install codacy-coverage --upgrade diff --git a/CHANGELOG b/CHANGELOG index 8238cd193e02a05ccf4207570db0d4333a2e8c3b..222e0e1ed78ce0888289dabe782f2900a4711032 100755 --- a/CHANGELOG +++ b/CHANGELOG @@ -16,6 +16,19 @@ - Sector Manager: 1. Aviation sector + 2. Shipping port sector + 3. Livestock sector + 4. Crop operations sector + 5. Crop fertilizers sector + 6. Agricultural machinery sector + 7. Residential combustion sector + 8. Recreational boats sector + 9. Point sources sector + 10. Road traffic sector + 11. Traffic area (evaporative & small cities) sector - Writing options: - 1. Default writer \ No newline at end of file + 1. Default writer + 2. CMAQ writer + 3. MONARCH writer + 4. WRF-Chem writer \ No newline at end of file diff --git a/LICENSE b/LICENSE index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..20d40b6bceca3a6c0237d7455ebf1820aeff3680 100755 --- a/LICENSE +++ b/LICENSE @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. 
For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. 
+ + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 
+ + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. 
+ + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. 
+ + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. 
+ + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. 
If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). 
To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. 
+ + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. 
+ + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + <program> Copyright (C) <year> <name of author> + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +<https://www.gnu.org/licenses/>. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +<https://www.gnu.org/licenses/why-not-lgpl.html>.
\ No newline at end of file diff --git a/conf/hermes.conf b/conf/hermes.conf index a2bd0a3acdeb2084405b8743f04a5f50f5c76c5e..ad9ea14736848678cbfc107aa8a6a3e677db1d05 100755 --- a/conf/hermes.conf +++ b/conf/hermes.conf @@ -2,14 +2,15 @@ log_level = 3 input_dir = /home/Earth/ctena/Models/hermesv3_bu_data data_path = /esarchive/recon -output_dir = /scratch/Earth/HERMESv3_BU_OUT -output_name = HERMES_.nc +output_dir = /scratch/Earth/HERMESv3_BU_OUT/ +output_name = HERMESv3_.nc emission_summary = 0 start_date = 2016/11/29 00:00:00 # ----- end_date = start_date [DEFAULT] ----- # end_date = 2010/01/01 00:00:00 -output_timestep_num = 25 -auxiliary_files_path = /scratch/Earth/HERMESv3_BU_aux/_ +output_timestep_num = 24 +auxiliary_files_path = /scratch/Earth/HERMESv3_BU_aux/__test +first_time = 0 erase_auxiliary_files = 0 @@ -24,12 +25,19 @@ vertical_description = /profiles/vertical/MONARCH_Global_48layers_ver #vertical_description = /profiles/vertical/CMAQ_15layers_vertical_description.csv # if domain_type == rotated: - centre_lat = 51 - centre_lon = 10 - west_boundary = -35 - south_boundary = -27 - inc_rlat = 0.2 - inc_rlon = 0.2 + # centre_lat = 51 + # centre_lon = 10 + # west_boundary = -35 + # south_boundary = -27 + # inc_rlat = 0.2 + # inc_rlon = 0.2 + + # centre_lat = 40.5 + # centre_lon = -3.5 + # west_boundary = -7.0 + # south_boundary = -7.0 + # inc_rlat = 0.4 + # inc_rlon = 0.4 # if domain_type == lcc: @@ -39,14 +47,6 @@ vertical_description = /profiles/vertical/MONARCH_Global_48layers_ver lon_0 = -3 lat_0 = 40 - # TEST - #nx = 30 - #ny = 30 - #inc_x = 10000 - #inc_y = 10000 - #x_0 = 253151.59375 - #y_0 = 43862.90625 - # CATALUNYA #nx = 278 #ny = 298 @@ -58,11 +58,21 @@ vertical_description = /profiles/vertical/MONARCH_Global_48layers_ver # CATALUNYA test nx = 28 ny = 30 + #nx = 4 + #ny = 4 inc_x = 10000 inc_y = 10000 x_0 = 253151.59375 y_0 = 43862.90625 + # IP + #nx = 397 + #ny = 397 + #inc_x = 4000 + #inc_y = 4000 + #x_0 = -807847.688 + #y_0 = -797137.125 + # EUROPA #nx = 478 #ny = 398 @@ -71,13 +81,6 @@ vertical_description = /profiles/vertical/MONARCH_Global_48layers_ver #x_0 = -2131849.000 #y_0 = -2073137.875 - # IP - # nx = 397 - # ny = 397 - # inc_x = 4000 - # inc_y = 4000 - # x_0 = -807847.688 - # y_0 = -797137.125 # MAD #nx = 146 @@ -87,23 +90,23 @@ vertical_description = /profiles/vertical/MONARCH_Global_48layers_ver #x_0 = -142848.422 #y_0 = -20137.891 -# if domain_type == mercator: - #lat_ts = -1.5 - #lon_0 = -18 - #nx = 210 - #ny = 236 - #inc_x = 50000 - #inc_y = 50000 - #x_0 = -126017.5 - #y_0 = -5407460 +# if domain_type == mercator + # lat_ts = -1.5 + # lon_0 = -18 + # nx = 10 + # ny = 10 + # inc_x = 50000 + # inc_y = 50000 + # x_0 = -126017.5 + # y_0 = -5407460 # if domain_type == regular: - lat_orig = 41.1 - lon_orig = 1.8 - inc_lat = 0.1 - inc_lon = 0.1 - n_lat = 10 - n_lon = 10 + # lat_orig = 40.5 + # lon_orig = 0.0 + # inc_lat = 0.05 + # inc_lon = 0.05 + # n_lat = 50 + # n_lon = 70 [CLIPPING] @@ -116,24 +119,29 @@ vertical_description = /profiles/vertical/MONARCH_Global_48layers_ver #################################################################### [SECTOR MANAGEMENT] writing_processors = 1 - -aviation_processors = 0 -shipping_port_processors = 0 -livestock_processors = 0 -crop_operations_processors = 0 -crop_fertilizers_processors = 0 -agricultural_machinery_processors = 0 -residential_processors = 0 -recreational_boats_processors = 0 -point_sources_processors = 0 -traffic_processors = 0 -traffic_area_processors = 0 - +# +# aviation_processors = 1 +# 
shipping_port_processors = 1 +# livestock_processors = 12 +# crop_operations_processors = 1 +# crop_fertilizers_processors = 4 +# agricultural_machinery_processors = 1 +# residential_processors = 4 +# recreational_boats_processors = 4 +# point_sources_processors = 16 +# traffic_processors = 256 +traffic_area_processors = 1 [SHAPEFILES] -nut_shapefile_prov = /Shapefiles/Provinces/ES_Provinces.shp -nut_shapefile_ccaa = /Shapefiles/CCAA/ES_CCAA.shp +nuts3_shapefile = /shapefiles/nuts3/nuts3.shp +nuts2_shapefile = /shapefiles/nuts2/nuts2.shp +land_uses_path = /ecmwf/clc/original_files/g250_clc12_v18_5a/g250_clc12_V18_5.tif +land_uses_nuts2_path = /agriculture/land_use_ccaa.csv population_density_map = /jrc/ghsl/original_files/GHS_POP_GPW42015_GLOBE_R2015A_54009_1k_v1_0.tif +population_nuts2 = /solvents/pop_by_nut2.csv +population_type_map = /jrc/ghsl/original_files/GHS_SMOD_POP2015_GLOBE_R2016A_54009_1k_v1_0.tif +population_type_nuts2 = /residential/pop_type_ccaa.csv +population_type_nuts3 = /residential/pop_type_prov.csv [SPECIATION DATA] speciation_map = /profiles/speciation/map_base.csv @@ -159,8 +167,8 @@ layer_thickness_dir = /esarchive/exp/monarch/a1wd/regional/hourly/layer_thicknes [AVIATION SECTOR] # With 'hc' is calculated 'nmvoc' and 'ch4' -aviation_source_pollutants = nox_no2, co, hc, so2, pm10, pm25, co2 -# airport_list = LEBL +aviation_source_pollutants = nox_no2, co, hc, so2, pm10, pm25, co2, nmvoc +# airport_list = # plane_list = airport_shapefile_path = /aviation/Airports.shp airport_runways_shapefile_path = /aviation/Runways.shp @@ -179,8 +187,8 @@ aviation_speciation_profiles = /profiles/speciation/aviation/speciati shipping_port_source_pollutants = nox_no2, pm10, pm25, co, so2, nmvoc, ch4, nh3, co2 vessel_list = LC,DC,GC,RO,FE,CR,CO,TU,OT port_list = ACO, ALC, ALI, ALM, ARI, ARR, AVI, ALG, BAR, BIL, CAB, CAD, CSA, CAR, CTG, CAS, CEU, HIE, FER, GAN, GIJ, HUE, IBI, LPM, LCR, MAH, MAL, MPO, MEL, MOT, PMA, PAS, PRO, PSM, SSG, SCP, SCT, SAG, SAL, SCI, SAN, SEV, TAR, TRG, VAL, VIG, VIL, ZFC -hoteling_shapefile_path = /Shapefiles/shipping_port/Areas_Hot_Puertos_and_BCN.shp -maneuvering_shapefile_path = /Shapefiles/shipping_port/Areas_Maneuv_Puertos_and_BCN.shp +hoteling_shapefile_path = /shapefiles/shipping_port/Areas_Hot_Puertos_and_BCN.shp +maneuvering_shapefile_path = /shapefiles/shipping_port/Areas_Maneuv_Puertos_and_BCN.shp shipping_port_ef_path = /shipping_port/ef/engines_fuel_EF.csv shipping_port_engine_percent_path = /shipping_port/ef/ship_perc_engines.csv shipping_port_tonnage_path = /shipping_port/ship_operations_GT_2015.csv @@ -204,8 +212,6 @@ livestock_hourly_profiles = /profiles/temporal/livestock/hourly_profi livestock_speciation_profiles = /profiles/speciation/livestock/speciation_profiles_base.csv [AGRICULTURAL] -land_uses_path = /ecmwf/clc/original_files/g250_clc12_v18_5a/g250_clc12_V18_5.tif -land_use_by_nut_path = /agriculture/land_use_ccaa.csv crop_by_nut_path = /agriculture/crops_ha_2017.csv crop_from_landuse_path = /agriculture/map_crops_landuse.csv @@ -248,23 +254,22 @@ crop_machinery_monthly_profiles = /profiles/temporal/agricultural_mac crop_machinery_weekly_profiles = /profiles/temporal/agricultural_machinery/weekly_profiles.csv crop_machinery_hourly_profiles = /profiles/temporal/agricultural_machinery/hourly_profiles.csv crop_machinery_speciation_profiles = /profiles/speciation/agricultural_machinery/speciation_profiles_base.csv -crop_machinery_by_nut = /agriculture/agricultural_machinery/crops_ha_prov_2017.csv +crop_machinery_nuts3 = 
/agriculture/agricultural_machinery/crops_ha_prov_2017.csv [RESIDENTIAL] -fuel_list = B_res, B_com +fuel_list = HD_res, LPG_res, NG_res, HD_com, LPG_com, NG_com, B_res, B_com +# fuel_list = B_res, B_com +# fuel_list = HD_res, LPG_res, NG_res, HD_com, LPG_com, NG_com residential_source_pollutants = nox_no2, so2, co, nh3, pm10, pm25, nmvoc -population_type_map = /jrc/ghsl/original_files/GHS_SMOD_POP2015_GLOBE_R2016A_54009_1k_v1_0.tif -population_type_by_ccaa = /residential/pop_type_ccaa.csv -population_type_by_prov = /residential/pop_type_prov.csv -energy_consumption_by_prov = /residential/energy_consumption_nuts3.csv -energy_consumption_by_ccaa = /residential/energy_consumption_nuts2.csv +energy_consumption_nuts2 = /residential/energy_consumption_nuts2.csv +energy_consumption_nuts3 = /residential/energy_consumption_nuts3.csv residential_spatial_proxies = /residential/spatial_proxies.csv residential_ef_files_path = /residential/ef/ef.csv residential_heating_degree_day_path = /ecmwf/era5/yearly/heatingdegreeday/hdd_.nc residential_hourly_profiles = /profiles/temporal/residential/hourly_profiles.csv residential_speciation_profiles = /profiles/speciation/residential/speciation_profiles_base.csv -[RECREATIONAL_BOATS} +[RECREATIONAL_BOATS] recreational_boats_source_pollutants = nox_no2,so2,nmvoc,co,nh3,pm10,pm25,co2,ch4 recreational_boats_list = YB_001,YB_002,SB_001,SB_002,SP_001,SP_002,OB_001,OB_002,WS_001,WS_002,YB_003,SB_003,SP_004,SP_005,OB_002,WS_003,MB_001,MB_002,MB_003,MB_004,MB_005,MB_006,MS_001,MS_002,SB_004,SB_005 recreational_boats_density_map = /recreational_boats/recreation_boats_area.tif @@ -279,7 +284,7 @@ recreational_boats_speciation_profiles = /profiles/speciation/recreat point_source_pollutants = nox_no2,nmvoc,so2,co,nh3,pm10,pm25,ch4,n2o,co2 plume_rise = True # point_source_snaps = 09 -point_source_catalog = /point_sources/Maestra_Focos_SNAP01030409_2015_plume_rise.csv +point_source_catalog = /point_sources/Maestra_focos_2015_plume_rise.shp point_source_monthly_profiles = /profiles/temporal/point_sources/monthly_profiles.csv point_source_weekly_profiles = /profiles/temporal/point_sources/weekly_profiles.csv point_source_hourly_profiles = /profiles/temporal/point_sources/hourly_profiles.csv @@ -310,23 +315,34 @@ traffic_hourly_profiles_mean = /profiles/temporal/traffic/aadt_h_mn.c traffic_hourly_profiles_weekday = /profiles/temporal/traffic/aadt_h_wd.csv traffic_hourly_profiles_saturday = /profiles/temporal/traffic/aadt_h_sat.csv traffic_hourly_profiles_sunday = /profiles/temporal/traffic/aadt_h_sun.csv -traffic_speciation_profile_hot_cold = /profiles/speciation/traffic/hot_cold_cmaq_cb05_aero5.csv -traffic_speciation_profile_tyre = /profiles/speciation/traffic/tyre_cmaq_cb05_aero5.csv -traffic_speciation_profile_road = /profiles/speciation/traffic/road_cmaq_cb05_aero5.csv -traffic_speciation_profile_brake = /profiles/speciation/traffic/brake_cmaq_cb05_aero5.csv -traffic_speciation_profile_resuspension = /profiles/speciation/traffic/resuspension_cmaq_cb05_aero5.csv +traffic_speciation_profile_hot_cold = /profiles/speciation/traffic/hot_cold_base.csv +traffic_speciation_profile_tyre = /profiles/speciation/traffic/tyre_base.csv +traffic_speciation_profile_road = /profiles/speciation/traffic/road_base.csv +traffic_speciation_profile_brake = /profiles/speciation/traffic/brake_base.csv +traffic_speciation_profile_resuspension = /profiles/speciation/traffic/resuspension_base.csv [TRAFFIC AREA SECTOR] traffic_area_pollutants = nox_no2,nmvoc,so2,co,nh3,pm10,pm25 do_evaporative = 1 
traffic_area_gas_path = /traffic_area/gasoline_vehicles_provinces_2015.csv -popullation_by_municipality = /traffic_area/population_by_mun.csv +population_nuts3 = /traffic_area/population_nuts3.csv traffic_area_speciation_profiles_evaporative = /profiles/speciation/traffic_area/evaporative_base.csv traffic_area_evaporative_ef_file = /traffic_area/ef/evaporative_nmvoc.csv do_small_cities = 1 -traffic_area_small_cities_path = /Shapefiles/small_cities/small_cities.shp +traffic_area_small_cities_path = /shapefiles/small_cities/small_cities.shp traffic_area_speciation_profiles_small_cities = /profiles/speciation/traffic_area/small_cities_base.csv traffic_area_small_cities_ef_file = /traffic_area/ef/small_cities.csv small_cities_monthly_profile = /profiles/temporal/traffic_area/small_cities_monthly_profiles.csv small_cities_weekly_profile = /profiles/temporal/traffic_area/small_cities_weekly_profiles.csv small_cities_hourly_profile = /profiles/temporal/traffic_area/small_cities_hourly_profiles.csv + +[SOLVENTS] +solvents_pollutants = nmvoc +solvents_proxies_path = /solvents/proxies_profiles.csv +solvents_yearly_emissions_by_nut2_path = /solvents/miteco_solvent_emissions_nuts2_2015.csv +solvents_point_sources_shapefile = /solvents/use_solvents_point_sources.shp +solvents_point_sources_weight_by_nut2_path = /solvents/point_sources_weights_nuts2.csv +solvents_monthly_profile = /profiles/temporal/solvents/monthly_profiles.csv +solvents_weekly_profile = /profiles/temporal/solvents/weekly_profiles.csv +solvents_hourly_profile = /profiles/temporal/solvents/hourly_profiles.csv +solvents_speciation_profiles = /profiles/speciation/solvents/speciation_profiles_base.csv diff --git a/environment.yml b/environment.yml index fc71b53fb3264847339c3d43aba843e8614b2232..584e815d9fbd3e5843f104f8f1f0bbab6c8866e2 100755 --- a/environment.yml +++ b/environment.yml @@ -23,5 +23,3 @@ dependencies: - pytest-cov - pycodestyle - shapely - - pip: - - holidays diff --git a/hermesv3_bu/__init__.py b/hermesv3_bu/__init__.py index 6c8e6b979c5f58121ac7ee2d9e024749da3a8ce1..3dc1f76bc69e3f559bee6253b24fc93acee9e1f9 100755 --- a/hermesv3_bu/__init__.py +++ b/hermesv3_bu/__init__.py @@ -1 +1 @@ -__version__ = "0.0.0" +__version__ = "0.1.0" diff --git a/hermesv3_bu/clipping/shapefile_clip.py b/hermesv3_bu/clipping/shapefile_clip.py index a0f1ec2d4f84339838fc7e32d9b3619b1790e164..88792ef532d2403cd46d5e97780b0c4d5027e746 100755 --- a/hermesv3_bu/clipping/shapefile_clip.py +++ b/hermesv3_bu/clipping/shapefile_clip.py @@ -6,6 +6,7 @@ import timeit import geopandas as gpd from hermesv3_bu.clipping.clip import Clip from hermesv3_bu.logger.log import Log +from hermesv3_bu.tools.checker import error_exit class ShapefileClip(Clip): @@ -48,7 +49,7 @@ class ShapefileClip(Clip): clip = gpd.GeoDataFrame(geometry=[clip.unary_union], crs=clip.crs) clip.to_file(self.shapefile_path) else: - raise IOError(" Clip shapefile {0} not found.") + error_exit(" Clip shapefile {0} not found.".format(self.shapefile_path)) else: clip = gpd.read_file(self.shapefile_path) self.logger.write_log("\tClip created at '{0}'".format(self.shapefile_path), 3) diff --git a/hermesv3_bu/config/config.py b/hermesv3_bu/config/config.py index 043ee0e4acc824eb254d1593529bcbe6a1e5eb85..c3fccf8492de2b97a209465326b9aa747eec6cc6 100755 --- a/hermesv3_bu/config/config.py +++ b/hermesv3_bu/config/config.py @@ -1,39 +1,26 @@ #!/usr/bin/env python -# Copyright 2018 Earth Sciences Department, BSC-CNS -# -# This file is part of HERMESv3_BU.
-# -# HERMESv3_BU is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# HERMESv3_BU is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with HERMESv3_BU. If not, see . - - from configargparse import ArgParser import os from mpi4py import MPI +from hermesv3_bu.tools.checker import error_exit class Config(ArgParser): """ Configuration arguments class. """ - def __init__(self, new_date=None): + def __init__(self, new_date=None, comm=None): """ Read and parse all the arguments. :param new_date: Starting date for simulation loop day. :type new_date: datetime.datetime """ + if comm is None: + comm = MPI.COMM_WORLD + self.comm = comm + self.new_date = new_date super(Config, self).__init__() @@ -72,10 +59,10 @@ class Config(ArgParser): p.add_argument('--auxiliary_files_path', required=True, help='Path to the directory where the necessary auxiliary files will be created if them are ' + 'not created yet.') + p.add_argument('--first_time', required=False, default='False', type=str, + help='Indicates if you want to run it for the first time (only creates the auxiliary files).') p.add_argument('--erase_auxiliary_files', required=False, default='False', type=str, help='Indicates if you want to start from scratch removing the auxiliary files already created.') - p.add_argument('--molecular_weights', required=True, - help='Path to the file that contains the molecular weights of the input pollutants.') # ===== DOMAIN ===== p.add_argument('--output_model', required=True, help='Name of the output model.', @@ -141,7 +128,7 @@ class Config(ArgParser): 'GRIDDESC file.') # Mercator - p.add_argument('--lat_ts', required=False, type=float, help='...') + p.add_argument('--lat_ts', required=False, type=float, help='Latitude of true scale (degrees).') # Regular lat-lon options: p.add_argument('--lat_orig', required=False, type=float, help='Latitude of the corner of the first cell.') p.add_argument('--lon_orig', required=False, type=float, help='Longitude of the corner of the first cell.') p.add_argument('--n_lat', required=False, type=int, help='Number of latitude elements.') p.add_argument('--n_lon', required=False, type=int, help='Number of longitude elements.') p.add_argument('--inc_lat', required=False, type=float, help='Latitude grid resolution.') p.add_argument('--inc_lon', required=False, type=float, help='Longitude grid resolution.') # ===== SECTOR SELECTION ===== - p.add_argument('--traffic_processors', required=True, type=int) - p.add_argument('--traffic_area_processors', required=True, type=int) - p.add_argument('--aviation_processors', required=True, type=int) - p.add_argument('--point_sources_processors', required=True, type=int) - p.add_argument('--recreational_boats_processors', required=True, type=int) - p.add_argument('--shipping_port_processors', required=True, type=int) - p.add_argument('--residential_processors', required=True, type=int) - p.add_argument('--livestock_processors', required=True, type=int) - p.add_argument('--crop_operations_processors', required=True, type=int) - p.add_argument('--crop_fertilizers_processors', required=True, type=int) - p.add_argument('--agricultural_machinery_processors', required=True, type=int) - - p.add_argument('--speciation_map', required=False, help='...') + p.add_argument('--traffic_processors', required=False, type=int, default=0, + help="Number of processors dedicated to simulating the road traffic sector " + + "(0 to deactivate the sector).") +
p.add_argument('--traffic_area_processors', required=False, type=int, default=0, + help="Number of processors dedicated to simulating the traffic area " + + "(evaporative and small cities) sector (0 to deactivate the sector).") + p.add_argument('--aviation_processors', required=False, type=int, default=0, + help="Number of processors dedicated to simulating the aviation sector " + + "(0 to deactivate the sector).") + p.add_argument('--point_sources_processors', required=False, type=int, default=0, + help="Number of processors dedicated to simulating the point sources sector " + + "(0 to deactivate the sector).") + p.add_argument('--recreational_boats_processors', required=False, type=int, default=0, + help="Number of processors dedicated to simulating the recreational boats sector " + + "(0 to deactivate the sector).") + p.add_argument('--shipping_port_processors', required=False, type=int, default=0, + help="Number of processors dedicated to simulating the shipping port sector " + + "(0 to deactivate the sector).") + p.add_argument('--residential_processors', required=False, type=int, default=0, + help="Number of processors dedicated to simulating the residential combustion sector " + + "(0 to deactivate the sector).") + p.add_argument('--livestock_processors', required=False, type=int, default=0, + help="Number of processors dedicated to simulating the livestock sector " + + "(0 to deactivate the sector).") + p.add_argument('--crop_operations_processors', required=False, type=int, default=0, + help="Number of processors dedicated to simulating the agricultural crop operations sector " + + "(0 to deactivate the sector).") + p.add_argument('--crop_fertilizers_processors', required=False, type=int, default=0, + help="Number of processors dedicated to simulating the agricultural crop fertilizers sector " + + "(0 to deactivate the sector).") + p.add_argument('--agricultural_machinery_processors', required=False, type=int, default=0, + help="Number of processors dedicated to simulating the agricultural machinery sector " + + "(0 to deactivate the sector).") + p.add_argument('--solvents_processors', required=False, type=int, default=0, + help="Number of processors dedicated to simulating the solvents sector " + + "(0 to deactivate the sector).") + + p.add_argument('--speciation_map', required=False, + help="Defines the path to the file that contains the mapping between input and output " + + "pollutant species.") + p.add_argument('--molecular_weights', required=True, + help='Path to the file that contains the molecular weights of the input pollutants.') # ===== SHAPEFILES ===== - p.add_argument('--nut_shapefile_prov', required=False, type=str, default='True') - p.add_argument('--nut_shapefile_ccaa', required=False, type=str, default='True') + p.add_argument('--nuts3_shapefile', required=False, type=str, default='True', + help="Defines the path to the shapefile with the NUTS3 administrative boundaries. Used in " + + "the livestock, agricultural machinery, residential combustion and traffic area sectors.") + p.add_argument('--nuts2_shapefile', required=False, type=str, default='True', + help="Defines the path to the shapefile with the NUTS2 administrative boundaries. Used in " + + "the agricultural crop operations, agricultural crop fertilizers, agricultural machinery, " + + "residential combustion and solvents sectors.") + p.add_argument('--population_density_map', required=False, + help="Defines the path to the GHS population density raster file. 
Used in residential " + + "combustion, traffic area and solvents sectors.") + p.add_argument('--population_type_map', required=False, + help="Defines the path to the GHS population type raster file.") + p.add_argument('--population_type_nuts2', required=False, + help="Defines the path to the CSV file that contains the total amount of urban and rural " + + "population registered at NUTS2 level (based on the GHS dataset).") + p.add_argument('--population_type_nuts3', required=False, + help="Defines the path to the CSV file that contains the total amount of urban and rural " + + "population registered at NUTS3 level (based on the GHS dataset).") + p.add_argument('--population_nuts2', required=False, type=str, default='True', + help="Defines the path to the CSV file that contains the total amount of population " + + "registered at NUTS2 level") + p.add_argument('--land_uses_path', required=False, + help='Defines the path to the CORINE Land Cover land use raster file') + p.add_argument('--land_uses_nuts2_path', required=False, + help="Defines the path to the CSV file that contains the total amount of each CLC " + + "land use area by NUTS2") p.add_argument('--clipping', required=False, type=str, default=None, - help='To clip the domain into an specific zone. ' + - 'It can be a shapefile path, a list of points to make a polygon or nothing to use ' + - 'the default clip: domain extension') + help="To clip the domain into an specific zone. It can be a shapefile path, a list of points " + + "to make a polygon or nothing to use the default clip: domain extension") # ===== METEO PATHS ===== - p.add_argument('--temperature_hourly_files_path', required=False, type=str, default='True') - p.add_argument('--temperature_daily_files_path', required=False, type=str, default='True') - p.add_argument('--wind_speed_daily_files_path', required=False, type=str, default='True') - p.add_argument('--precipitation_files_path', required=False, type=str, default='True') - p.add_argument('--temperature_4d_dir', required=False, type=str, default='True') - p.add_argument('--temperature_sfc_dir', required=False, type=str, default='True') - p.add_argument('--u_wind_speed_4d_dir', required=False, type=str, default='True') - p.add_argument('--v_wind_speed_4d_dir', required=False, type=str, default='True') - p.add_argument('--u10_wind_speed_dir', required=False, type=str, default='True') - p.add_argument('--v10_wind_speed_dir', required=False, type=str, default='True') - p.add_argument('--friction_velocity_dir', required=False, type=str, default='True') - p.add_argument('--pblh_dir', required=False, type=str, default='True') - p.add_argument('--obukhov_length_dir', required=False, type=str, default='True') - p.add_argument('--layer_thickness_dir', required=False, type=str, default='True') + p.add_argument('--temperature_hourly_files_path', required=False, type=str, default='True', + help="Defines the path to the NetCDF files containing hourly mean 2m temperature data.") + p.add_argument('--temperature_daily_files_path', required=False, type=str, default='True', + help="Defines the path to the NetCDF files containing daily mean 2m temperature data.") + p.add_argument('--wind_speed_daily_files_path', required=False, type=str, default='True', + help="Defines the path to the NetCDF files containing daily mean 10m wind speed data.") + p.add_argument('--precipitation_files_path', required=False, type=str, default='True', + help="Defines the path to the NetCDF files containing hourly mean precipitation data.") + 
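The clipping option documented above accepts a shapefile path, a list of points that form a polygon, or nothing (full domain extension). A rough standalone sketch of what those three forms amount to; this is not the actual clip module, and the path and coordinates are invented:

import geopandas as gpd
from shapely.geometry import Polygon

clipping = '1.9 41.2, 2.4 41.2, 2.4 41.6, 1.9 41.6'   # "lon lat" pairs, illustrative only

if clipping is None:
    clip_geometry = None                                            # default clip: the full domain extension
elif clipping.endswith('.shp'):
    clip_geometry = gpd.read_file(clipping).geometry.unary_union    # clip taken from a shapefile
else:
    points = [tuple(map(float, pair.split())) for pair in clipping.split(',')]
    clip_geometry = Polygon(points)                                 # polygon built from the point list

print(clip_geometry.bounds)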
p.add_argument('--temperature_4d_dir', required=False, type=str, default='True', + help="Defines the path to the NetCDF files containing hourly mean 4D temperature data.") + p.add_argument('--temperature_sfc_dir', required=False, type=str, default='True', + help="Defines the path to the NetCDF files containing hourly mean surface temperature data.") + p.add_argument('--u_wind_speed_4d_dir', required=False, type=str, default='True', + help="Defines the path to the NetCDF files containing hourly mean 4D U wind component data.") + p.add_argument('--v_wind_speed_4d_dir', required=False, type=str, default='True', + help="Defines the path to the NetCDF files containing hourly mean 4D V wind component data.") + p.add_argument('--u10_wind_speed_dir', required=False, type=str, default='True', + help="Defines the path to the NetCDF files containing daily mean 10m U wind component data.") + p.add_argument('--v10_wind_speed_dir', required=False, type=str, default='True', + help="Defines the path to the NetCDF files containing daily mean 10m V wind component data.") + p.add_argument('--friction_velocity_dir', required=False, type=str, default='True', + help="Defines the path to the NetCDF files containing hourly mean 4D friction velocity data.") + p.add_argument('--pblh_dir', required=False, type=str, default='True', + help="Defines the path to the NetCDF files containing hourly mean PBL height data.") + p.add_argument('--obukhov_length_dir', required=False, type=str, default='True', + help="Defines the path to the NetCDF files containing hourly mean Obukhov length data.") + p.add_argument('--layer_thickness_dir', required=False, type=str, default='True', + help="Defines the path to the NetCDF files containing hourly mean 4D layer thickness data.") # ***** AVIATION SECTOR ***** - p.add_argument('--aviation_source_pollutants', required=False, help='...') - p.add_argument('--airport_list', required=False, help='...') - p.add_argument('--plane_list', required=False, help='...') - p.add_argument('--airport_shapefile_path', required=False, help='...') - p.add_argument('--airport_runways_shapefile_path', required=False, help='...') - p.add_argument('--airport_runways_corners_shapefile_path', required=False, help='...') - p.add_argument('--airport_trajectories_shapefile_path', required=False, help='...') - p.add_argument('--airport_operations_path', required=False, help='...') - p.add_argument('--planes_path', required=False, help='...') - p.add_argument('--airport_times_path', required=False, help='...') - p.add_argument('--airport_ef_dir', required=False, help='...') - p.add_argument('--aviation_weekly_profiles', required=False, help='...') - p.add_argument('--aviation_hourly_profiles', required=False, help='...') - p.add_argument('--aviation_speciation_profiles', required=False, help='...') + p.add_argument('--aviation_source_pollutants', required=False, + help="List of pollutants considered for the calculation of the aviation sector.") + p.add_argument('--airport_list', required=False, + help="Defines the list of airport codes to be considered for the calculation of the sector. " + + "By default, all the airports located within the working domain will be considered.") + p.add_argument('--plane_list', required=False, + help="List of plane categories to be considered for the calculation of the sector. 
" + + "By default, all the plane categories are considered.") + p.add_argument('--airport_shapefile_path', required=False, + help="Defines the path to the polygon shapefile with the airport infrastructure boundaries.") + p.add_argument('--airport_runways_shapefile_path', required=False, + help="Defines the path to the polyline shapefile with the airport runways.") + p.add_argument('--airport_runways_corners_shapefile_path', required=False, + help="Defines the path to the multipoint shapefile with the airport runway’s corners.") + p.add_argument('--airport_trajectories_shapefile_path', required=False, + help="Defines the path to the polyline shapefile with the airport’s air trajectories.") + p.add_argument('--airport_operations_path', required=False, + help="Defines the path to the CSV file that contains the number of monthly operations " + + "(arrival, departure) per airport and plane.") + p.add_argument('--planes_path', required=False, + help="Defines the path to the CSV file that contains the description of the planes.") + p.add_argument('--airport_times_path', required=False, + help="Defines the path to the CSV file that contains the times associates to each LTO phase " + + "per airport and plane.") + p.add_argument('--airport_ef_dir', required=False, + help="Defines the path to the CSV files that contain the emission factors for each plane and " + + "LTO phase.") + p.add_argument('--aviation_weekly_profiles', required=False, + help="Defines the path to the CSV file that contains the weekly temporal profiles per airport.") + p.add_argument('--aviation_hourly_profiles', required=False, + help="Defines the path to the CSV file that contains the hourly temporal profiles per airport.") + p.add_argument('--aviation_speciation_profiles', required=False, + help="Defines the path to the CSV file that contains the speciation profiles.") # ***** SHIPPING PORT SECTOR ***** - p.add_argument('--shipping_port_source_pollutants', required=False, help='...') - p.add_argument('--vessel_list', required=False, help='...') - p.add_argument('--port_list', required=False, help='...') - p.add_argument('--hoteling_shapefile_path', required=False, help='...') - p.add_argument('--maneuvering_shapefile_path', required=False, help='...') - p.add_argument('--shipping_port_ef_path', required=False, help='...') - p.add_argument('--shipping_port_engine_percent_path', required=False, help='...') - p.add_argument('--shipping_port_tonnage_path', required=False, help='...') - p.add_argument('--shipping_port_load_factor_path', required=False, help='...') - p.add_argument('--shipping_port_power_path', required=False, help='...') - p.add_argument('--shipping_port_monthly_profiles', required=False, help='...') - p.add_argument('--shipping_port_weekly_profiles', required=False, help='...') - p.add_argument('--shipping_port_hourly_profiles', required=False, help='...') - p.add_argument('--shipping_port_speciation_profiles', required=False, help='...') + p.add_argument('--shipping_port_source_pollutants', required=False, + help="List of pollutants considered for the calculation of the shipping port sector.") + p.add_argument('--vessel_list', required=False, + help="Defines the list of vessel categories to be considered for the emission calculation.") + p.add_argument('--port_list', required=False, + help="Defines the list of ports to be considered for the emission calculation. 
") + p.add_argument('--hoteling_shapefile_path', required=False, + help="Defines the path to the multipolygon shapefile with the hotelling areas.") + p.add_argument('--maneuvering_shapefile_path', required=False, + help="Defines the path to the multipolygon shapefile with the maneuvering areas.") + p.add_argument('--shipping_port_ef_path', required=False, + help="Defines the path to the CSV file that contains the emission factors for each main and " + + "auxiliary engine class and fuel type.") + p.add_argument('--shipping_port_engine_percent_path', required=False, + help="Defines the path to the CSV file that contains the engine class and fuel type split " + + "factors for each vessel category.") + p.add_argument('--shipping_port_tonnage_path', required=False, + help="Defines the path to the CSV file that contains the number of annual operations and mean " + + "Gross Tonnage value per port and vessel category.") + p.add_argument('--shipping_port_load_factor_path', required=False, + help="Defines the path to the CSV file that contains the average load factor and time spent " + + "for each vessel, engine and operation.") + p.add_argument('--shipping_port_power_path', required=False, + help="Defines the path to the CSV file that contains the parameters for the main engine power " + + "calculation as a function of the Gross Tonnage and the average vessel's ratio of " + + "auxiliary engines/main engines.") + p.add_argument('--shipping_port_monthly_profiles', required=False, + help="Defines the path to the CSV file that contains the monthly temporal profiles per port " + + "and vessel category.") + p.add_argument('--shipping_port_weekly_profiles', required=False, + help="Defines the path to the CSV file that contains the weekly temporal profiles.") + p.add_argument('--shipping_port_hourly_profiles', required=False, + help="Defines the path to the CSV file that contains the hourly temporal profiles per airport.") + p.add_argument('--shipping_port_speciation_profiles', required=False, + help="Defines the path to the CSV file that contains the speciation profiles.") # ***** LIVESTOCK SECTOR ***** - p.add_argument('--livestock_source_pollutants', required=False, help='...') - p.add_argument('--animal_list', required=False, help='...') - p.add_argument('--gridded_livestock', required=False, help='...') - p.add_argument('--correction_split_factors', required=False, help='...') - p.add_argument('--denominator_yearly_factor_dir', required=False, help='...') - p.add_argument('--livestock_ef_files_dir', required=False, help='...') - p.add_argument('--livestock_monthly_profiles', required=False, help='...') - p.add_argument('--livestock_weekly_profiles', required=False, help='...') - p.add_argument('--livestock_hourly_profiles', required=False, help='...') - p.add_argument('--livestock_speciation_profiles', required=False, help='...') + p.add_argument('--livestock_source_pollutants', required=False, + help="List of pollutants considered for the calculation of the livestock sector.") + p.add_argument('--animal_list', required=False, + help="Defines the list of livestock categories [cattle, chicken, goats, pigs or sheep] to be " + + "considered for the emission calculation.") + p.add_argument('--gridded_livestock', required=False, + help="Defines the path to the GLWv3 livestock population density raster files. 
The string " + + " is automatically replaced by the different livestock categories considered " + + "for the calculation.") + p.add_argument('--correction_split_factors', required=False, + help="Defines the path to the CSV file that contains the livestock subgroup split factors " + + "and adjusting factors to match the official statistics provided at the NUTS3 level. " + + "The string is automatically replaced by the different livestock categories considered " + + "for the calculation.") + p.add_argument('--denominator_yearly_factor_dir', required=False, + help="Define the path to the NetCDF file that contains the yearly average daily factor per " + + "grid cell.") + p.add_argument('--livestock_ef_files_dir', required=False, + help="Defines the path to the CSV files that contain the emission factors for each pollutant." + + " Each pollutant has its own emission factor file format.") + p.add_argument('--livestock_monthly_profiles', required=False, + help="Defines the path to the CSV file that contains the monthly temporal profiles") + p.add_argument('--livestock_weekly_profiles', required=False, + help="Defines the path to the CSV file that contains the weekly temporal profiles") + p.add_argument('--livestock_hourly_profiles', required=False, + help="Defines the path to the CSV file that contains the hourly temporal profiles") + p.add_argument('--livestock_speciation_profiles', required=False, + help="Defines the path to the CSV file that contains the speciation profiles") # ***** AGRICULTURAL SECTOR***** - p.add_argument('--land_uses_path', required=False, help='...') - p.add_argument('--land_use_by_nut_path', required=False, help='...') - p.add_argument('--crop_by_nut_path', required=False, help='...') - p.add_argument('--crop_from_landuse_path', required=False, help='...') + p.add_argument('--crop_by_nut_path', required=False, + help="Defines the path to the CSV file that contains the annual cultivated area of each crop " + + "by NUTS2") + p.add_argument('--crop_from_landuse_path', required=False, + help="Defines the path to the CSV file that contains the mapping between CLC land use " + + "categories and crop categories") # ***** CROP OPERATIONS SECTOR - p.add_argument('--crop_operations_source_pollutants', required=False, help='...') - p.add_argument('--crop_operations_list', required=False, help='...') - p.add_argument('--crop_operations_ef_files_dir', required=False, help='...') - p.add_argument('--crop_operations_monthly_profiles', required=False, help='...') - p.add_argument('--crop_operations_weekly_profiles', required=False, help='...') - p.add_argument('--crop_operations_hourly_profiles', required=False, help='...') - p.add_argument('--crop_operations_speciation_profiles', required=False, help='...') + p.add_argument('--crop_operations_source_pollutants', required=False, + help="List of pollutants considered for the calculation of the agricultural crop operations " + + "sector.") + p.add_argument('--crop_operations_list', required=False, + help="List of crop categories considered for the calculation of the sector " + + "[wheat, rye, barley, oats].") + p.add_argument('--crop_operations_ef_files_dir', required=False, + help="Defines the path to the CSV file that contains the emission factors for each crop " + + "operations and crop type.") + p.add_argument('--crop_operations_monthly_profiles', required=False, + help="Defines the path to the CSV file that contains the monthly temporal profiles.") + p.add_argument('--crop_operations_weekly_profiles', required=False, + help="Defines 
the path to the CSV file that contains the weekly temporal profiles.") + p.add_argument('--crop_operations_hourly_profiles', required=False, + help="Defines the path to the CSV file that contains the hourly temporal profiles.") + p.add_argument('--crop_operations_speciation_profiles', required=False, + help="Defines the path to the CSV file that contains the speciation profiles") # ***** CROP FERTILIZERS SECTOR ***** - p.add_argument('--crop_fertilizers_source_pollutants', required=False, help='...') - p.add_argument('--crop_fertilizers_list', required=False, help='...') - p.add_argument('--cultivated_ratio', required=False, help='...') - p.add_argument('--fertilizers_rate', required=False, help='...') - p.add_argument('--crop_f_parameter', required=False, help='...') - p.add_argument('--crop_f_fertilizers', required=False, help='...') - p.add_argument('--gridded_ph', required=False, help='...') - p.add_argument('--gridded_cec', required=False, help='...') - p.add_argument('--fertilizers_denominator_yearly_factor_path', required=False, help='...') - p.add_argument('--crop_calendar', required=False, help='...') - p.add_argument('--crop_fertilizers_hourly_profiles', required=False, help='...') - p.add_argument('--crop_fertilizers_speciation_profiles', required=False, help='...') - p.add_argument('--crop_growing_degree_day_path', required=False, help='...') - - # ***** CROP MACHINERY SECTOR ***** - p.add_argument('--crop_machinery_source_pollutants', required=False, help='...') - p.add_argument('--crop_machinery_list', required=False, help='...') - p.add_argument('--machinery_list', required=False, help='...') - p.add_argument('--crop_machinery_deterioration_factor_path', required=False, help='...') - p.add_argument('--crop_machinery_load_factor_path', required=False, help='...') - p.add_argument('--crop_machinery_vehicle_ratio_path', required=False, help='...') - p.add_argument('--crop_machinery_vehicle_units_path', required=False, help='...') - p.add_argument('--crop_machinery_vehicle_workhours_path', required=False, help='...') - p.add_argument('--crop_machinery_vehicle_power_path', required=False, help='...') - p.add_argument('--crop_machinery_ef_path', required=False, help='...') - p.add_argument('--crop_machinery_monthly_profiles', required=False, help='...') - p.add_argument('--crop_machinery_weekly_profiles', required=False, help='...') - p.add_argument('--crop_machinery_hourly_profiles', required=False, help='...') - p.add_argument('--crop_machinery_speciation_map', required=False, help='...') - p.add_argument('--crop_machinery_speciation_profiles', required=False, help='...') - p.add_argument('--crop_machinery_by_nut', required=False, help='...') + p.add_argument('--crop_fertilizers_source_pollutants', required=False, + help="List of pollutants considered for the calculation of the agricultural crop fertilizers " + + "sector.") + p.add_argument('--crop_fertilizers_list', required=False, + help="List of crop categories considered for the calculation of the sector [alfalfa, almond, " + + "apple, apricot, barley, cherry, cotton, fig, grape, lemonlime, maize, melonetc, oats, " + + "olive, orange, pea, peachetc, pear, potato, rice, rye, sunflower, tangetc, tomato, " + + "triticale, vetch, watermelon, wheat].") + p.add_argument('--cultivated_ratio', required=False, + help="Defines the path to the CSV file that contains the ration of cultivated to fertilised " + + "area for crop category.") + p.add_argument('--fertilizers_rate', required=False, + help="Defines the path to the CSV file that 
contains the fertilizer application rate for " + + "crop category and NUTS2.") + p.add_argument('--crop_f_parameter', required=False, + help="Defines the path to the CSV file that contains: (i) the parameters for the calculation " + + "of the NH3 emission factor according to Bouwman and Boumans (2002) and (ii) the " + + "fraction of each fertilizer type used by NUTS2.") + p.add_argument('--crop_f_fertilizers', required=False, + help="Defines the path to the CSV file that contains the fertilizer type-related parameters " + + "for the calculation of the NH3 emission factor according to Bouwman and Boumans (2002).") + p.add_argument('--gridded_ph', required=False, + help="Defines the path to the ISRIC pH soil raster file.") + p.add_argument('--gridded_cec', required=False, + help="Defines the path to the ISRIC CEC soil raster file.") + p.add_argument('--fertilizers_denominator_yearly_factor_path', required=False, + help="Define the path to the NetCDF file that contains the yearly average daily factor per " + + "grid cell.") + p.add_argument('--crop_calendar', required=False, + help="Defines the path to the CSV file that contains the parameters needed to define the " + + "timing of the fertilizer application per crop category.") + p.add_argument('--crop_fertilizers_hourly_profiles', required=False, + help="Defines the path to the CSV file that contains the hourly temporal profiles.") + p.add_argument('--crop_fertilizers_speciation_profiles', required=False, + help="Defines the path to the CSV file that contains the speciation profiles.") + p.add_argument('--crop_growing_degree_day_path', required=False, + help="Define the path to the NetCDF file that contains the growing degree day valueper " + + "grid cell. The string is automatically replaced for \"winter\" or \"spring\" " + + "as a function of the crop_calendar file. 
The string is automatically replaced " + + "for the year of simulation.") + + # ***** AGRICULTURAL MACHINERY SECTOR ***** + p.add_argument('--crop_machinery_source_pollutants', required=False, + help="List of pollutants considered for the calculation of the agricultural machinery sector.") + p.add_argument('--crop_machinery_list', required=False, + help="List of crop categories considered for the calculation of the sector " + + "[barley, oats, rye, wheat].") + p.add_argument('--machinery_list', required=False, + help="List of agricultural equipment categories considered for the calculation of the sector " + + "[tractors, harvesters, rotavators].") + p.add_argument('--crop_machinery_deterioration_factor_path', required=False, + help="Defines the path to the CSV file that contains the deterioration factors per equipment " + + "category and pollutant.") + p.add_argument('--crop_machinery_load_factor_path', required=False, + help="Defines the path to the CSV file that contains the load factors per equipment category.") + p.add_argument('--crop_machinery_vehicle_ratio_path', required=False, + help="Defines the path to the CSV file that contains the equipment subgroup split factors by " + + "technology and NUTS3.") + p.add_argument('--crop_machinery_vehicle_units_path', required=False, + help="Defines the path to the CSV file that contains the total amount of equipment by " + + "category and NUTS3.") + p.add_argument('--crop_machinery_vehicle_workhours_path', required=False, + help="Defines the path to the CSV file that contains the number of hours that each equipment " + + "subgroup is used by NUTS3.") + p.add_argument('--crop_machinery_vehicle_power_path', required=False, + help="Defines the path to the CSV file that contains the engine nominal power associated to " + + "each equipment category by NUTS3.") + p.add_argument('--crop_machinery_ef_path', required=False, + help="Defines the path to the CSV file that contains the emission factors for each " + + "equipment subgroup.") + p.add_argument('--crop_machinery_monthly_profiles', required=False, + help="Defines the path to the CSV file that contains the monthly temporal profiles.") + p.add_argument('--crop_machinery_weekly_profiles', required=False, + help="Defines the path to the CSV file that contains the weekly temporal profiles.") + p.add_argument('--crop_machinery_hourly_profiles', required=False, + help="Defines the path to the CSV file that contains the hourly temporal profiles.") + p.add_argument('--crop_machinery_speciation_profiles', required=False, + help="Defines the path to the CSV file that contains the speciation profiles.") + p.add_argument('--crop_machinery_nuts3', required=False, + help="Defines the path to the CSV file that contains the annual cultivated area of each crop " + + "by NUTS3") # ***** RESIDENTIAL SECTOR ***** - p.add_argument('--fuel_list', required=False, help='...') - p.add_argument('--residential_source_pollutants', required=False, help='...') - p.add_argument('--population_density_map', required=False, help='...') - p.add_argument('--population_type_map', required=False, help='...') - p.add_argument('--population_type_by_ccaa', required=False, help='...') - p.add_argument('--population_type_by_prov', required=False, help='...') - p.add_argument('--energy_consumption_by_prov', required=False, help='...') - p.add_argument('--energy_consumption_by_ccaa', required=False, help='...') - p.add_argument('--residential_spatial_proxies', required=False, help='...') - p.add_argument('--residential_ef_files_path', 
required=False, help='...') - p.add_argument('--residential_heating_degree_day_path', required=False, help='...') - p.add_argument('--residential_hourly_profiles', required=False, help='...') - p.add_argument('--residential_speciation_profiles', required=False, help='...') + p.add_argument('--fuel_list', required=False, + help="List of fuel types considered for the calculation of the sector. (HD = heating diesel, " + + "LPG = liquefied petroleum gas, NG = natural gas; B = biomass, res = residential, " + + "com = commercial).") + p.add_argument('--residential_source_pollutants', required=False, + help="List of pollutants considered for the calculation of the residential/commercial sector.") + p.add_argument('--energy_consumption_nuts3', required=False, + help="Defines the path to the CSV file that contains the annual amount of energy consumed " + + "per fuel type and NUTS3.") + p.add_argument('--energy_consumption_nuts2', required=False, + help="Defines the path to the CSV file that contains the annual amount of energy consumed " + + "per fuel type and NUTS2.") + p.add_argument('--residential_spatial_proxies', required=False, + help="Defines the path to the CSV file that contains the type of population (urban, rural) " + + "assigned to each fuel for its spatial mapping.") + p.add_argument('--residential_ef_files_path', required=False, + help="Defines the path to the CSV file that contains the emission factors for each fuel type.") + p.add_argument('--residential_heating_degree_day_path', required=False, + help="Define the path to the NetCDF file that contains the yearly average HDD factor per " + + "grid cell.") + p.add_argument('--residential_hourly_profiles', required=False, + help="Defines the path to the CSV file that contains the hourly temporal profiles per " + + "fuel type.") + p.add_argument('--residential_speciation_profiles', required=False, + help="Defines the path to the CSV file that contains the speciation profiles.") # ***** RECREATIONAL BOATS SECTOR ***** - p.add_argument('--recreational_boats_source_pollutants', required=False, help='...') - p.add_argument('--recreational_boats_list', required=False, help='...') - p.add_argument('--recreational_boats_density_map', required=False, help='...') - p.add_argument('--recreational_boats_by_type', required=False, help='...') - p.add_argument('--recreational_boats_ef_path', required=False, help='...') - p.add_argument('--recreational_boats_monthly_profiles', required=False, help='...') - p.add_argument('--recreational_boats_weekly_profiles', required=False, help='...') - p.add_argument('--recreational_boats_hourly_profiles', required=False, help='...') - p.add_argument('--recreational_boats_speciation_profiles', required=False, help='...') + p.add_argument('--recreational_boats_source_pollutants', required=False, + help="List of pollutants considered for the calculation of the recreational boat sector.") + p.add_argument('--recreational_boats_list', required=False, + help="List of recreational boat category codes considered for the calculation of the sector " + + "[YB_001,YB_002,SB_001,SB_002,SP_001,SP_002,OB_001,OB_002,WS_001,WS_002,YB_003,SB_003," + + "SP_004,SP_005,OB_002,WS_003,MB_001,MB_002,MB_003,MB_004,MB_005,MB_006,MS_001,MS_002," + + "SB_004,SB_005]. 
A description of each category code is available here.") + p.add_argument('--recreational_boats_density_map', required=False, + help="Defines the path to the raster file used for performing the spatial distribution of " + + "the recreational boats.") + p.add_argument('--recreational_boats_by_type', required=False, + help="Defines the path to the CSV file that contains the number of recreational boats per " + + "category and associated information (load factor, annual working hours, nominal engine " + + "power).") + p.add_argument('--recreational_boats_ef_path', required=False, + help="Defines the path to the CSV file that contains the emission factors for each " + + "recreational boat category.") + p.add_argument('--recreational_boats_monthly_profiles', required=False, + help="Defines the path to the CSV file that contains the monthly temporal profiles.") + p.add_argument('--recreational_boats_weekly_profiles', required=False, + help="Defines the path to the CSV file that contains the weekly temporal profiles.") + p.add_argument('--recreational_boats_hourly_profiles', required=False, + help="Defines the path to the CSV file that contains the hourly temporal profiles.") + p.add_argument('--recreational_boats_speciation_profiles', required=False, + help="Defines the path to the CSV file that contains the speciation profiles.") # ***** POINT SOURCE SECTOR ***** - p.add_argument('--point_source_pollutants', required=False, help='...') - p.add_argument('--plume_rise', required=False, help='...') - p.add_argument('--point_source_snaps', required=False, help='...') - p.add_argument('--point_source_catalog', required=False, help='...') - p.add_argument('--point_source_monthly_profiles', required=False, help='...') - p.add_argument('--point_source_weekly_profiles', required=False, help='...') - p.add_argument('--point_source_hourly_profiles', required=False, help='...') - p.add_argument('--point_source_speciation_profiles', required=False, help='...') - p.add_argument('--point_source_measured_emissions', required=False, help='...') + p.add_argument('--point_source_pollutants', required=False, + help="List of pollutants considered for the calculation of the point sources sector.") + p.add_argument('--plume_rise', required=False, + help="Boolean that defines if the plume rise algorithm is activated or not.") + p.add_argument('--point_source_snaps', required=False, + help="Defines the SNAP source categories considered during the emission calculation " + + "[01, 03, 04, 09].") + p.add_argument('--point_source_catalog', required=False, + help="Defines the path to the CSV file that contains the description of each point source " + + "needed for the emission calculation (ID code, geographical location, activity and " + + "emission factors, physical stack parameters, temporal and speciation profile IDs)") + p.add_argument('--point_source_monthly_profiles', required=False, + help="Defines the path to the CSV file that contains the monthly temporal profiles.") + p.add_argument('--point_source_weekly_profiles', required=False, + help="Defines the path to the CSV file that contains the weekly temporal profiles.") + p.add_argument('--point_source_hourly_profiles', required=False, + help="Defines the path to the CSV file that contains the hourly temporal profiles.") + p.add_argument('--point_source_speciation_profiles', required=False, + help="Defines the path to the CSV file that contains the speciation profiles.") + p.add_argument('--point_source_measured_emissions', required=False, + help="Defines the path to 
the CSV file that contains hourly measured emissions for a specific " + + "point source. The string is automatically replaced by the point source facility " + + "which activity factor in the “point_source_catalog” file is assigned with a “-1” value.") # ***** TRAFFIC SECTOR ***** - p.add_argument('--do_hot', required=False, help='...') - p.add_argument('--do_cold', required=False, help='...') - p.add_argument('--do_tyre_wear', required=False, help='...') - p.add_argument('--do_brake_wear', required=False, help='...') - p.add_argument('--do_road_wear', required=False, help='...') - p.add_argument('--do_resuspension', required=False, help='...') - p.add_argument('--resuspension_correction', required=False, help='...') - p.add_argument('--write_rline', required=False, help='...') - - p.add_argument('--traffic_pollutants', required=False, help='...') - p.add_argument('--vehicle_types', required=False, help='...') - p.add_argument('--load', type=float, required=False, help='...') - p.add_argument('--road_link_path', required=False, help='...') - p.add_argument('--fleet_compo_path', required=False, help='...') - p.add_argument('--traffic_ef_path', required=False, help='...') - p.add_argument('--traffic_speed_hourly_path', required=False, help='...') - p.add_argument('--traffic_monthly_profiles', required=False, help='...') - p.add_argument('--traffic_weekly_profiles', required=False, help='...') - p.add_argument('--traffic_hourly_profiles_mean', required=False, help='...') - p.add_argument('--traffic_hourly_profiles_weekday', required=False, help='...') - p.add_argument('--traffic_hourly_profiles_saturday', required=False, help='...') - p.add_argument('--traffic_hourly_profiles_sunday', required=False, help='...') - p.add_argument('--traffic_speciation_profile_hot_cold', required=False, help='...') - p.add_argument('--traffic_speciation_profile_tyre', required=False, help='...') - p.add_argument('--traffic_speciation_profile_road', required=False, help='...') - p.add_argument('--traffic_speciation_profile_brake', required=False, help='...') - p.add_argument('--traffic_speciation_profile_resuspension', required=False, help='...') + p.add_argument('--do_hot', required=False, + help="Boolean to define if the exhaust hot emissions are considered (1) or dismissed (0) " + + "during the calculation process.") + p.add_argument('--do_cold', required=False, + help="Boolean to define if the exhaust cold-start emissions are considered (1) or dismissed " + + "(0) during the calculation process.") + p.add_argument('--do_tyre_wear', required=False, + help="Boolean to define if the tyre wear emissions are considered (1) or dismissed (0) " + + "during the calculation process.") + p.add_argument('--do_brake_wear', required=False, + help="Boolean to define if the brake wear emissions are considered (1) or dismissed (0) " + + "during the calculation process.") + p.add_argument('--do_road_wear', required=False, + help="Boolean to define if the road wear emissions are considered (1) or dismissed (0) " + + "during the calculation process.") + p.add_argument('--do_resuspension', required=False, + help="Boolean to define if the resuspension emissions are considered (1) or dismissed (0) " + + "during the calculation process.") + p.add_argument('--resuspension_correction', required=False, + help="Boolean to define if the effect of precipitation on resuspension emissions is " + + "considered (1) or dismissed (0) during the calculation process.") + p.add_argument('--write_rline', required=False, + help="Boolean to define if 
the emission output is written following the conventions and " + + "requirements of the R-LINE model. If the R-LINE option is activated, the user need to " + + "provide the R-LINE road link shapefile.") + + p.add_argument('--traffic_pollutants', required=False, + help="List of pollutants considered for the calculation of the traffic sector.") + p.add_argument('--vehicle_types', required=False, + help="Defines the list of vehicle categories to be considered for the emission calculation.") + p.add_argument('--load', type=float, required=False, + help="Defines the load percentage correction applicable to heavy duty vehicles and buses " + + "[0.0, 0.5 or 1.0].") + p.add_argument('--road_link_path', required=False, + help="Defines the path to the shapefile with the road network and associated traffic flow " + + "information.") + p.add_argument('--fleet_compo_path', required=False, + help="Defines the path to the CSV file that contains the vehicle fleet composition profiles.") + p.add_argument('--traffic_ef_path', required=False, + help="Defines the path to the CSV files that contain the emission factors. Emission factor " + + "CSV files need to be provided separately for each source and pollutant.") + p.add_argument('--traffic_speed_hourly_path', required=False, + help="Defines the path to the CSV files that contain the hourly temporal profiles for the " + + "average speed data.") + p.add_argument('--traffic_monthly_profiles', required=False, + help="Defines the path to the CSV file that contains the monthly temporal profiles.") + p.add_argument('--traffic_weekly_profiles', required=False, + help="Defines the path to the CSV file that contains the weekly temporal profiles.") + p.add_argument('--traffic_hourly_profiles_mean', required=False, + help="Defines the path to the CSV file that contains the hourly profiles file.") + p.add_argument('--traffic_hourly_profiles_weekday', required=False, + help="Defines the path to the CSV file that contains the weekday hourly temporal profiles.") + p.add_argument('--traffic_hourly_profiles_saturday', required=False, + help="Defines the path to the CSV file that contains the Saturday-type hourly temporal " + + "profiles.") + p.add_argument('--traffic_hourly_profiles_sunday', required=False, + help="Defines the path to the CSV file that contains the Sunday-type hourly temporal profiles.") + p.add_argument('--traffic_speciation_profile_hot_cold', required=False, + help="Defines the path to the CSV file that contains the speciation profiles for the hot " + + "and cold-start emissions.") + p.add_argument('--traffic_speciation_profile_tyre', required=False, + help="Defines the path to the CSV file that contains the speciation profiles for the tyre " + + "wear emissions.") + p.add_argument('--traffic_speciation_profile_road', required=False, + help="Defines the path to the CSV file that contains the speciation profiles for the " + + "road wear emissions.") + p.add_argument('--traffic_speciation_profile_brake', required=False, + help="Defines the path to the CSV file that contains the speciation profiles for the " + + "brake wear emissions.") + p.add_argument('--traffic_speciation_profile_resuspension', required=False, + help="Defines the path to the CSV file that contains the speciation profiles for the " + + "resuspension emissions.") # ***** TRAFFIC AREA SECTOR ***** - p.add_argument('--traffic_area_pollutants', required=False, help='...') - p.add_argument('--do_evaporative', required=False, help='...') - p.add_argument('--traffic_area_gas_path', required=False, 
help='...') - p.add_argument('--popullation_by_municipality', required=False, help='...') - p.add_argument('--traffic_area_speciation_profiles_evaporative', required=False, help='...') - p.add_argument('--traffic_area_evaporative_ef_file', required=False, help='...') - p.add_argument('--do_small_cities', required=False, help='...') - p.add_argument('--traffic_area_small_cities_path', required=False, help='...') - p.add_argument('--traffic_area_speciation_profiles_small_cities', required=False, help='...') - p.add_argument('--traffic_area_small_cities_ef_file', required=False, help='...') - p.add_argument('--small_cities_hourly_profile', required=False, help='...') - p.add_argument('--small_cities_weekly_profile', required=False, help='...') - p.add_argument('--small_cities_monthly_profile', required=False, help='...') + p.add_argument('--traffic_area_pollutants', required=False, + help="List of pollutants considered for the calculation of the traffic area sector.") + p.add_argument('--do_evaporative', required=False, + help="Boolean to define if the gasoline evaporative emissions are considered (1) or " + + "dismissed (0) during the calculation process.") + p.add_argument('--traffic_area_gas_path', required=False, + help="Defines the path to the CSV file that contains the total amount of gasoline vehicles " + + "registered per vehicle category and NUTS3.") + p.add_argument('--population_nuts3', required=False, + help="Defines the path to the CSV file that contains the total amount of urban and rural " + + "population registered at NUTS3 level.") + p.add_argument('--traffic_area_speciation_profiles_evaporative', required=False, + help="Defines the path to the CSV file that contains the speciation profiles.") + p.add_argument('--traffic_area_evaporative_ef_file', required=False, + help="Defines the path to the CSV file that contains the emission factors for each vehicle " + + "category and range of temperatures.") + + p.add_argument('--do_small_cities', required=False, + help="Boolean to define if the small city emissions are considered (1) or dismissed (0) " + + "during the calculation process.") + p.add_argument('--traffic_area_small_cities_path', required=False, + help="Defines the path to the multipolygon shapefile with the small cities.") + p.add_argument('--traffic_area_small_cities_ef_file', required=False, + help="Defines the path to the CSV file that contains the emission factors for the small cities.") + p.add_argument('--small_cities_monthly_profile', required=False, + help="Defines the path to the CSV file that contains the monthly temporal profiles for the " + + "small cities.") + p.add_argument('--small_cities_weekly_profile', required=False, + help="Defines the path to the CSV file that contains the weekly temporal profiles for the " + + "small cities.") + p.add_argument('--small_cities_hourly_profile', required=False, + help="Defines the path to the CSV file that contains the hourly temporal profiles for the " + + "small cities.") + p.add_argument('--traffic_area_speciation_profiles_small_cities', required=False, + help="Defines the path to the CSV file that contains the speciation profiles.") + + # ***** SOLVENTS SECTOR ***** + p.add_argument('--solvents_pollutants', required=False, + help="List of pollutants considered for the calculation of the solvents sector. 
" + + "Only 'nmvoc' is available.") + # TODO add description for solvents sector + p.add_argument('--solvents_proxies_path', required=False, + help="") + p.add_argument('--solvents_yearly_emissions_by_nut2_path', required=False, + help="") + p.add_argument('--solvents_point_sources_shapefile', required=False, + help="") + p.add_argument('--solvents_point_sources_weight_by_nut2_path', required=False, + help="") + p.add_argument('--solvents_monthly_profile', required=False, + help="Defines the path to the CSV file that contains the monthly temporal profiles.") + p.add_argument('--solvents_weekly_profile', required=False, + help="Defines the path to the CSV file that contains the weekly temporal profiles.") + p.add_argument('--solvents_hourly_profile', required=False, + help="Defines the path to the CSV file that contains the hourly temporal profiles.") + p.add_argument('--solvents_speciation_profiles', required=False, + help="Defines the path to the CSV file that contains the speciation profiles.") arguments = p.parse_args() @@ -384,21 +688,22 @@ class Config(ArgParser): item, arguments.inc_x, arguments.inc_y)) arguments.emission_summary = self._parse_bool(arguments.emission_summary) - arguments.start_date = self._parse_start_date(arguments.start_date) + arguments.start_date = self._parse_start_date(arguments.start_date, self.new_date) arguments.end_date = self._parse_end_date(arguments.end_date, arguments.start_date) arguments.output_name = self.get_output_name(arguments) + arguments.first_time = self._parse_bool(arguments.first_time) arguments.erase_auxiliary_files = self._parse_bool(arguments.erase_auxiliary_files) self.create_dir(arguments.output_dir) if arguments.erase_auxiliary_files: if os.path.exists(arguments.auxiliary_files_path): - comm = MPI.COMM_WORLD - if comm.Get_rank() == 0: + if self.comm.Get_rank() == 0: rmtree(arguments.auxiliary_files_path) - comm.Barrier() + self.comm.Barrier() self.create_dir(arguments.auxiliary_files_path) + # Booleans arguments.do_traffic = arguments.traffic_processors > 0 arguments.do_traffic_area = arguments.traffic_area_processors > 0 arguments.do_aviation = arguments.aviation_processors > 0 @@ -410,6 +715,7 @@ class Config(ArgParser): arguments.do_crop_operations = arguments.crop_operations_processors > 0 arguments.do_crop_fertilizers = arguments.crop_fertilizers_processors > 0 arguments.do_agricultural_machinery = arguments.agricultural_machinery_processors > 0 + arguments.do_solvents = arguments.solvents_processors > 0 # Aviation lists arguments.airport_list = self._parse_list(arguments.airport_list) @@ -475,6 +781,9 @@ class Config(ArgParser): # Traffic area lists arguments.traffic_area_pollutants = self._parse_list(arguments.traffic_area_pollutants) + # Solvents lists + arguments.solvents_pollutants = self._parse_list(arguments.solvents_pollutants) + return arguments @staticmethod @@ -494,8 +803,7 @@ class Config(ArgParser): full_path = os.path.join(arguments.output_dir, file_name) return full_path - @staticmethod - def create_dir(path): + def create_dir(self, path): """ Create the given folder if it is not created yet. 
@@ -504,8 +812,7 @@ class Config(ArgParser): """ import os from mpi4py import MPI - icomm = MPI.COMM_WORLD - comm = icomm.Split(color=0, key=0) + comm = self.comm.Split(color=0, key=0) rank = comm.Get_rank() if rank == 0: @@ -534,13 +841,12 @@ elif str_bool in false_options: return False else: - print 'WARNING: Boolean value not contemplated use {0} for True values and {1} for the False ones'.format( - true_options, false_options - ) - print '/t Using False as default' + print('WARNING: Boolean value not contemplated; use {0} for True values and {1} for the False ones'.format( + true_options, false_options)) + print('\t Using False as default') return False - def _parse_start_date(self, str_date): + def _parse_start_date(self, str_date, new_date=None): """ Parse the date from string to datetime. It accepts several ways to introduce the date: @@ -555,8 +861,8 @@ """ from datetime import datetime - if self.new_date is not None: - return self.new_date + if new_date is not None: + return new_date format_types = ['%Y%m%d', '%Y%m%d%H', '%Y%m%d.%H', '%Y/%m/%d_%H:%M:%S', '%Y-%m-%d_%H:%M:%S', '%Y/%m/%d %H:%M:%S', '%Y-%m-%d %H:%M:%S', '%Y/%m/%d_%H', '%Y-%m-%d_%H', '%Y/%m/%d'] @@ -566,11 +872,11 @@ date = datetime.strptime(str_date, date_format) break except ValueError as e: - if e.message == 'day is out of range for month': - raise ValueError(e) + if str(e) == 'day is out of range for month': + error_exit(e) if date is None: - raise ValueError("Date format '{0}' not contemplated. Use one of this: {1}".format(str_date, format_types)) + error_exit("Date format '{0}' not contemplated. Use one of these: {1}".format(str_date, format_types)) return date diff --git a/hermesv3_bu/grids/grid.py b/hermesv3_bu/grids/grid.py index 4269f20c660ab71f7842a0ea200d0ea5d62c3b6b..6dea0820d92fb62830e3c2c3ce2639456db2975f 100755 --- a/hermesv3_bu/grids/grid.py +++ b/hermesv3_bu/grids/grid.py @@ -5,6 +5,7 @@ import timeit import numpy as np from hermesv3_bu.logger.log import Log +from hermesv3_bu.tools.checker import error_exit def select_grid(comm, logger, arguments): @@ -27,44 +28,45 @@ def select_grid(comm, logger, arguments): if arguments.domain_type == 'regular': from hermesv3_bu.grids.grid_latlon import LatLonGrid grid = LatLonGrid( - comm, logger, arguments.auxiliary_files_path, arguments.output_timestep_num, + logger, arguments.auxiliary_files_path, arguments.output_timestep_num, arguments.vertical_description, arguments.inc_lat, arguments.inc_lon, arguments.lat_orig, arguments.lon_orig, arguments.n_lat, arguments.n_lon) elif arguments.domain_type == 'lcc': from hermesv3_bu.grids.grid_lcc import LccGrid grid = LccGrid( - comm, logger, arguments.auxiliary_files_path, arguments.output_timestep_num, + logger, arguments.auxiliary_files_path, arguments.output_timestep_num, arguments.vertical_description, arguments.lat_1, arguments.lat_2, arguments.lon_0, arguments.lat_0, arguments.nx, arguments.ny, arguments.inc_x, arguments.inc_y, arguments.x_0, arguments.y_0) elif arguments.domain_type == 'rotated': from hermesv3_bu.grids.grid_rotated import RotatedGrid grid = RotatedGrid( - comm, logger, arguments.auxiliary_files_path, arguments.output_timestep_num, + logger, arguments.auxiliary_files_path, arguments.output_timestep_num, arguments.vertical_description, arguments.centre_lat, arguments.centre_lon, arguments.west_boundary, arguments.south_boundary, arguments.inc_rlat, arguments.inc_rlon) elif arguments.domain_type == 'mercator': from 
hermesv3_bu.grids.grid_mercator import MercatorGrid grid = MercatorGrid( - comm, logger, arguments.auxiliary_files_path, arguments.output_timestep_num, + logger, arguments.auxiliary_files_path, arguments.output_timestep_num, arguments.vertical_description, arguments.lat_ts, arguments.lon_0, arguments.nx, arguments.ny, arguments.inc_x, arguments.inc_y, arguments.x_0, arguments.y_0) else: - raise NameError('Unknown grid type {0}'.format(arguments.domain_type)) + error_exit('Unknown grid type {0}'.format(arguments.domain_type)) else: grid = None grid = comm.bcast(grid, root=0) + logger.write_time_log('Grid', 'select_grid', timeit.default_timer() - spent_time) return grid class Grid(object): - def __init__(self, comm, logger, attributes, auxiliary_path, vertical_description_path): + def __init__(self, logger, attributes, auxiliary_path, vertical_description_path): """ Initialise the Grid class @@ -81,7 +83,6 @@ class Grid(object): :type vertical_description_path: str """ spent_time = timeit.default_timer() - self.comm = comm self.logger = logger self.logger.write_log('\tGrid specifications: {0}'.format(attributes), 3) self.attributes = attributes @@ -169,7 +170,7 @@ class Grid(object): else: bound_coords = np.dstack((coords_left, coords_right, coords_right, coords_left)) else: - raise ValueError('ERROR: The number of vertices of the boundaries must be 2 or 4.') + error_exit('The number of vertices of the boundaries must be 2 or 4.') self.logger.write_time_log('Grid', 'create_bounds', timeit.default_timer() - spent_time, 3) return bound_coords @@ -181,7 +182,6 @@ class Grid(object): :rtype: GeoDataFrame """ import geopandas as gpd - import pandas as pd from shapely.geometry import Polygon spent_time = timeit.default_timer() @@ -217,49 +217,31 @@ class Grid(object): aux_b_lats = y.reshape((y.shape[0] * y.shape[1], y.shape[2])) aux_b_lons = x.reshape((x.shape[0] * x.shape[1], x.shape[2])) - + gdf = gpd.GeoDataFrame(index=range(aux_b_lons.shape[0]), crs={'init': 'epsg:4326'}) + gdf['geometry'] = None # Create one dataframe with 8 columns, 4 points with two coordinates each one - df_lats = pd.DataFrame(aux_b_lats, columns=['b_lat_1', 'b_lat_2', 'b_lat_3', 'b_lat_4']) - df_lons = pd.DataFrame(aux_b_lons, columns=['b_lon_1', 'b_lon_2', 'b_lon_3', 'b_lon_4']) - df = pd.concat([df_lats, df_lons], axis=1) - - # Substituate 8 columns by 4 with the two coordinates - df['p1'] = zip(df.b_lon_1, df.b_lat_1) - del df['b_lat_1'], df['b_lon_1'] - df['p2'] = zip(df.b_lon_2, df.b_lat_2) - del df['b_lat_2'], df['b_lon_2'] - df['p3'] = zip(df.b_lon_3, df.b_lat_3) - del df['b_lat_3'], df['b_lon_3'] - df['p4'] = zip(df.b_lon_4, df.b_lat_4) - del df['b_lat_4'], df['b_lon_4'] - - # Make a list of list of tuples - list_points = df.values - del df['p1'], df['p2'], df['p3'], df['p4'] - - # List of polygons from the list of points - geometry = [Polygon(list(points)) for points in list_points] - - gdf = gpd.GeoDataFrame(index=df.index, crs={'init': 'epsg:4326'}, geometry=geometry) - gdf = gdf.to_crs(self.attributes['crs']) + for i in range(aux_b_lons.shape[0]): + gdf.loc[i, 'geometry'] = Polygon([(aux_b_lons[i, 0], aux_b_lats[i, 0]), + (aux_b_lons[i, 1], aux_b_lats[i, 1]), + (aux_b_lons[i, 2], aux_b_lats[i, 2]), + (aux_b_lons[i, 3], aux_b_lats[i, 3]), + (aux_b_lons[i, 0], aux_b_lats[i, 0])]) + + gdf.to_crs(self.attributes['crs'], inplace=True) gdf['FID'] = gdf.index gdf.to_file(self.shapefile_path) else: gdf = gpd.read_file(self.shapefile_path) - # gdf.set_index('FID', inplace=True, drop=False) + gdf.set_index('FID', 
inplace=True) self.logger.write_time_log('Grid', 'create_shapefile', timeit.default_timer() - spent_time, 2) return gdf def add_cell_area(self): from cdo import Cdo - # spent_time = timeit.default_timer() - # Initialises the CDO cdo = Cdo() - cell_area = cdo.gridarea(input=self.netcdf_path, returnArray='cell_area') + cell_area = cdo.gridarea(input=self.netcdf_path, returnArray='cell_area') self.shapefile['cell_area'] = cell_area.flatten() - - # self.logger.write_time_log('Grid', 'add_cell_area', timeit.default_timer() - spent_time) diff --git a/hermesv3_bu/grids/grid_latlon.py b/hermesv3_bu/grids/grid_latlon.py index 8b3def85528278cfdf111b300325e19d1d6706eb..d1c0513d8cf5b56a28c076ae877fbfb9f876c237 100755 --- a/hermesv3_bu/grids/grid_latlon.py +++ b/hermesv3_bu/grids/grid_latlon.py @@ -1,23 +1,5 @@ #!/usr/bin/env python -# Copyright 2018 Earth Sciences Department, BSC-CNS -# -# This file is part of HERMESv3_GR. -# -# HERMESv3_GR is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# HERMESv3_GR is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with HERMESv3_GR. If not, see . - - import os import timeit @@ -29,7 +11,7 @@ from hermesv3_bu.logger.log import Log class LatLonGrid(Grid): - def __init__(self, comm, logger, auxiliary_path, tstep_num, vertical_description_path, inc_lat, inc_lon, lat_orig, + def __init__(self, logger, auxiliary_path, tstep_num, vertical_description_path, inc_lat, inc_lon, lat_orig, lon_orig, n_lat, n_lon): """ Regional regular lat-lon grid object that contains all the information to do a global output. @@ -70,7 +52,7 @@ class LatLonGrid(Grid): attributes = {'inc_lat': inc_lat, 'inc_lon': inc_lon, 'lat_orig': lat_orig, 'lon_orig': lon_orig, 'n_lat': n_lat, 'n_lon': n_lon, 'crs': {'init': 'epsg:4326'}} # Initialize the class using parent - super(LatLonGrid, self).__init__(comm, logger, attributes, auxiliary_path, vertical_description_path) + super(LatLonGrid, self).__init__(logger, attributes, auxiliary_path, vertical_description_path) self.shape = (tstep_num, len(self.vertical_desctiption), n_lat, n_lon) diff --git a/hermesv3_bu/grids/grid_lcc.py b/hermesv3_bu/grids/grid_lcc.py index c5ae6a0ec583988c461209424f8db613a289f3b6..ee6fdeebefbc578a8ddc40b896655d57904ca4db 100755 --- a/hermesv3_bu/grids/grid_lcc.py +++ b/hermesv3_bu/grids/grid_lcc.py @@ -4,14 +4,14 @@ import os import timeit import numpy as np from pyproj import Proj -from grid import Grid +from hermesv3_bu.grids.grid import Grid from hermesv3_bu.logger.log import Log class LccGrid(Grid): - def __init__(self, comm, logger, auxiliary_path, tstep_num, vertical_description_path, lat_1, lat_2, lon_0, lat_0, + def __init__(self, logger, auxiliary_path, tstep_num, vertical_description_path, lat_1, lat_2, lon_0, lat_0, nx, ny, inc_x, inc_y, x_0, y_0, earth_radius=6370000.000): """ Lambert Conformal Conic (LCC) grid object that contains all the information to do a lcc output. 
@@ -77,7 +77,7 @@ class LccGrid(Grid): lat_1, lat_2, lat_0, lon_0, 0, 0) + "+datum=WGS84 +units=m"} # Initialises with parent class - super(LccGrid, self).__init__(comm, logger, attributes, auxiliary_path, vertical_description_path) + super(LccGrid, self).__init__(logger, attributes, auxiliary_path, vertical_description_path) self.shape = (tstep_num, len(self.vertical_desctiption), ny, nx) self.logger.write_time_log('LccGrid', '__init__', timeit.default_timer() - spent_time) diff --git a/hermesv3_bu/grids/grid_mercator.py b/hermesv3_bu/grids/grid_mercator.py index 2c57d6536464423b42a83a2016e022747be1fd73..24faf7985e0b7057c39c4c1ea37d7300e2736cc9 100755 --- a/hermesv3_bu/grids/grid_mercator.py +++ b/hermesv3_bu/grids/grid_mercator.py @@ -4,13 +4,13 @@ import os import timeit import numpy as np from pyproj import Proj -from grid import Grid +from hermesv3_bu.grids.grid import Grid from hermesv3_bu.logger.log import Log class MercatorGrid(Grid): - def __init__(self, comm, logger, auxiliary_path, tstep_num, vertical_description_path, lat_ts, lon_0, nx, ny, inc_x, + def __init__(self, logger, auxiliary_path, tstep_num, vertical_description_path, lat_ts, lon_0, nx, ny, inc_x, inc_y, x_0, y_0, earth_radius=6370000.000): """ Mercator grid object that contains all the information to do a mercator output. @@ -66,7 +66,7 @@ class MercatorGrid(Grid): self.y = None # Initialises with parent class - super(MercatorGrid, self).__init__(comm, logger, attributes, auxiliary_path, vertical_description_path) + super(MercatorGrid, self).__init__(logger, attributes, auxiliary_path, vertical_description_path) self.shape = (tstep_num, len(self.vertical_desctiption), ny, nx) self.logger.write_time_log('MercatorGrid', '__init__', timeit.default_timer() - spent_time, 3) diff --git a/hermesv3_bu/grids/grid_rotated.py b/hermesv3_bu/grids/grid_rotated.py index 3ddf526237621acfafdb02d31ed345ae8a417cfa..219570769a3dbd3dc6c1dedaa5bbe5c939bc8dc8 100755 --- a/hermesv3_bu/grids/grid_rotated.py +++ b/hermesv3_bu/grids/grid_rotated.py @@ -2,7 +2,7 @@ import os import timeit -from grid import Grid +from hermesv3_bu.grids.grid import Grid import numpy as np import math @@ -10,7 +10,7 @@ from hermesv3_bu.logger.log import Log class RotatedGrid(Grid): - def __init__(self, comm, logger, auxiliary_path, tstep_num, vertical_description_path, centre_lat, centre_lon, + def __init__(self, logger, auxiliary_path, tstep_num, vertical_description_path, centre_lat, centre_lon, west_boundary, south_boundary, inc_rlat, inc_rlon): """ @@ -41,9 +41,9 @@ class RotatedGrid(Grid): 'n_lon': int((abs(west_boundary) / inc_rlon) * 2 + 1), 'crs': {'init': 'epsg:4326'}} # Initialises with parent class - super(RotatedGrid, self).__init__(comm, logger, attributes, auxiliary_path, vertical_description_path) + super(RotatedGrid, self).__init__(logger, attributes, auxiliary_path, vertical_description_path) - self.shape = (tstep_num, len(self.vertical_desctiption), len(self.rlat), len(self.rlon)) + self.shape = (tstep_num, len(self.vertical_desctiption), attributes['n_lat'], attributes['n_lon']) self.logger.write_time_log('RotatedGrid', '__init__', timeit.default_timer() - spent_time, 3) def create_regular_rotated(self): @@ -134,7 +134,6 @@ class RotatedGrid(Grid): # sph = 1. # if sph < -1.: # sph = -1. - # print type(sph) sph[sph > 1.] = 1. sph[sph < -1.] = -1. 
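With the communicator removed from the grid constructors, only select_grid still touches MPI: rank 0 builds the Grid object and every other rank receives it through a broadcast. A minimal mpi4py sketch of that pattern (the dict below is only an illustrative stand-in for the real LatLonGrid/LccGrid/MercatorGrid/RotatedGrid object):

from mpi4py import MPI

comm = MPI.COMM_WORLD

if comm.Get_rank() == 0:
    # Rank 0 does the expensive construction (vertical description, auxiliary shapefile, ...).
    grid = {'grid_type': 'Regular Lat-Lon'}  # stand-in for the real Grid object
else:
    grid = None

# Every rank ends up with the same object, mirroring the end of select_grid().
grid = comm.bcast(grid, root=0)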
diff --git a/hermesv3_bu/hermes.py b/hermesv3_bu/hermes.py index 8a8015ccccfd32e5c69b1064c41bf679d5ce6ada..7b678df1f1260183ddb5ceba381e09f80f625485 100755 --- a/hermesv3_bu/hermes.py +++ b/hermesv3_bu/hermes.py @@ -7,7 +7,7 @@ from mpi4py import MPI from datetime import timedelta from hermesv3_bu.config.config import Config -from hermesv3_bu.grids.grid import select_grid +from hermesv3_bu.grids.grid import select_grid, Grid from hermesv3_bu.clipping.clip import select_clip from hermesv3_bu.writer.writer import select_writer from hermesv3_bu.sectors.sector_manager import SectorManager @@ -18,27 +18,39 @@ class Hermes(object): """ Interface class for HERMESv3. """ - def __init__(self, config): + def __init__(self, config, comm=None): + """ + + :param config: Configuration file object + :type config: Config + + :param comm: Communicator + :type comm: MPI.Comm + """ self.initial_time = timeit.default_timer() - self.comm = MPI.COMM_WORLD + if comm is None: + comm = MPI.COMM_WORLD + self.comm = comm self.arguments = config.arguments - self.logger = Log(self.comm, self.arguments) + self.logger = Log(self.arguments) self.logger.write_log('====== Starting HERMESv3_BU simulation =====') self.grid = select_grid(self.comm, self.logger, self.arguments) self.clip = select_clip(self.comm, self.logger, self.arguments.auxiliary_files_path, self.arguments.clipping, self.grid) self.date_array = [self.arguments.start_date + timedelta(hours=hour) for hour in - xrange(self.arguments.output_timestep_num)] - self.logger.write_log('Dates to simulate: {0}'.format( - [aux_date.strftime("%Y/%m/%d, %H:%M:%S") for aux_date in self.date_array]), message_level=2) + range(self.arguments.output_timestep_num)] + + self.logger.write_log('Dates to simulate:', message_level=3) + for aux_date in self.date_array: + self.logger.write_log('\t{0}'.format(aux_date.strftime("%Y/%m/%d, %H:%M:%S")), message_level=3) self.sector_manager = SectorManager( self.comm, self.logger, self.grid, self.clip, self.date_array, self.arguments) self.writer = select_writer(self.logger, self.arguments, self.grid, self.date_array) - self.logger.write_time_log('Hermes', '__init__', timeit.default_timer() - self.initial_time) + self.logger.write_time_log('HERMES', '__init__', timeit.default_timer() - self.initial_time) def main(self): """ @@ -46,17 +58,20 @@ class Hermes(object): """ from datetime import timedelta - emis = self.sector_manager.run() - waiting_time = timeit.default_timer() - self.comm.Barrier() - self.logger.write_log('All emissions calculated!') - self.logger.write_time_log('Hermes', 'Waiting_to_write', timeit.default_timer() - waiting_time) + if self.arguments.first_time: + self.logger.write_log('***** HERMESv3_BU First Time finished successfully *****') + else: + emis = self.sector_manager.run() + waiting_time = timeit.default_timer() + self.comm.Barrier() + self.logger.write_log('All emissions calculated!') + self.logger.write_time_log('HERMES', 'Waiting_to_write', timeit.default_timer() - waiting_time) - self.writer.write(emis) - self.comm.Barrier() + self.writer.write(emis) + self.comm.Barrier() - self.logger.write_log('***** HERMES simulation finished succesful *****') - self.logger.write_time_log('Hermes', 'TOTAL', timeit.default_timer() - self.initial_time) + self.logger.write_log('***** HERMESv3_BU simulation finished successfully *****') + self.logger.write_time_log('HERMES', 'TOTAL', timeit.default_timer() - self.initial_time) self.logger.finish_logs() if self.arguments.start_date < self.arguments.end_date: @@ -65,8 +80,8 @@ 
class Hermes(object): return None -def run(): - date = Hermes(Config()).main() +def run(comm=None): + date = Hermes(Config(comm), comm).main() while date is not None: date = Hermes(Config(new_date=date)).main() sys.exit(0) diff --git a/hermesv3_bu/io_server/__init__.pyc b/hermesv3_bu/io_server/__init__.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6efa510f3322524dfef9eb7db349883b888d6cc7 Binary files /dev/null and b/hermesv3_bu/io_server/__init__.pyc differ diff --git a/hermesv3_bu/io_server/io_netcdf.py b/hermesv3_bu/io_server/io_netcdf.py index ad47f657bdd50119963e6acad5956f548782904d..ed44f4a42ef08af429eeadb28a0e2816b5b58269 100755 --- a/hermesv3_bu/io_server/io_netcdf.py +++ b/hermesv3_bu/io_server/io_netcdf.py @@ -4,15 +4,16 @@ import sys import os from mpi4py import MPI from datetime import timedelta -from hermesv3_bu.io_server.io_server import IoServer import numpy as np import geopandas as gpd from netCDF4 import Dataset from shapely.geometry import Point from cf_units import num2date, CALENDAR_STANDARD - from geopandas import GeoDataFrame +from hermesv3_bu.io_server.io_server import IoServer +from hermesv3_bu.tools.checker import check_files, error_exit + class IoNetcdf(IoServer): def __init__(self, comm): @@ -42,12 +43,19 @@ class IoNetcdf(IoServer): :return: GeoDataframe with the data in the desired points. :rtype: geopandas.GeoDataframe """ + check_files(netcdf_path) nc = Dataset(netcdf_path, mode='r') - lat_o = nc.variables['latitude'][:] - lon_o = nc.variables['longitude'][:] + try: + lat_o = nc.variables['latitude'][:] + lon_o = nc.variables['longitude'][:] + except KeyError as e: + error_exit("{0} variable not found in {1} file.".format(str(e), netcdf_path)) if date_type == 'daily': - time = nc.variables['time'] + try: + time = nc.variables['time'] + except KeyError as e: + error_exit("{0} variable not found in {1} file.".format(str(e), netcdf_path)) # From time array to list of dates. time_array = num2date(time[:], time.units, CALENDAR_STANDARD) time_array = np.array([aux.date() for aux in time_array]) @@ -70,7 +78,10 @@ class IoNetcdf(IoServer): del lat_o, lon_o # Reads the tas variable of the xone and the times needed. - var = nc.variables[var_name][i_time, j_min:j_max, i_min:i_max] + try: + var = nc.variables[var_name][i_time, j_min:j_max, i_min:i_max] + except KeyError as e: + error_exit("{0} variable not found in {1} file.".format(str(e), netcdf_path)) nc.close() var_df = gpd.GeoDataFrame(var.flatten().T, columns=[var_name], crs={'init': 'epsg:4326'}, @@ -106,11 +117,15 @@ class IoNetcdf(IoServer): path = os.path.join(netcdf_dir, '{0}_{1}{2}.nc'.format(var_name, date_array[0].year, str(date_array[0].month).zfill(2))) # self.logger.write_log('Getting temperature from {0}'.format(path), message_level=2) - + check_files(path) nc = Dataset(path, mode='r') - lat_o = nc.variables['latitude'][:] - lon_o = nc.variables['longitude'][:] - time = nc.variables['time'] + try: + lat_o = nc.variables['latitude'][:] + lon_o = nc.variables['longitude'][:] + n_lat = len(lat_o) + time = nc.variables['time'] + except KeyError as e: + error_exit("{0} variable not found in {1} file.".format(str(e), path)) # From time array to list of dates. 
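The KeyError handling added around every nc.variables access follows a single pattern: validate the file first with check_files, then turn a missing variable into a readable message. A self-contained sketch of that pattern (error_exit here is only a stand-in for hermesv3_bu.tools.checker.error_exit, whose body is not shown in this diff):

from netCDF4 import Dataset


def error_exit(message):
    # Stand-in for hermesv3_bu.tools.checker.error_exit (not shown in this diff).
    raise SystemExit(message)


def read_variable(netcdf_path, var_name):
    # A missing variable becomes a clear message instead of a bare KeyError,
    # following the pattern added throughout io_netcdf.py.
    nc = Dataset(netcdf_path, mode='r')
    try:
        data = nc.variables[var_name][:]
    except KeyError as e:
        error_exit("{0} variable not found in {1} file.".format(str(e), netcdf_path))
    finally:
        nc.close()
    return data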
time_array = num2date(time[:], time.units, CALENDAR_STANDARD) i_time = np.where(time_array == date_array[0])[0][0] @@ -129,10 +144,13 @@ class IoNetcdf(IoServer): # From 1D to 2D lat = np.array([lat_o[:]] * len(lon_o[:])).T.flatten() lon = np.array([lon_o[:]] * len(lat_o[:])).flatten() - del lat_o, lon_o + # del lat_o, lon_o # Reads the var variable of the xone and the times needed. - var = nc.variables[var_name][i_time:i_time + (len(date_array)), j_min:j_max, i_min:i_max] + try: + var = nc.variables[var_name][i_time:i_time + (len(date_array)), j_min:j_max, i_min:i_max] + except KeyError as e: + error_exit("{0} variable not found in {1} file.".format(str(e), path)) nc.close() # That condition is fot the cases that the needed temperature is in a different NetCDF. @@ -141,9 +159,13 @@ class IoNetcdf(IoServer): path = os.path.join(netcdf_dir, '{0}_{1}{2}.nc'.format(var_name, aux_date.year, str(aux_date.month).zfill(2))) # self.logger.write_log('Getting {0} from {1}'.format(var_name, path), message_level=2) + check_files(path) nc = Dataset(path, mode='r') i_time = 0 - new_var = nc.variables[var_name][i_time:i_time + (len(date_array) - len(var)), j_min:j_max, i_min:i_max] + try: + new_var = nc.variables[var_name][i_time:i_time + (len(date_array) - len(var)), j_min:j_max, i_min:i_max] + except KeyError as e: + error_exit("{0} variable not found in {1} file.".format(str(e), path)) var = np.concatenate([var, new_var]) @@ -152,7 +174,7 @@ class IoNetcdf(IoServer): var = var.reshape((var.shape[0], var.shape[1] * var.shape[2])) df = gpd.GeoDataFrame(var.T, geometry=[Point(xy) for xy in zip(lon, lat)]) # df.columns = ['t_{0}'.format(x) for x in df.columns.values[:-1]] + ['geometry'] - df.loc[:, 'REC'] = df.index + df.loc[:, 'REC'] = (((df.index // len(lon_o)) + j_min) * n_lat) + ((df.index % len(lon_o)) + i_min) return df @@ -229,7 +251,7 @@ def write_coords_netcdf(netcdf_path, center_latitudes, center_longitudes, data_l netcdf.createDimension('lat', center_latitudes.shape[0]) lat_dim = ('lon', 'lat', ) else: - print 'ERROR: Latitudes must be on a 1D or 2D array instead of {0}'.format(len(center_latitudes.shape)) + print('ERROR: Latitudes must be on a 1D or 2D array instead of {0}'.format(len(center_latitudes.shape))) sys.exit(1) # Longitude @@ -240,21 +262,21 @@ def write_coords_netcdf(netcdf_path, center_latitudes, center_longitudes, data_l netcdf.createDimension('lon', center_longitudes.shape[1]) lon_dim = ('lon', 'lat', ) else: - print 'ERROR: Longitudes must be on a 1D or 2D array instead of {0}'.format(len(center_longitudes.shape)) + print('ERROR: Longitudes must be on a 1D or 2D array instead of {0}'.format(len(center_longitudes.shape))) sys.exit(1) elif rotated: var_dim = ('rlat', 'rlon',) # Rotated Latitude if rotated_lats is None: - print 'ERROR: For rotated grids is needed the rotated latitudes.' + print('ERROR: For rotated grids is needed the rotated latitudes.') sys.exit(1) netcdf.createDimension('rlat', len(rotated_lats)) lat_dim = ('rlat', 'rlon',) # Rotated Longitude if rotated_lons is None: - print 'ERROR: For rotated grids is needed the rotated longitudes.' 
+ print('ERROR: For rotated grids is needed the rotated longitudes.') sys.exit(1) netcdf.createDimension('rlon', len(rotated_lons)) lon_dim = ('rlat', 'rlon',) @@ -297,7 +319,6 @@ def write_coords_netcdf(netcdf_path, center_latitudes, center_longitudes, data_l else: time = netcdf.createVariable('time', 'd', ('time',), zlib=True) u = Unit('hours') - # print u.offset_by_time(encode_time(date.year, date.month, date.day, date.hour, date.minute, date.second)) # Unit('hour since 1970-01-01 00:00:00.0000000 UTC') time.units = str(u.offset_by_time(encode_time(date.year, date.month, date.day, date.hour, date.minute, date.second))) @@ -317,7 +338,6 @@ def write_coords_netcdf(netcdf_path, center_latitudes, center_longitudes, data_l if boundary_latitudes is not None: lats.bounds = "lat_bnds" lat_bnds = netcdf.createVariable('lat_bnds', 'f', lat_dim + ('nv',), zlib=True) - # print lat_bnds[:].shape, boundary_latitudes.shape lat_bnds[:] = boundary_latitudes # Longitude @@ -327,7 +347,6 @@ def write_coords_netcdf(netcdf_path, center_latitudes, center_longitudes, data_l lons.axis = "X" lons.long_name = "longitude coordinate" lons.standard_name = "longitude" - # print 'lons:', lons[:].shape, center_longitudes.shape lons[:] = center_longitudes if boundary_longitudes is not None: lons.bounds = "lon_bnds" @@ -375,7 +394,6 @@ def write_coords_netcdf(netcdf_path, center_latitudes, center_longitudes, data_l var = netcdf.createVariable('aux_var', 'f', ('time',) + var_dim, zlib=True) var[:] = 0 for variable in data_list: - # print ('time',) + var_dim var = netcdf.createVariable(variable['name'], 'f', ('time',) + var_dim, zlib=True) var.units = Unit(variable['units']).symbol if 'long_name' in variable: @@ -398,7 +416,7 @@ def write_coords_netcdf(netcdf_path, center_latitudes, center_longitudes, data_l try: var[:] = variable['data'] except ValueError: - print 'VAR ERROR, netcdf shape: {0}, variable shape: {1}'.format(var[:].shape, variable['data'].shape) + print('VAR ERROR, netcdf shape: {0}, variable shape: {1}'.format(var[:].shape, variable['data'].shape)) # Grid mapping if regular_latlon: @@ -433,7 +451,6 @@ def write_coords_netcdf(netcdf_path, center_latitudes, center_longitudes, data_l c_area.long_name = "area of the grid cell" c_area.standard_name = "cell_area" c_area.units = Unit("m2").symbol - # print c_area[:].shape, cell_area.shape c_area[:] = cell_area if global_attributes is not None: diff --git a/hermesv3_bu/io_server/io_netcdf.pyc b/hermesv3_bu/io_server/io_netcdf.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e088ae8421ce2fc21d00ee0941b233e9db0886d2 Binary files /dev/null and b/hermesv3_bu/io_server/io_netcdf.pyc differ diff --git a/hermesv3_bu/io_server/io_raster.py b/hermesv3_bu/io_server/io_raster.py index 73dc937a6b1b199c87ef2679efda14f8443708c9..de052025f2c5d77c38df32da44083c165f5900ae 100755 --- a/hermesv3_bu/io_server/io_raster.py +++ b/hermesv3_bu/io_server/io_raster.py @@ -1,19 +1,17 @@ #!/usr/bin/env python -import sys import os import timeit -from warnings import warn from mpi4py import MPI import rasterio from rasterio.mask import mask import geopandas as gpd -import pandas as pd import numpy as np from shapely.geometry import Polygon - from hermesv3_bu.io_server.io_server import IoServer +from hermesv3_bu.io_server.io_shapefile import IoShapefile +from hermesv3_bu.tools.checker import check_files, error_exit class IoRaster(IoServer): @@ -49,7 +47,7 @@ class IoRaster(IoServer): Function to parse features from GeoDataFrame in such a manner that rasterio wants 
them""" import json return [json.loads(gdf.to_json())['features'][0]['geometry']] - + check_files([raster_path, shape_path]) data = rasterio.open(raster_path) geo = gpd.read_file(shape_path) if len(geo) > 1: @@ -106,7 +104,7 @@ class IoRaster(IoServer): Function to parse features from GeoDataFrame in such a manner that rasterio wants them""" import json return [json.loads(gdf.to_json())['features'][0]['geometry']] - + check_files(raster_path) data = rasterio.open(raster_path) if len(geo) > 1: @@ -172,7 +170,7 @@ class IoRaster(IoServer): else: bound_coords = np.dstack((coords_left, coords_right, coords_right, coords_left)) else: - raise ValueError('ERROR: The number of vertices of the boundaries must be 2 or 4.') + error_exit('ERROR: The number of vertices of the boundaries must be 2 or 4.') # self.logger.write_time_log('IoRaster', 'create_bounds', timeit.default_timer() - spent_time, 3) return bound_coords @@ -198,6 +196,77 @@ class IoRaster(IoServer): return gdf + def to_shapefile_serie_by_cell(self, raster_path, out_path=None, write=False, crs=None, nodata=0): + """ + + :param raster_path: + :param out_path: + :param write: + :param crs: + :param nodata: + :return: + """ + + if out_path is None or not os.path.exists(out_path): + ds = rasterio.open(raster_path) + + grid_info = ds.transform + # TODO remove when new version will be installed + if rasterio.__version__ == '0.36.0': + lons = np.arange(ds.width) * grid_info[1] + grid_info[0] + lats = np.arange(ds.height) * grid_info[5] + grid_info[3] + elif rasterio.__version__ == '1.0.21': + lons = np.arange(ds.width) * grid_info[0] + grid_info[2] + lats = np.arange(ds.height) * grid_info[4] + grid_info[5] + else: + lons = np.arange(ds.width) * grid_info[0] + grid_info[2] + lats = np.arange(ds.height) * grid_info[4] + grid_info[5] + + # 1D to 2D + c_lats = np.array([lats] * len(lons)).T.flatten() + c_lons = np.array([lons] * len(lats)).flatten() + + del lons, lats + if rasterio.__version__ == '0.36.0': + b_lons = self.create_bounds(c_lons, grid_info[1], number_vertices=4) + grid_info[1] / 2 + b_lats = self.create_bounds(c_lats, grid_info[1], number_vertices=4, inverse=True) + grid_info[5] / 2 + elif rasterio.__version__ == '1.0.21': + b_lons = self.create_bounds(c_lons, grid_info[0], number_vertices=4) + grid_info[0] / 2 + b_lats = self.create_bounds(c_lats, grid_info[4], number_vertices=4, inverse=True) + grid_info[4] / 2 + else: + b_lons = self.create_bounds(c_lons, grid_info[0], number_vertices=4) + grid_info[0] / 2 + b_lats = self.create_bounds(c_lats, grid_info[4], number_vertices=4, inverse=True) + grid_info[4] / 2 + + b_lats = b_lats.reshape((b_lats.shape[1], b_lats.shape[2])) + b_lons = b_lons.reshape((b_lons.shape[1], b_lons.shape[2])) + + gdf = gpd.GeoDataFrame(ds.read(1).flatten(), columns=['data'], index=range(b_lons.shape[0]), crs=ds.crs) + gdf['geometry'] = None + + for i in range(b_lons.shape[0]): + gdf.loc[i, 'geometry'] = Polygon([(b_lons[i, 0], b_lats[i, 0]), + (b_lons[i, 1], b_lats[i, 1]), + (b_lons[i, 2], b_lats[i, 2]), + (b_lons[i, 3], b_lats[i, 3]), + (b_lons[i, 0], b_lats[i, 0])]) + + gdf['CELL_ID'] = gdf.index + + gdf = gdf[gdf['data'] != nodata] + + if crs is not None: + gdf = gdf.to_crs(crs) + + if write: + if not os.path.exists(os.path.dirname(out_path)): + os.makedirs(os.path.dirname(out_path)) + gdf.to_file(out_path) + + else: + gdf = gpd.read_file(out_path) + + return gdf + def to_shapefile_serie(self, raster_path, out_path=None, write=False, crs=None, nodata=0): """ @@ -214,6 +283,7 @@ class IoRaster(IoServer): 
from rasterio.features import shapes mask = None src = rasterio.open(raster_path) + image = src.read(1) # first band image = image.astype(np.float32) geoms = ( @@ -222,10 +292,14 @@ class IoRaster(IoServer): gdf = gpd.GeoDataFrame.from_features(geoms) - gdf.loc[:, 'CELL_ID'] = xrange(len(gdf)) + gdf.loc[:, 'CELL_ID'] = range(len(gdf)) gdf = gdf[gdf['data'] != nodata] - gdf.crs = src.crs + # Error on to_crs function of geopandas that flip lat with lon in the non dict form + if src.crs == 'EPSG:4326': + gdf.crs = {'init': 'epsg:4326'} + else: + gdf.crs = src.crs if crs is not None: gdf = gdf.to_crs(crs) @@ -237,5 +311,75 @@ class IoRaster(IoServer): else: gdf = gpd.read_file(out_path) + gdf.set_index('CELL_ID', inplace=True) + return gdf + def to_shapefile_parallel(self, raster_path, gather=False, bcast=False, crs=None, nodata=0): + spent_time = timeit.default_timer() + if self.comm.Get_rank() == 0: + ds = rasterio.open(raster_path) + grid_info = ds.transform + + # TODO remove when new version will be installed + if rasterio.__version__ == '0.36.0': + lons = np.arange(ds.width) * grid_info[1] + grid_info[0] + lats = np.arange(ds.height) * grid_info[5] + grid_info[3] + elif rasterio.__version__ == '1.0.21': + lons = np.arange(ds.width) * grid_info[0] + grid_info[2] + lats = np.arange(ds.height) * grid_info[4] + grid_info[5] + else: + lons = np.arange(ds.width) * grid_info[0] + grid_info[2] + lats = np.arange(ds.height) * grid_info[4] + grid_info[5] + + # 1D to 2D + c_lats = np.array([lats] * len(lons)).T.flatten() + c_lons = np.array([lons] * len(lats)).flatten() + del lons, lats + if rasterio.__version__ == '0.36.0': + b_lons = self.create_bounds(c_lons, grid_info[1], number_vertices=4) + grid_info[1] / 2 + b_lats = self.create_bounds(c_lats, grid_info[1], number_vertices=4, inverse=True) + grid_info[5] / 2 + elif rasterio.__version__ == '1.0.21': + b_lons = self.create_bounds(c_lons, grid_info[0], number_vertices=4) + grid_info[0] / 2 + b_lats = self.create_bounds(c_lats, grid_info[4], number_vertices=4, inverse=True) + grid_info[4] / 2 + else: + b_lons = self.create_bounds(c_lons, grid_info[0], number_vertices=4) + grid_info[0] / 2 + b_lats = self.create_bounds(c_lats, grid_info[4], number_vertices=4, inverse=True) + grid_info[4] / 2 + + b_lats = b_lats.reshape((b_lats.shape[1], b_lats.shape[2])) + b_lons = b_lons.reshape((b_lons.shape[1], b_lons.shape[2])) + + gdf = gpd.GeoDataFrame(ds.read(1).flatten(), columns=['data'], index=range(b_lons.shape[0]), crs=ds.crs) + # Error on to_crs function of geopandas that flip lat with lon in the non dict form + if gdf.crs == 'EPSG:4326': + gdf.crs = {'init': 'epsg:4326'} + gdf['geometry'] = None + else: + gdf = None + b_lons = None + b_lats = None + self.comm.Barrier() + gdf = IoShapefile(self.comm).split_shapefile(gdf) + + b_lons = IoShapefile(self.comm).split_shapefile(b_lons) + b_lats = IoShapefile(self.comm).split_shapefile(b_lats) + + i = 0 + for j, df_aux in gdf.iterrows(): + gdf.loc[j, 'geometry'] = Polygon([(b_lons[i, 0], b_lats[i, 0]), + (b_lons[i, 1], b_lats[i, 1]), + (b_lons[i, 2], b_lats[i, 2]), + (b_lons[i, 3], b_lats[i, 3]), + (b_lons[i, 0], b_lats[i, 0])]) + i += 1 + + gdf['CELL_ID'] = gdf.index + gdf = gdf[gdf['data'] != nodata] + + if crs is not None: + gdf = gdf.to_crs(crs) + + if gather and not bcast: + gdf = IoShapefile(self.comm).gather_shapefile(gdf) + elif gather and bcast: + gdf = IoShapefile(self.comm).gather_bcast_shapefile(gdf) return gdf diff --git a/hermesv3_bu/io_server/io_server.py b/hermesv3_bu/io_server/io_server.py 
index 694798fd68072c67125102568fc2bfdf625474b2..46bd918bcaa6dc48c8b6214ea4f9d13595256f34 100755 --- a/hermesv3_bu/io_server/io_server.py +++ b/hermesv3_bu/io_server/io_server.py @@ -1,6 +1,12 @@ #!/usr/bin/env python +from mpi4py import MPI + class IoServer(object): + """ + :param comm: Communicator object + :type comm: MPI.Comm + """ def __init__(self, comm): self.comm = comm diff --git a/hermesv3_bu/io_server/io_shapefile.py b/hermesv3_bu/io_server/io_shapefile.py index f2c89d6b42010f53a186f07e011530e2d7483c3a..59ece649cf221a86df7b49557988fe4114f3affc 100755 --- a/hermesv3_bu/io_server/io_shapefile.py +++ b/hermesv3_bu/io_server/io_shapefile.py @@ -9,7 +9,9 @@ import pandas as pd import geopandas as gpd from mpi4py import MPI +from geopandas import GeoDataFrame from hermesv3_bu.io_server.io_server import IoServer +from hermesv3_bu.tools.checker import check_files class IoShapefile(IoServer): @@ -59,13 +61,14 @@ class IoShapefile(IoServer): return True def read_shapefile_serial(self, path): - + check_files(path) gdf = gpd.read_file(path) return gdf def read_shapefile(self, path, rank=0): if self.comm.Get_rank() == rank: + check_files(path) gdf = gpd.read_file(path) gdf = np.array_split(gdf, self.comm.Get_size()) else: @@ -86,6 +89,13 @@ class IoShapefile(IoServer): return data def split_shapefile(self, data, rank=0): + """ + + :param data: + :param rank: + :return: Splitted Shapefile + :rtype: GeoDataFrame + """ if self.comm.Get_size() == 1: data = data @@ -98,6 +108,32 @@ class IoShapefile(IoServer): return data + def gather_bcast_shapefile(self, data, rank=0): + + if self.comm.Get_size() == 1: + data = data + else: + data = self.comm.gather(data, root=rank) + if self.comm.Get_rank() == rank: + data = pd.concat(data) + else: + data = None + data = self.comm.bcast(data, root=rank) + + return data + + def gather_shapefile(self, data, rank=0): + + if self.comm.Get_size() == 1: + data = data + else: + data = self.comm.gather(data, root=rank) + if self.comm.Get_rank() == rank: + data = pd.concat(data) + else: + data = None + return data + def balance(self, data, rank=0): data = self.comm.gather(data, root=rank) diff --git a/hermesv3_bu/logger/log.py b/hermesv3_bu/logger/log.py index 5c1b3caf389744f52d9bda560a987be495131bc3..919c8f40b1599aaeb9c7e3c14b8e4ff9b079db08 100644 --- a/hermesv3_bu/logger/log.py +++ b/hermesv3_bu/logger/log.py @@ -4,22 +4,21 @@ import os import numpy as np import pandas as pd +from mpi4py import MPI +comm = MPI.COMM_WORLD + class Log(object): - def __init__(self, comm, arguments, log_refresh=1, time_log_refresh=0): + def __init__(self, arguments, log_refresh=1, time_log_refresh=0): """ Initialise the Log class. - :param comm: MPI communicator - :param arguments: Complete argument NameSpace. 
:type arguments: NameSpace :param log_refresh: :param time_log_refresh: """ - self.comm = comm - self.refresh_rate = (log_refresh, time_log_refresh) self.log_refresh = self.refresh_rate[0] self.time_log_refresh = self.refresh_rate[1] @@ -36,7 +35,7 @@ class Log(object): else: if os.path.exists(self.time_log_path): os.remove(self.time_log_path) - self.time_log = open(self.time_log_path, mode='w') + # self.time_log = open(self.time_log_path, mode='w') else: # Time log only writed by master process self.time_log = None @@ -45,7 +44,7 @@ class Log(object): if os.path.exists(self.log_path): os.remove(self.log_path) - self.log = open(self.log_path, mode='w') + # self.log = open(self.log_path, mode='w') self.df_times = pd.DataFrame(columns=['Class', 'Function', comm.Get_rank()]) @@ -65,13 +64,10 @@ class Log(object): :rtype: bool """ if message_level <= self.log_level: - self.log.write("{0}\n".format(message)) + with open(self.log_path, mode='a') as log_file: + log_file.write("{0}\n".format(message)) + log_file.close() - if self.log_refresh > 0: - self.log_refresh -= 1 - if self.log_refresh == 0: - self.log.flush() - self.log_refresh = self.refresh_rate[0] return True def _write_csv_times_log_file(self, rank=0): @@ -84,20 +80,21 @@ class Log(object): :return: True if everything is ok. :rtype: bool """ + from functools import reduce self.df_times = self.df_times.groupby(['Class', 'Function']).sum().reset_index() - data_frames = self.comm.gather(self.df_times, root=0) - if self.comm.Get_rank() == rank: + data_frames = comm.gather(self.df_times, root=0) + if comm.Get_rank() == rank: df_merged = reduce(lambda left, right: pd.merge(left, right, on=['Class', 'Function'], how='outer'), data_frames) df_merged = df_merged.groupby(['Class', 'Function']).sum() - df_merged['min'] = df_merged.loc[:, range(self.comm.Get_size())].min(axis=1) - df_merged['max'] = df_merged.loc[:, range(self.comm.Get_size())].max(axis=1) - df_merged['mean'] = df_merged.loc[:, range(self.comm.Get_size())].mean(axis=1) + df_merged['min'] = df_merged.loc[:, range(comm.Get_size())].min(axis=1) + df_merged['max'] = df_merged.loc[:, range(comm.Get_size())].max(axis=1) + df_merged['mean'] = df_merged.loc[:, range(comm.Get_size())].mean(axis=1) df_merged = df_merged.replace(0.0, np.NaN) df_merged.to_csv(self.time_log_path) - self.comm.Barrier() + comm.Barrier() return True def write_time_log(self, class_name, function_name, spent_time, message_level=1): @@ -121,7 +118,7 @@ class Log(object): """ if message_level <= self.log_level: self.df_times = self.df_times.append( - {'Class': class_name, 'Function': function_name, self.comm.Get_rank(): spent_time}, ignore_index=True) + {'Class': class_name, 'Function': function_name, comm.Get_rank(): spent_time}, ignore_index=True) # if self.time_log_refresh > 0: # self.time_log_refresh -= 1 # if self.time_log_refresh == 0: @@ -137,5 +134,5 @@ class Log(object): :return: """ self._write_csv_times_log_file() - self.log.flush() - self.log.close() + # self.log.flush() + # self.log.close() diff --git a/hermesv3_bu/sectors/agricultural_crop_fertilizers_sector.py b/hermesv3_bu/sectors/agricultural_crop_fertilizers_sector.py index dcf79751df6bdd887727377c376a510e68d2e0c0..2b0be93dc917553b1cc7e50839afb7a4fda95858 100755 --- a/hermesv3_bu/sectors/agricultural_crop_fertilizers_sector.py +++ b/hermesv3_bu/sectors/agricultural_crop_fertilizers_sector.py @@ -1,5 +1,6 @@ #!/usr/bin/env python +import sys import os import timeit import pandas as pd @@ -10,12 +11,14 @@ from hermesv3_bu.io_server.io_raster import 
IoRaster from hermesv3_bu.io_server.io_shapefile import IoShapefile from hermesv3_bu.io_server.io_netcdf import IoNetcdf from hermesv3_bu.logger.log import Log +from hermesv3_bu.tools.checker import check_files +from geopandas import GeoDataFrame -formula = True +FORMULA = True class AgriculturalCropFertilizersSector(AgriculturalSector): - def __init__(self, comm_agr, comm, logger, auxiliary_dir, grid_shp, clip, date_array, source_pollutants, + def __init__(self, comm_agr, comm, logger, auxiliary_dir, grid, clip, date_array, source_pollutants, vertical_levels, crop_list, nut_shapefile, land_uses_path, hourly_profiles_path, speciation_map_path, speciation_profiles_path, molecular_weights_path, landuse_by_nut, crop_by_nut, crop_from_landuse_path, cultivated_ratio, fertilizer_rate, crop_f_parameter, crop_f_fertilizers, gridded_ph, gridded_cec, @@ -23,8 +26,13 @@ class AgriculturalCropFertilizersSector(AgriculturalSector): crop_growing_degree_day_path): spent_time = timeit.default_timer() logger.write_log('===== AGRICULTURAL CROP FERTILIZERS SECTOR =====') + check_files( + [nut_shapefile, land_uses_path, hourly_profiles_path, speciation_map_path, speciation_profiles_path, + molecular_weights_path, landuse_by_nut, crop_by_nut, crop_from_landuse_path, cultivated_ratio, + fertilizer_rate, crop_f_parameter, crop_f_fertilizers, gridded_ph, gridded_cec, crop_calendar, + temperature_path, wind_speed_path]) super(AgriculturalCropFertilizersSector, self).__init__( - comm_agr, comm, logger, auxiliary_dir, grid_shp, clip, date_array, nut_shapefile, source_pollutants, + comm_agr, comm, logger, auxiliary_dir, grid, clip, date_array, nut_shapefile, source_pollutants, vertical_levels, crop_list, land_uses_path, landuse_by_nut, crop_by_nut, crop_from_landuse_path, None, None, None, hourly_profiles_path, speciation_map_path, speciation_profiles_path, molecular_weights_path) @@ -32,29 +40,12 @@ class AgriculturalCropFertilizersSector(AgriculturalSector): self.cultivated_ratio = self.read_profiles(cultivated_ratio) self.fertilizer_rate = self.read_profiles(fertilizer_rate) + self.fertilizer_rate.rename(columns={'nuts2_id': 'code'}, inplace=True) self.crop_f_parameter = self.read_profiles(crop_f_parameter) + self.crop_f_parameter.rename(columns={'nuts2_id': 'code'}, inplace=True) self.crop_f_fertilizers = self.read_profiles(crop_f_fertilizers) - - if self.comm.Get_rank() == 0: - self.logger.write_log('Getting gridded constants', message_level=2) - self.gridded_constants = self.get_gridded_constants( - os.path.join(auxiliary_dir, 'fertilizers', 'gridded_constants.shp'), - gridded_ph, - os.path.join(auxiliary_dir, 'fertilizers', 'gridded_ph.tiff'), - gridded_cec, - os.path.join(auxiliary_dir, 'fertilizers', 'gridded_cec.tiff')) - self.ef_by_crop = self.get_ef_by_crop() - else: - self.logger.write_log('Waiting for master to get the gridded constants', message_level=2) - self.gridded_constants = None - self.ef_by_crop = None - - self.gridded_constants = self.comm.bcast(self.gridded_constants, root=0) - # self.gridded_constants = IoShapefile(self.comm).split_shapefile(self.gridded_constants) - self.gridded_constants = self.gridded_constants.loc[self.crop_distribution.index, :] - self.ef_by_crop = self.comm.bcast(self.ef_by_crop, root=0) - # self.ef_by_crop = IoShapefile(self.comm).split_shapefile(self.ef_by_crop) - self.ef_by_crop = self.ef_by_crop.loc[self.crop_distribution.index, :] + self.gridded_constants = self.get_gridded_constants(gridded_ph, gridded_cec) + self.ef_by_crop = self.get_ef_by_crop() 
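Instead of broadcasting the gridded constants from the master process, the fertilizers sector now relies on the IoShapefile gather/split helpers: collect the distributed pieces on rank 0, operate on the full table, and hand chunks back to every rank. An illustrative mpi4py/pandas sketch of that round trip (plain DataFrames stand in for the GeoDataFrame chunks; the project's own helpers live in io_shapefile.py and may differ in detail):

import numpy as np
import pandas as pd
from mpi4py import MPI

comm = MPI.COMM_WORLD

# Each rank starts with its own piece of the table.
local = pd.DataFrame({'FID': [comm.Get_rank()], 'value': [1.0]})

# gather_shapefile: concatenate all pieces on the root rank.
pieces = comm.gather(local, root=0)
full = pd.concat(pieces) if comm.Get_rank() == 0 else None

# split_shapefile: the root rank splits the full table and returns one chunk per rank.
chunks = np.array_split(full, comm.Get_size()) if comm.Get_rank() == 0 else None
local = comm.scatter(chunks, root=0)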
self.fertilizer_denominator_yearly_factor_path = fertilizer_denominator_yearly_factor_path self.crop_calendar = self.read_profiles(crop_calendar) @@ -91,7 +82,7 @@ class AgriculturalCropFertilizersSector(AgriculturalSector): for crop in self.crop_list: crop_ef = self.gridded_constants.loc[:, ['geometry', 'nut_code']].copy() # f_ph - if formula: + if FORMULA: # After Zhang et al. (2018) crop_ef['f_ph'] = (0.067 * self.gridded_constants['ph'] ** 2) - \ (0.69 * self.gridded_constants['ph']) + 0.68 @@ -130,10 +121,16 @@ class AgriculturalCropFertilizersSector(AgriculturalSector): def to_dst_resolution(self, src_shapefile, value): spent_time = timeit.default_timer() - intersection = self.spatial_overlays(src_shapefile.to_crs(self.grid_shp.crs), self.grid_shp.reset_index()) + intersection = self.spatial_overlays(src_shapefile.to_crs(self.grid.shapefile.crs).reset_index(), + self.grid.shapefile.reset_index()) + + # intersection = IoShapefile(self.comm).balance(intersection) + intersection['area'] = intersection.geometry.area - dst_shapefile = self.grid_shp.reset_index().copy() + dst_shapefile = self.grid.shapefile.reset_index().copy() + # dst_shapefile = self.grid.shapefile.loc[np.unique(intersection['FID'])].copy() dst_shapefile['involved_area'] = intersection.groupby('FID')['area'].sum() + # dst_shapefile.reset_index(inplace=True) intersection_with_dst_areas = pd.merge(intersection, dst_shapefile.loc[:, ['FID', 'involved_area']], how='left', on='FID') intersection_with_dst_areas['involved_area'] = \ @@ -141,51 +138,47 @@ class AgriculturalCropFertilizersSector(AgriculturalSector): intersection_with_dst_areas[value] = \ intersection_with_dst_areas[value] * intersection_with_dst_areas['involved_area'] + dst_shapefile.set_index('FID', inplace=True) dst_shapefile[value] = intersection_with_dst_areas.groupby('FID')[value].sum() - dst_shapefile.drop('involved_area', axis=1, inplace=True) + # dst_shapefile.drop('involved_area', axis=1, inplace=True) + dst_shapefile.dropna(inplace=True) + + dst_shapefile = IoShapefile(self.comm).gather_shapefile(dst_shapefile.reset_index()) + if self.comm.Get_rank() == 0: + # dst_shapefile['FID_involved_area'] = dst_shapefile.groupby('FID')['involved_area'].sum() + # dst_shapefile['involved_area'] = dst_shapefile['involved_area'] / dst_shapefile['FID_involved_area'] + # dst_shapefile[value] = dst_shapefile[value] * dst_shapefile['involved_area'] + # dst_shapefile[value] = dst_shapefile[value].astype(np.float64) + # dst_shapefile.drop(columns=['involved_area', 'FID_involved_area'], inplace=True) + # dst_shapefile = dst_shapefile.groupby(['FID'])[value].sum() + dst_shapefile = dst_shapefile.groupby(['FID'])[value].mean() + else: + dst_shapefile = None + dst_shapefile = IoShapefile(self.comm).split_shapefile(dst_shapefile) + # print('Rank {0} -Z {1}: \n{2}\n'.format(self.comm.Get_rank(), value, dst_shapefile)) + # sys.stdout.flush() self.logger.write_time_log('AgriculturalCropFertilizersSector', 'to_dst_resolution', timeit.default_timer() - spent_time) - dst_shapefile.set_index('FID', inplace=True) - return dst_shapefile - def to_dst_resolution_parallel(self, src_shapefile, index, value): + def get_gridded_constants(self, ph_path, cec_path): spent_time = timeit.default_timer() - grid_shp = self.grid_shp.loc[index, :].copy() - src_shapefile = self.comm.bcast(src_shapefile, root=0) - src_shapefile = src_shapefile.to_crs(grid_shp.crs) - src_shapefile = src_shapefile[src_shapefile.within(grid_shp.unary_union)] - - intersection = self.spatial_overlays(src_shapefile, grid_shp) 
- intersection['area'] = intersection.geometry.area - dst_shapefile = grid_shp.copy() - dst_shapefile['involved_area'] = intersection.groupby('FID')['area'].sum() - intersection_with_dst_areas = pd.merge(intersection, dst_shapefile.loc[:, ['FID', 'involved_area']], - how='left', on='FID') - intersection_with_dst_areas['involved_area'] = \ - intersection_with_dst_areas['area'] / intersection_with_dst_areas['involved_area'] - - intersection_with_dst_areas[value] = \ - intersection_with_dst_areas[value] * intersection_with_dst_areas['involved_area'] - dst_shapefile[value] = intersection_with_dst_areas.groupby('FID')[value].sum() - dst_shapefile.drop('involved_area', axis=1, inplace=True) - self.logger.write_time_log('AgriculturalCropFertilizersSector', 'to_dst_resolution_parallel', - timeit.default_timer() - spent_time) - dst_shapefile.set_index('FID', inplace=True) - - return dst_shapefile + self.logger.write_log('Getting gridded constants', message_level=2) - def get_gridded_constants(self, gridded_ph_cec_path, ph_path, clipped_ph_path, cec_path, clipped_cec_path): - spent_time = timeit.default_timer() + gridded_ph_cec_path = os.path.join(self.auxiliary_dir, 'fertilizers', 'gridded_constants') if not os.path.exists(gridded_ph_cec_path): self.logger.write_log('Getting PH from {0}'.format(ph_path), message_level=2) - IoRaster(self.comm).clip_raster_with_shapefile_poly(ph_path, self.clip.shapefile, clipped_ph_path, - nodata=255) + clipped_ph_path = os.path.join(self.auxiliary_dir, 'fertilizers', 'gridded_PH.tiff') + if self.comm.Get_rank() == 0: + IoRaster(self.comm).clip_raster_with_shapefile_poly(ph_path, self.clip.shapefile, clipped_ph_path, + nodata=255) self.logger.write_log('PH clipped done!', message_level=3) - ph_gridded = IoRaster(self.comm).to_shapefile_serie(clipped_ph_path, nodata=255) + ph_gridded = IoRaster(self.comm).to_shapefile_parallel(clipped_ph_path, nodata=255) self.logger.write_log('PH to shapefile done!', message_level=3) + ph_gridded.set_index('CELL_ID', inplace=True) ph_gridded.rename(columns={'data': 'ph'}, inplace=True) + ph_gridded = IoShapefile(self.comm).balance(ph_gridded) # To correct input data ph_gridded['ph'] = ph_gridded['ph'] / 10 self.logger.write_log('PH to destiny resolution ...', message_level=3) @@ -193,85 +186,58 @@ class AgriculturalCropFertilizersSector(AgriculturalSector): self.logger.write_log('PH to destiny resolution done!', message_level=3) self.logger.write_log('Getting CEC from {0}'.format(cec_path), message_level=2) - IoRaster(self.comm).clip_raster_with_shapefile_poly(cec_path, self.clip.shapefile, clipped_cec_path, - nodata=-32768) + clipped_cec_path = os.path.join(self.auxiliary_dir, 'fertilizers', 'gridded_CEC.tiff') + if self.comm.Get_rank() == 0: + IoRaster(self.comm).clip_raster_with_shapefile_poly(cec_path, self.clip.shapefile, clipped_cec_path, + nodata=-32768) self.logger.write_log('CEC clipped done!', message_level=3) - cec_gridded = IoRaster(self.comm).to_shapefile_serie(clipped_cec_path, nodata=-32768) + cec_gridded = IoRaster(self.comm).to_shapefile_parallel(clipped_cec_path, nodata=-32768) self.logger.write_log('CEC to shapefile done!', message_level=3) cec_gridded.rename(columns={'data': 'cec'}, inplace=True) + cec_gridded.set_index('CELL_ID', inplace=True) + cec_gridded = IoShapefile(self.comm).balance(cec_gridded) self.logger.write_log('CEC to destiny resolution ...', message_level=3) - cec_gridded = self.to_dst_resolution(cec_gridded, value='cec') + cec_gridded = self.to_dst_resolution(cec_gridded.reset_index(), 
value='cec') self.logger.write_log('CEC to destiny resolution done!', message_level=3) - gridded_ph_cec = ph_gridded - gridded_ph_cec['cec'] = cec_gridded['cec'] - gridded_ph_cec.dropna(inplace=True) + ph_gridded = IoShapefile(self.comm).gather_shapefile(ph_gridded.reset_index()) + cec_gridded = IoShapefile(self.comm).gather_shapefile(cec_gridded.reset_index()) + if self.comm.Get_rank() == 0: + gridded_ph_cec = ph_gridded + # gridded_ph_cec = ph_gridded.groupby('FID').mean() + # cec_gridded = cec_gridded.groupby('FID').mean() + # gridded_ph_cec = ph_gridded + gridded_ph_cec['cec'] = cec_gridded['cec'] + gridded_ph_cec.set_index('FID', inplace=True) + # gridded_ph_cec = gridded_ph_cec.loc[(gridded_ph_cec['ph'] > 0) & (gridded_ph_cec['cec'] > 0)] + gridded_ph_cec = GeoDataFrame( + gridded_ph_cec, + geometry=self.grid.shapefile.loc[gridded_ph_cec.index.get_level_values('FID'), 'geometry'].values, + crs=self.grid.shapefile.crs) + else: + gridded_ph_cec = None + gridded_ph_cec = IoShapefile(self.comm).split_shapefile(gridded_ph_cec) + # print('Rank {0} -Z PH: \n{1}\n'.format(self.comm.Get_rank(), np.unique(gridded_ph_cec['ph']))) + # print('Rank {0} -Z CEC: \n{1}\n'.format(self.comm.Get_rank(), np.unique(gridded_ph_cec['cec']))) + # print('Rank {0} -Z FID: \n{1}\n'.format(self.comm.Get_rank(), np.unique(gridded_ph_cec.index))) + # sys.stdout.flush() + # exit() + gridded_ph_cec = self.add_nut_code(gridded_ph_cec.reset_index(), self.nut_shapefile) + gridded_ph_cec = gridded_ph_cec[gridded_ph_cec['nut_code'] != -999] + gridded_ph_cec.set_index('FID', inplace=True) - gridded_ph_cec = self.add_nut_code(gridded_ph_cec, self.nut_shapefile) - gridded_ph_cec.index.name = 'FID' - # gridded_ph_cec.set_index('FID', inplace=True) + IoShapefile(self.comm).write_shapefile_parallel(gridded_ph_cec.reset_index(), gridded_ph_cec_path) - # # Selecting only PH and CEC cells that have also some crop. 
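The reworked to_dst_resolution is essentially an area-weighted transfer: intersect the source polygons with the destination cells, weight each source value by the share of the cell's intersected area it covers, and aggregate per FID. A serial geopandas sketch of that idea, using geopandas.overlay instead of the sector's spatial_overlays helper and leaving out the MPI gather/split steps:

import geopandas as gpd


def area_weighted_to_grid(src, grid, value):
    # src: source polygons carrying the column `value`; grid: destination cells with an 'FID' column.
    # Both GeoDataFrames are assumed to share the same projected CRS.
    inter = gpd.overlay(src, grid[['FID', 'geometry']], how='intersection')
    inter['area'] = inter.geometry.area
    # Share of each destination cell's intersected area covered by each source polygon.
    inter['weight'] = inter['area'] / inter.groupby('FID')['area'].transform('sum')
    inter[value] = inter[value] * inter['weight']
    return inter.groupby('FID')[value].sum()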
- # gridded_ph_cec = gridded_ph_cec.loc[self.crop_distribution.index, :] - IoShapefile(self.comm).write_shapefile_serial(gridded_ph_cec.reset_index(), gridded_ph_cec_path) + gridded_ph_cec = IoShapefile(self.comm).gather_bcast_shapefile(gridded_ph_cec) else: gridded_ph_cec = IoShapefile(self.comm).read_shapefile_serial(gridded_ph_cec_path) gridded_ph_cec.set_index('FID', inplace=True) - self.logger.write_time_log('AgriculturalCropFertilizersSector', 'get_gridded_constants', - timeit.default_timer() - spent_time) - return gridded_ph_cec - - def get_gridded_constants_parallel(self, gridded_ph_cec_path, ph_path, clipped_ph_path, cec_path, clipped_cec_path, - index): - spent_time = timeit.default_timer() - if not os.path.exists(gridded_ph_cec_path): - if self.comm.Get_rank() == 0: - self.logger.write_log('Getting PH from {0}'.format(ph_path), message_level=2) - IoRaster(self.comm).clip_raster_with_shapefile_poly(ph_path, self.clip.shapefile, clipped_ph_path, - nodata=255) - self.logger.write_log('PH clipped done!', message_level=3) - ph_gridded = IoRaster(self.comm).to_shapefile_serie(clipped_ph_path, nodata=255) - self.logger.write_log('PH to shapefile done!', message_level=3) - ph_gridded.rename(columns={'data': 'ph'}, inplace=True) - # To correct input data - ph_gridded['ph'] = ph_gridded['ph'] / 10 - else: - ph_gridded = None - self.logger.write_log('PH to destiny resolution ...', message_level=3) - ph_gridded = self.to_dst_resolution_parallel(ph_gridded, index, value='ph') - self.logger.write_log('PH to destiny resolution done!', message_level=3) - if self.comm.Get_rank() == 0: - self.logger.write_log('Getting CEC from {0}'.format(cec_path), message_level=2) - IoRaster(self.comm).clip_raster_with_shapefile_poly(cec_path, self.clip.shapefile, clipped_cec_path, - nodata=-32768) - self.logger.write_log('CEC clipped done!', message_level=3) - cec_gridded = IoRaster(self.comm).to_shapefile_serie(clipped_cec_path, nodata=-32768) - self.logger.write_log('CEC to shapefile done!', message_level=3) - cec_gridded.rename(columns={'data': 'cec'}, inplace=True) - else: - cec_gridded = None - - self.logger.write_log('CEC to destiny resolution ...', message_level=3) - cec_gridded = self.to_dst_resolution_parallel(cec_gridded, index, value='cec') - self.logger.write_log('CEC to destiny resolution done!', message_level=3) - - gridded_ph_cec = ph_gridded - gridded_ph_cec['cec'] = cec_gridded['cec'] - - gridded_ph_cec.dropna(inplace=True) - - gridded_ph_cec = self.add_nut_code(gridded_ph_cec, self.nut_shapefile) - gridded_ph_cec.index.name = 'FID' - # gridded_ph_cec.set_index('FID', inplace=True) + # Selecting only PH and CEC cells that have also some crop. + gridded_ph_cec = gridded_ph_cec.loc[self.crop_distribution.index, :] + # gridded_ph_cec = gridded_ph_cec.loc[(gridded_ph_cec['ph'] > 0) & (gridded_ph_cec['cec'] > 0)] - # # Selecting only PH and CEC cells that have also some crop. 
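For context, the pH correction applied in get_ef_by_crop when FORMULA is enabled (after Zhang et al., 2018) is just a quadratic in the gridded pH field; a vectorised sketch of that term:

import numpy as np


def ph_correction_factor(ph):
    # f_ph = 0.067 * ph**2 - 0.69 * ph + 0.68, applied to gridded_constants['ph'].
    ph = np.asarray(ph, dtype=float)
    return 0.067 * ph ** 2 - 0.69 * ph + 0.68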
- # gridded_ph_cec = gridded_ph_cec.loc[self.crop_distribution.index, :] - IoShapefile(self.comm).write_shapefile_parallel(gridded_ph_cec.reset_index(), gridded_ph_cec_path) - else: - gridded_ph_cec = IoShapefile(self.comm).read_shapefile_parallel(gridded_ph_cec_path) - gridded_ph_cec.set_index('FID', inplace=True) - self.logger.write_time_log('AgriculturalCropFertilizersSector', 'get_gridded_constants_parallel', + self.logger.write_time_log('AgriculturalCropFertilizersSector', 'get_gridded_constants', timeit.default_timer() - spent_time) return gridded_ph_cec @@ -425,7 +391,7 @@ class AgriculturalCropFertilizersSector(AgriculturalSector): spent_time = timeit.default_timer() self.logger.write_log('Calculating daily emissions') df_by_day = self.get_daily_inputs(emissions) - for day, daily_inputs in df_by_day.iteritems(): + for day, daily_inputs in df_by_day.items(): df_by_day[day] = self.calculate_nh3_emissions(day, daily_inputs) self.logger.write_time_log('AgriculturalCropFertilizersSector', 'calculate_daily_emissions', timeit.default_timer() - spent_time) diff --git a/hermesv3_bu/sectors/agricultural_crop_operations_sector.py b/hermesv3_bu/sectors/agricultural_crop_operations_sector.py index 70c69fc69082532807a78693ae40d1d7982d1635..9edb2a9cd4689096617ae453d2b3320dc92ab32a 100755 --- a/hermesv3_bu/sectors/agricultural_crop_operations_sector.py +++ b/hermesv3_bu/sectors/agricultural_crop_operations_sector.py @@ -8,10 +8,12 @@ import numpy as np from hermesv3_bu.sectors.agricultural_sector import AgriculturalSector from hermesv3_bu.io_server.io_shapefile import IoShapefile from hermesv3_bu.logger.log import Log +from hermesv3_bu.grids.grid import Grid +from hermesv3_bu.tools.checker import check_files class AgriculturalCropOperationsSector(AgriculturalSector): - def __init__(self, comm_agr, comm, logger, auxiliary_dir, grid_shp, clip, date_array, source_pollutants, + def __init__(self, comm_agr, comm, logger, auxiliary_dir, grid, clip, date_array, source_pollutants, vertical_levels, crop_list, nut_shapefile_path, land_uses_path, ef_dir, monthly_profiles_path, weekly_profiles_path, hourly_profiles_path, speciation_map_path, speciation_profiles_path, molecular_weights_path, landuse_by_nut, crop_by_nut, crop_from_landuse_path): @@ -21,8 +23,8 @@ class AgriculturalCropOperationsSector(AgriculturalSector): not created yet. :type auxiliary_dir: str - :param grid_shp: Shapefile that contains the destination grid. It must contains the 'FID' (cell num). - :type grid_shp: GeoPandas.GeoDataframe + :param grid: Grid object. + :type grid: Grid :param clip: Path to the shapefile that contains the region of interest. :type clip: str @@ -31,7 +33,7 @@ class AgriculturalCropOperationsSector(AgriculturalSector): :type date_array: list(datetime.datetime, ...) :param nut_shapefile_path: Path to the shapefile that contain the NUT polygons. The shapefile must contain - the 'ORDER06' information with the NUT_code. + the 'nuts2_id' information with the NUT_code. :type nut_shapefile_path: str :param source_pollutants: List of input pollutants to take into account. Agricultural livestock module can @@ -84,13 +86,20 @@ class AgriculturalCropOperationsSector(AgriculturalSector): :param crop_by_nut: :param nut_shapefile_path: Path to the shapefile that contain the NUT polygons. The shapefile must contain - the 'ORDER07' information with the NUT_code. + the 'nuts3_id' information with the NUT_code. 
:type nut_shapefile_path: str """ spent_time = timeit.default_timer() logger.write_log('===== AGRICULTURAL CROP OPERATIONS SECTOR =====') + + check_files( + [nut_shapefile_path, land_uses_path, monthly_profiles_path, weekly_profiles_path, + hourly_profiles_path, speciation_map_path, speciation_profiles_path, molecular_weights_path, + landuse_by_nut, crop_by_nut, crop_from_landuse_path] + + [os.path.join(ef_dir, ef_file) for ef_file in ['{0}.csv'.format(pol) for pol in source_pollutants]]) + super(AgriculturalCropOperationsSector, self).__init__( - comm_agr, comm, logger, auxiliary_dir, grid_shp, clip, date_array, nut_shapefile_path, source_pollutants, + comm_agr, comm, logger, auxiliary_dir, grid, clip, date_array, nut_shapefile_path, source_pollutants, vertical_levels, crop_list, land_uses_path, landuse_by_nut, crop_by_nut, crop_from_landuse_path, ef_dir, monthly_profiles_path, weekly_profiles_path, hourly_profiles_path, speciation_map_path, speciation_profiles_path, molecular_weights_path) @@ -240,7 +249,7 @@ class AgriculturalCropOperationsSector(AgriculturalSector): self.logger.write_log('\tCalculating emissions') distribution_by_month = {} - for month in self.months.iterkeys(): + for month in self.months.keys(): distribution_by_month[month] = self.calculate_distribution_by_month(month) self.crop_distribution = self.add_dates(distribution_by_month) diff --git a/hermesv3_bu/sectors/agricultural_machinery_sector.py b/hermesv3_bu/sectors/agricultural_machinery_sector.py index ffd297769867da91ec08178c7c94175859c315aa..6d6245824bf27ab0b91307f1821dcf57d88560df 100755 --- a/hermesv3_bu/sectors/agricultural_machinery_sector.py +++ b/hermesv3_bu/sectors/agricultural_machinery_sector.py @@ -11,31 +11,37 @@ import numpy as np from hermesv3_bu.sectors.agricultural_sector import AgriculturalSector from hermesv3_bu.io_server.io_shapefile import IoShapefile -from hermesv3_bu.logger.log import Log +from hermesv3_bu.tools.checker import check_files class AgriculturalMachinerySector(AgriculturalSector): - def __init__(self, comm_agr, comm, logger, auxiliary_dir, grid_shp, clip, date_array, source_pollutants, + def __init__(self, comm_agr, comm, logger, auxiliary_dir, grid, clip, date_array, source_pollutants, vertical_levels, crop_list, nut_shapefile, machinery_list, land_uses_path, ef_files_dir, monthly_profiles_path, weekly_profiles_path, hourly_profiles_path, speciation_map_path, speciation_profiles_path, molecular_weights_path, landuse_by_nut, crop_by_nut, crop_from_landuse_path, - machinery_distibution_nut_shapefile_path, deterioration_factor_path, load_factor_path, + machinery_distribution_nut_shapefile_path, deterioration_factor_path, load_factor_path, vehicle_ratio_path, vehicle_units_path, vehicle_workhours_path, vehicle_power_path, - crop_machinery_by_nut): + crop_machinery_nuts3): spent_time = timeit.default_timer() logger.write_log('===== AGRICULTURAL MACHINERY SECTOR =====') + check_files( + [nut_shapefile, land_uses_path, ef_files_dir, monthly_profiles_path, weekly_profiles_path, + hourly_profiles_path, speciation_map_path, speciation_profiles_path, molecular_weights_path, + landuse_by_nut, crop_by_nut, crop_from_landuse_path, machinery_distribution_nut_shapefile_path, + deterioration_factor_path, load_factor_path, vehicle_ratio_path, vehicle_units_path, + vehicle_workhours_path, vehicle_power_path, crop_machinery_nuts3]) super(AgriculturalMachinerySector, self).__init__( - comm_agr, comm, logger, auxiliary_dir, grid_shp, clip, date_array, nut_shapefile, source_pollutants, + comm_agr, 
comm, logger, auxiliary_dir, grid, clip, date_array, nut_shapefile, source_pollutants, vertical_levels, crop_list, land_uses_path, landuse_by_nut, crop_by_nut, crop_from_landuse_path, ef_files_dir, monthly_profiles_path, weekly_profiles_path, hourly_profiles_path, speciation_map_path, speciation_profiles_path, molecular_weights_path) self.machinery_list = machinery_list - self.crop_machinery_by_nut = self.read_profiles(crop_machinery_by_nut) + self.crop_machinery_nuts3 = self.read_profiles(crop_machinery_nuts3) self.crop_distribution = self.get_crop_distribution_by_nut( - self.crop_distribution, machinery_distibution_nut_shapefile_path, nut_code='ORDER07') + self.crop_distribution, machinery_distribution_nut_shapefile_path, nut_code='nuts3_id') self.months = self.get_date_array_by_month() @@ -53,15 +59,15 @@ class AgriculturalMachinerySector(AgriculturalSector): spent_time = timeit.default_timer() def get_fraction(dataframe): - total_crop_sum = self.crop_machinery_by_nut.loc[self.crop_machinery_by_nut[nut_code] == int(dataframe.name), - self.crop_list].values.sum() + total_crop_sum = self.crop_machinery_nuts3.loc[self.crop_machinery_nuts3[nut_code] == int(dataframe.name), + self.crop_list].values.sum() dataframe['fraction'] = dataframe[self.crop_list].sum(axis=1) / total_crop_sum return dataframe.loc[:, ['fraction']] crop_distribution.reset_index(inplace=True) - crop_distribution_nut_path = os.path.join(self.auxiliary_dir, 'crops', 'crops_nut.shp') + crop_distribution_nut_path = os.path.join(self.auxiliary_dir, 'agriculture', 'crops', 'crops_nuts3') if not os.path.exists(crop_distribution_nut_path): nut_shapefile = gpd.read_file(nut_shapefile) if nut_code is not None: @@ -81,12 +87,12 @@ class AgriculturalMachinerySector(AgriculturalSector): if write_crop_by_nut: crop_distribution.loc[:, self.crop_list + [nut_code]].groupby(nut_code).sum().reset_index().to_csv( - self.crop_machinery_by_nut) + self.crop_machinery_nuts3) crop_distribution['fraction'] = crop_distribution.groupby(nut_code).apply(get_fraction) crop_distribution.drop(columns=self.crop_list, inplace=True) crop_distribution.rename(columns={nut_code: 'NUT_code'}, inplace=True) - IoShapefile(self.comm).write_shapefile_serial(crop_distribution, crop_distribution_nut_path) + IoShapefile(self.comm).write_shapefile_parallel(crop_distribution, crop_distribution_nut_path) else: crop_distribution = IoShapefile(self.comm).read_shapefile(crop_distribution_nut_path) @@ -112,30 +118,50 @@ class AgriculturalMachinerySector(AgriculturalSector): spent_time = timeit.default_timer() def get_n(df): - df['N'] = self.vehicle_units.loc[self.vehicle_units['code'] == df.name[0], df.name[1]].values[0] + try: + df['N'] = self.vehicle_units.loc[self.vehicle_units['nuts3_id'] == df.name[0], df.name[1]].values[0] + except IndexError: + warn("*WARNING*: NUT3_ID {0} not found in the {1} file".format( + df.name[0], 'crop_machinery_vehicle_units_path')) + df['N'] = 0.0 return df.loc[:, ['N']] def get_s(df): - df['S'] = self.vehicle_ratio.loc[ - (self.vehicle_ratio['code'] == df.name[0]) & (self.vehicle_ratio['technology'] == df.name[2]), - df.name[1]].values[0] + try: + df['S'] = self.vehicle_ratio.loc[ + (self.vehicle_ratio['nuts3_id'] == df.name[0]) & (self.vehicle_ratio['technology'] == df.name[2]), + df.name[1]].values[0] + except IndexError: + warn("*WARNING*: NUT3_ID {0} not found in the {1} file".format( + df.name[0], 'crop_machinery_vehicle_ratio_path')) + df['S'] = 0.0 return df.loc[:, ['S']] def get_t(df): try: - df['T'] = 
self.vehicle_workhours.loc[(self.vehicle_workhours['code'] == df.name[0]) & + df['T'] = self.vehicle_workhours.loc[(self.vehicle_workhours['nuts3_id'] == df.name[0]) & (self.vehicle_workhours['technology'] == df.name[2]), df.name[1]].values[0] except IndexError: df['T'] = np.nan - df.loc[df['T'].isna(), 'T'] = self.vehicle_workhours.loc[ - (self.vehicle_workhours['code'] == df.name[0]) & (self.vehicle_workhours['technology'] == 'default'), - df.name[1]].values[0] + try: + df.loc[df['T'].isna(), 'T'] = self.vehicle_workhours.loc[ + (self.vehicle_workhours['nuts3_id'] == df.name[0]) & (self.vehicle_workhours['technology'] == + 'default'), df.name[1]].values[0] + except IndexError: + warn("*WARNING*: NUT3_ID {0} not found in the {1} file".format( + df.name[0], 'crop_machinery_vehicle_workhours_path')) + df.loc[df['T'].isna(), 'T'] = 0.0 return df.loc[:, ['T']] def get_p(df): - df['P'] = self.vehicle_power.loc[self.vehicle_power['code'] == df.name[0], df.name[1]].values[0] + try: + df['P'] = self.vehicle_power.loc[self.vehicle_power['nuts3_id'] == df.name[0], df.name[1]].values[0] + except IndexError: + warn("*WARNING*: NUT3_ID {0} not found in the {1} file".format( + df.name[0], 'crop_machinery_vehicle_power_path')) + df['P'] = 0.0 return df.loc[:, ['P']] def get_lf(df): @@ -314,7 +340,7 @@ class AgriculturalMachinerySector(AgriculturalSector): self.logger.write_log('\tCalculating emissions') distribution_by_month = {} - for month in self.months.iterkeys(): + for month in self.months.keys(): distribution_by_month[month] = self.calculate_monthly_emissions_by_nut(month) distribution_by_month[month] = self.distribute(distribution_by_month[month]) diff --git a/hermesv3_bu/sectors/agricultural_sector.py b/hermesv3_bu/sectors/agricultural_sector.py index cb8fbac85ff4f5274a39d343e0dd0b5227a7f5cf..07a10e91fc8ed8076354c0d075f915edbb92f938 100755 --- a/hermesv3_bu/sectors/agricultural_sector.py +++ b/hermesv3_bu/sectors/agricultural_sector.py @@ -12,11 +12,20 @@ from mpi4py import MPI from hermesv3_bu.sectors.sector import Sector from hermesv3_bu.io_server.io_shapefile import IoShapefile from hermesv3_bu.io_server.io_raster import IoRaster +from hermesv3_bu.tools.checker import error_exit from hermesv3_bu.logger.log import Log +from hermesv3_bu.grids.grid import Grid +from geopandas import GeoDataFrame +from pandas import DataFrame + +from ctypes import cdll, CDLL +cdll.LoadLibrary("libc.so.6") +libc = CDLL("libc.so.6") +libc.malloc_trim(0) class AgriculturalSector(Sector): - def __init__(self, comm_agr, comm, logger, auxiliary_dir, grid_shp, clip, date_array, nut_shapefile, + def __init__(self, comm_agr, comm, logger, auxiliary_dir, grid, clip, date_array, nut_shapefile, source_pollutants, vertical_levels, crop_list, land_uses_path, land_use_by_nut, crop_by_nut, crop_from_landuse_path, ef_files_dir, monthly_profiles_path, weekly_profiles_path, hourly_profiles_path, speciation_map_path, speciation_profiles_path, molecular_weights_path): @@ -36,8 +45,8 @@ class AgriculturalSector(Sector): created yet. :type auxiliary_dir: str - :param grid_shp: Shapefile with the grid horizontal distribution. - :type grid_shp: GeoDataFrame + :param grid: Grid object + :type grid: Grid :param date_array: List of datetimes. :type date_array: list(datetime.datetime, ...) 
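The ctypes preamble added at the top of agricultural_sector.py asks glibc to hand free heap pages back to the operating system, which helps after large intermediate GeoDataFrames are dropped. Wrapped as a helper it looks like this (Linux/glibc only; it fails where libc.so.6 is not available):

from ctypes import CDLL, cdll


def trim_memory():
    # malloc_trim(0) releases free memory from the heap back to the OS.
    cdll.LoadLibrary("libc.so.6")
    libc = CDLL("libc.so.6")
    return libc.malloc_trim(0)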
@@ -108,7 +117,7 @@ class AgriculturalSector(Sector): spent_time = timeit.default_timer() super(AgriculturalSector, self).__init__( - comm, logger, auxiliary_dir, grid_shp, clip, date_array, source_pollutants, vertical_levels, + comm, logger, auxiliary_dir, grid, clip, date_array, source_pollutants, vertical_levels, monthly_profiles_path, weekly_profiles_path, hourly_profiles_path, speciation_map_path, speciation_profiles_path, molecular_weights_path) @@ -120,13 +129,15 @@ class AgriculturalSector(Sector): self.land_use_by_nut = land_use_by_nut self.crop_by_nut = crop_by_nut self.crop_from_landuse = self.get_crop_from_land_uses(crop_from_landuse_path) + self.crop_distribution = self.get_crops_by_dst_cell( os.path.join(auxiliary_dir, 'agriculture', 'crops', 'crops.shp')) + self.logger.write_time_log('AgriculturalSector', '__init__', timeit.default_timer() - spent_time) def involved_grid_cells(self, src_shp): spent_time = timeit.default_timer() - grid_shp = IoShapefile(self.comm).split_shapefile(self.grid_shp) + grid_shp = IoShapefile(self.comm).split_shapefile(self.grid.shapefile) src_union = src_shp.to_crs(grid_shp.crs).geometry.unary_union grid_shp = grid_shp.loc[grid_shp.intersects(src_union), :] @@ -162,26 +173,54 @@ class AgriculturalSector(Sector): self.logger.write_time_log('AgriculturalSector', 'calculate_num_days', timeit.default_timer() - spent_time) return day_dict - def get_crop_from_land_uses(self, crop_from_landuse_path): + def get_crop_from_land_uses(self, crop_from_land_use_path): + """ + Get the involved land uses and their weight for each crop. + + Result: + {<crop>: [(int(<land_use>), float(<weight>)), (int(<land_use>), float(<weight>)), ...] + 'alfalfa': [(12, 1.0), (13, 0.3)], + 'almond': [(16, 1.0), (20, 0.3)], + 'apple': [(16, 1.0), (20, 0.3)], + 'apricot': [(16, 1.0), (20, 0.3)], + 'barley': [(12, 1.0)], + ... + } + + :param crop_from_land_use_path: Path to the file that contains the crops and their involved land uses with the + weights. + :type crop_from_land_use_path: str + + :return: Dictionary with the crops as keys and a list as value. That list has as many elements as involved + land uses in that crop. Each element of that list is a tuple with the land use as the first element and its + weight as the second. + :rtype: dict + """ import re spent_time = timeit.default_timer() - crop_from_landuse = pd.read_csv(crop_from_landuse_path, sep=';') + crop_from_landuse = pd.read_csv(crop_from_land_use_path, sep=';') crop_dict = {} for i, element in crop_from_landuse.iterrows(): # if element.crop in self.crop_list: - land_uses = list(map(str, re.split(' , |, | ,|,| ', element.land_use))) - weights = list(map(str, re.split(' , |, | ,|,| ', element.weight))) - crop_dict[element.crop] = zip(land_uses, weights) - self.logger.write_time_log('AgriculturalSector', 'get_crop_from_land_uses', timeit.default_timer() - spent_time) + land_uses = list(map(int, re.split(' , |, | ,|,| ', element.land_use))) + weights = list(map(float, re.split(' , |, | ,|,| ', element.weight))) + crop_dict[element.crop] = list(zip(land_uses, weights)) + self.logger.write_time_log('AgriculturalSector', 'get_crop_from_land_uses', timeit.default_timer() - spent_time) return crop_dict def get_involved_land_uses(self): + """ + Generate the list of involved land uses.
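The parsing added in get_crop_from_land_uses above reduces each row of the ';'-separated mapping file to a list of (land_use, weight) tuples. A standalone sketch of that parsing, assuming the same 'crop', 'land_use' and 'weight' columns and the same tolerant comma/space separators used in the diff:

import re

import pandas as pd


def crops_from_land_uses(path):
    splitter = ' , |, | ,|,| '  # accepts commas with or without surrounding spaces
    table = pd.read_csv(path, sep=';')
    crop_dict = {}
    for _, row in table.iterrows():
        land_uses = [int(v) for v in re.split(splitter, str(row['land_use']))]
        weights = [float(v) for v in re.split(splitter, str(row['weight']))]
        crop_dict[row['crop']] = list(zip(land_uses, weights))
    return crop_dict

A row such as "alfalfa;12, 13;1.0, 0.3" would yield {'alfalfa': [(12, 1.0), (13, 0.3)]}.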
+ + :return: List of land uses involved in the selected crops + :rtype: list + """ spent_time = timeit.default_timer() land_uses_list = [] - for land_use_and_weight_list in self.crop_from_landuse.itervalues(): + for land_use_and_weight_list in self.crop_from_landuse.values(): for land_use_and_weight in land_use_and_weight_list: land_use = int(land_use_and_weight[0]) if land_use not in land_uses_list: @@ -190,55 +229,76 @@ class AgriculturalSector(Sector): return land_uses_list - def get_land_use_src_by_nut_old(self, land_uses): - spent_time = timeit.default_timer() - - df_land_use_with_nut = gpd.read_file(self.land_uses_path) - - df_land_use_with_nut.rename(columns={'CODE': 'NUT', 'gridcode': 'land_use'}, inplace=True) - - df_land_use_with_nut = df_land_use_with_nut.loc[df_land_use_with_nut['land_use'].isin(land_uses), :] + def get_land_use_src_by_nut(self, land_uses, write=False): + """ + Create a shapefile with the involved source cells from the input raster and only for the given land uses. - df_land_use_with_nut = self.spatial_overlays(df_land_use_with_nut, - self.clip.shapefile.to_crs(df_land_use_with_nut.crs)) + :param land_uses: List of land uses to use. + :type land_uses: list - self.logger.write_time_log('AgriculturalSector', 'get_land_use_src_by_nut', timeit.default_timer() - spent_time) - return df_land_use_with_nut + :param write: Boolean that indicates if you want to write the land use shapefile in the source resolution. + :type write: bool - def get_land_use_src_by_nut(self, land_uses): + :return: Shapefile with the land use and nut_code of each source cell. Index: CELL_ID + :rtype: GeoDataFrame + """ spent_time = timeit.default_timer() - land_use_src_by_nut_path = os.path.join(self.auxiliary_dir, 'agriculture', 'land_uses', 'land_uses_src.shp') + + land_use_src_by_nut_path = os.path.join(self.auxiliary_dir, 'agriculture', 'land_uses', 'land_use_src_nut') if not os.path.exists(land_use_src_by_nut_path): - land_uses_clipped = IoRaster(self.comm_agr).clip_raster_with_shapefile_poly( - self.land_uses_path, self.clip.shapefile, - os.path.join(self.auxiliary_dir, 'agriculture', 'land_uses', 'land_uses_clip.tif'), values=land_uses) - - land_uses_shp = IoRaster(self.comm_agr).to_shapefile_serie(land_uses_clipped) - ccaa_shp = IoShapefile(self.comm_agr).read_shapefile_serial(self.nut_shapefile).to_crs(land_uses_shp.crs) - ccaa_shp.drop(columns=['NAME', 'ORDER06'], inplace=True) - ccaa_shp.rename(columns={'CODE': 'NUT'}, inplace=True) - land_use_src_by_nut = self.spatial_overlays(land_uses_shp, ccaa_shp, how='intersection') - land_use_src_by_nut.drop(columns=['idx1', 'idx2', 'CELL_ID'], inplace=True) + land_uses_clipped = os.path.join(self.auxiliary_dir, 'agriculture', 'land_uses', 'land_uses_clip.tif') + if self.comm_agr.Get_rank() == 0: + land_uses_clipped = IoRaster(self.comm_agr).clip_raster_with_shapefile_poly( + self.land_uses_path, self.clip.shapefile, land_uses_clipped, values=land_uses) + self.comm_agr.Barrier() + self.logger.write_log('\t\tRaster {0} to_shapefile.'.format(land_uses_clipped), message_level=3) + land_use_src_by_nut = IoRaster(self.comm_agr).to_shapefile_parallel(land_uses_clipped) + self.logger.write_log('\t\tFiltering shapefile.'.format(land_uses_clipped), message_level=3) land_use_src_by_nut.rename(columns={'data': 'land_use'}, inplace=True) land_use_src_by_nut['land_use'] = land_use_src_by_nut['land_use'].astype(np.int16) - land_use_src_by_nut.reset_index(inplace=True, drop=True) - IoShapefile(self.comm_agr).write_shapefile_serial(land_use_src_by_nut, 
land_use_src_by_nut_path) + + land_use_src_by_nut = self.add_nut_code(land_use_src_by_nut, self.nut_shapefile, nut_value='nuts2_id') + land_use_src_by_nut = land_use_src_by_nut[land_use_src_by_nut['nut_code'] != -999] + libc.malloc_trim(0) + land_use_src_by_nut = IoShapefile(self.comm_agr).balance(land_use_src_by_nut) + + land_use_src_by_nut.set_index('CELL_ID', inplace=True) + if write: + self.logger.write_log('\t\tWriting {0} file.'.format(land_use_src_by_nut_path), message_level=3) + IoShapefile(self.comm_agr).write_shapefile_parallel(land_use_src_by_nut.reset_index(), + land_use_src_by_nut_path) else: - land_use_src_by_nut = IoShapefile(self.comm_agr).read_shapefile_serial(land_use_src_by_nut_path) + land_use_src_by_nut = IoShapefile(self.comm_agr).read_shapefile_parallel(land_use_src_by_nut_path) + land_use_src_by_nut.set_index('CELL_ID', inplace=True) self.logger.write_time_log('AgriculturalSector', 'get_land_use_src_by_nut', timeit.default_timer() - spent_time) + return land_use_src_by_nut def get_tot_land_use_by_nut(self, land_uses): + """ + Get the total amount of land use area by NUT of the involved land uses. + + :param land_uses: Involved land uses. + :type land_uses: list + + :return: Total amount of land use area by NUT. + :rtype: DataFrame + """ spent_time = timeit.default_timer() - df = pd.read_csv(self.land_use_by_nut) + + df = pd.read_csv(self.land_use_by_nut, dtype={'nuts2_id': str}) + df.rename(columns={'nuts2_id': 'nut_code'}, inplace=True) df = df.loc[df['land_use'].isin(land_uses), :] - self.logger.write_time_log('AgriculturalSector', 'get_tot_land_use_by_nut', timeit.default_timer() - spent_time) + df['nut_code'] = df['nut_code'].astype(np.int32) + df.set_index(['nut_code', 'land_use'], inplace=True) + self.logger.write_time_log('AgriculturalSector', 'get_tot_land_use_by_nut', timeit.default_timer() - spent_time) return df def get_land_use_by_nut_csv(self, land_use_distribution_src_nut, land_uses): """ + Get the involved area of land use by involved NUT. :param land_use_distribution_src_nut: Shapefile with the polygons of all the land uses for each NUT. :type land_use_distribution_src_nut: GeoDataFrame @@ -246,13 +306,18 @@ class AgriculturalSector(Sector): :param land_uses: Land uses to take into account. :type land_uses: list - :return: + :return: DataFrame with the total amount of land use area by involved NUT. 
+ :rtype: DataFrame """ spent_time = timeit.default_timer() + land_use_by_nut = pd.DataFrame(index=pd.MultiIndex.from_product( + [np.unique(land_use_distribution_src_nut['nut_code'].astype(np.int64)), + np.unique(land_uses).astype(np.int16)], names=['nut_code', 'land_use'])) + land_use_by_nut['area'] = 0.0 land_use_distribution_src_nut['area'] = land_use_distribution_src_nut.area - land_use_by_nut = land_use_distribution_src_nut.groupby(['NUT', 'land_use']).sum().reset_index() - land_use_by_nut = land_use_by_nut.loc[land_use_by_nut['land_use'].isin(land_uses), :] + land_use_by_nut['area'] += land_use_distribution_src_nut.groupby(['nut_code', 'land_use'])['area'].sum() + land_use_by_nut.fillna(0.0, inplace=True) self.logger.write_time_log('AgriculturalSector', 'get_land_use_by_nut_csv', timeit.default_timer() - spent_time) return land_use_by_nut @@ -271,27 +336,24 @@ class AgriculturalSector(Sector): :rtype: DataFrame """ spent_time = timeit.default_timer() - if nuts is not None: - land_use_by_nut = land_use_by_nut.loc[land_use_by_nut['NUT'].isin(nuts), :] - new_dict = pd.DataFrame() - for nut in np.unique(land_use_by_nut['NUT']): - aux_dict = {'NUT': [nut]} - - for crop, landuse_weight_list in self.crop_from_landuse.iteritems(): - aux = 0 - for landuse, weight in landuse_weight_list: - try: - aux += land_use_by_nut.loc[(land_use_by_nut['land_use'] == int(landuse)) & - (land_use_by_nut['NUT'] == nut), 'area'].values[0] * float(weight) - except IndexError: - # TODO understand better that error - pass - aux_dict[crop] = [aux] - new_dict = new_dict.append(pd.DataFrame.from_dict(aux_dict), ignore_index=True) - new_dict.set_index('NUT', inplace=True) + if nuts is not None: + land_use_by_nut = land_use_by_nut.iloc[land_use_by_nut.index.get_level_values('nut_code').isin(nuts)] + + new_df = pd.DataFrame(index=np.unique(land_use_by_nut.index.get_level_values('nut_code')), + columns=self.crop_from_landuse.keys()) + new_df.fillna(0, inplace=True) + + for crop, land_use_weight_list in self.crop_from_landuse.items(): + for land_use, weight in land_use_weight_list: + aux_df = land_use_by_nut.reset_index() + aux_df = aux_df.loc[aux_df['land_use'] == land_use] + aux_df.drop(columns=['land_use'], inplace=True) + aux_df.set_index('nut_code', inplace=True) + new_df[crop] += aux_df['area'] * weight self.logger.write_time_log('AgriculturalSector', 'land_use_to_crop_by_nut', timeit.default_timer() - spent_time) - return new_dict + + return new_df def get_crop_shape_by_nut(self, crop_by_nut, tot_crop_by_nut): """ @@ -311,6 +373,7 @@ class AgriculturalSector(Sector): crop_share_by_nut = crop_by_nut.copy() for crop in crop_by_nut.columns: crop_share_by_nut[crop] = crop_by_nut[crop] / tot_crop_by_nut[crop] + self.logger.write_time_log('AgriculturalSector', 'get_crop_shape_by_nut', timeit.default_timer() - spent_time) return crop_share_by_nut @@ -328,11 +391,13 @@ class AgriculturalSector(Sector): """ spent_time = timeit.default_timer() - crop_by_nut = pd.read_csv(self.crop_by_nut) - crop_by_nut.drop(columns='name', inplace=True) + crop_by_nut = pd.read_csv(self.crop_by_nut, dtype={'nuts2_id': str}) + crop_by_nut.drop(columns='nuts2_na', inplace=True) + crop_by_nut.rename(columns={'nuts2_id': 'nut_code'}, inplace=True) + + crop_by_nut['nut_code'] = crop_by_nut['nut_code'].astype(np.int64) + crop_by_nut.set_index('nut_code', inplace=True) - crop_by_nut['code'] = crop_by_nut['code'].astype(np.int16) - crop_by_nut.set_index('code', inplace=True) crop_by_nut = crop_by_nut.loc[crop_share_by_nut.index, :] crop_area_by_nut 
= crop_share_by_nut * crop_by_nut @@ -354,25 +419,26 @@ class AgriculturalSector(Sector): """ spent_time = timeit.default_timer() - crop_distribution_src = land_use_distribution_src_nut.loc[:, ['NUT', 'geometry']] - for crop, landuse_weight_list in self.crop_from_landuse.iteritems(): + crop_distribution_src = land_use_distribution_src_nut.loc[:, ['nut_code', 'geometry']] + + for crop, landuse_weight_list in self.crop_from_landuse.items(): crop_distribution_src[crop] = 0 for landuse, weight in landuse_weight_list: crop_distribution_src.loc[land_use_distribution_src_nut['land_use'] == int(landuse), crop] += \ land_use_distribution_src_nut.loc[land_use_distribution_src_nut['land_use'] == int(landuse), 'area'] * float(weight) - for nut in np.unique(crop_distribution_src['NUT']): + for nut in np.unique(crop_distribution_src['nut_code']): for crop in crop_area_by_nut.columns.values: - crop_distribution_src.loc[crop_distribution_src['NUT'] == nut, crop] /= crop_distribution_src.loc[ - crop_distribution_src['NUT'] == nut, crop].sum() - for nut in np.unique(crop_distribution_src['NUT']): + crop_distribution_src.loc[crop_distribution_src['nut_code'] == nut, crop] /= crop_distribution_src.loc[ + crop_distribution_src['nut_code'] == nut, crop].sum() + for nut in np.unique(crop_distribution_src['nut_code']): for crop in crop_area_by_nut.columns.values: - crop_distribution_src.loc[crop_distribution_src['NUT'] == nut, crop] *= \ + crop_distribution_src.loc[crop_distribution_src['nut_code'] == nut, crop] *= \ crop_area_by_nut.loc[nut, crop] self.logger.write_time_log('AgriculturalSector', 'calculate_crop_distribution_src', timeit.default_timer() - spent_time) - + crop_distribution_src = IoShapefile(self.comm_agr).balance(crop_distribution_src) return crop_distribution_src def get_crop_distribution_in_dst_cells(self, crop_distribution): @@ -388,9 +454,12 @@ class AgriculturalSector(Sector): spent_time = timeit.default_timer() crop_list = list(np.setdiff1d(crop_distribution.columns.values, ['NUT', 'geometry'])) - crop_distribution = crop_distribution.to_crs(self.grid_shp.crs) + crop_distribution = crop_distribution.to_crs(self.grid.shapefile.crs) crop_distribution['src_inter_fraction'] = crop_distribution.geometry.area - crop_distribution = self.spatial_overlays(crop_distribution, self.grid_shp, how='intersection') + crop_distribution = self.spatial_overlays(crop_distribution.reset_index(), self.grid.shapefile.reset_index(), + how='intersection') + + crop_distribution = IoShapefile(self.comm_agr).balance(crop_distribution) crop_distribution['src_inter_fraction'] = \ crop_distribution.geometry.area / crop_distribution['src_inter_fraction'] @@ -399,8 +468,8 @@ class AgriculturalSector(Sector): crop_distribution = crop_distribution.loc[:, crop_list + ['FID']].groupby('FID').sum() - crop_distribution = gpd.GeoDataFrame(crop_distribution, crs=self.grid_shp.crs, - geometry=self.grid_shp.loc[crop_distribution.index, 'geometry']) + crop_distribution = gpd.GeoDataFrame(crop_distribution, crs=self.grid.shapefile.crs, + geometry=self.grid.shapefile.loc[crop_distribution.index, 'geometry']) crop_distribution.reset_index(inplace=True) crop_distribution.set_index('FID', inplace=True) @@ -423,45 +492,48 @@ class AgriculturalSector(Sector): """ spent_time = timeit.default_timer() if not os.path.exists(file_path): + self.logger.write_log('Creating the crop distribution shapefile.', message_level=2) + + self.logger.write_log('\tCreating land use distribution on the source resolution.', message_level=3) + involved_land_uses = 
self.get_involved_land_uses() + land_use_distribution_src_nut = self.get_land_use_src_by_nut(involved_land_uses, write=False) + + land_use_by_nut = self.get_land_use_by_nut_csv(land_use_distribution_src_nut, involved_land_uses) + + self.logger.write_log('\tCreating the crop distribution on the source resolution.', message_level=3) + crop_by_nut = self.land_use_to_crop_by_nut(land_use_by_nut) + tot_land_use_by_nut = self.get_tot_land_use_by_nut(involved_land_uses) + tot_crop_by_nut = self.land_use_to_crop_by_nut( + tot_land_use_by_nut, nuts=list(np.unique(land_use_by_nut.index.get_level_values('nut_code')))) + crop_shape_by_nut = self.get_crop_shape_by_nut(crop_by_nut, tot_crop_by_nut) + crop_area_by_nut = self.get_crop_area_by_nut(crop_shape_by_nut) + crop_distribution_src = self.calculate_crop_distribution_src( + crop_area_by_nut, land_use_distribution_src_nut) + + self.logger.write_log('\tCreating the crop distribution on the grid resolution.', message_level=3) + crop_distribution_dst = self.get_crop_distribution_in_dst_cells(crop_distribution_src) + self.logger.write_log('\tCreating the crop distribution shapefile.', message_level=3) + crop_distribution_dst = IoShapefile(self.comm_agr).gather_shapefile(crop_distribution_dst.reset_index()) if self.comm_agr.Get_rank() == 0: - self.logger.write_log('Creating the crop distribution shapefile on the grid resolution.', - message_level=2) - involved_land_uses = self.get_involved_land_uses() - - land_use_distribution_src_nut = self.get_land_use_src_by_nut(involved_land_uses) - - land_use_by_nut = self.get_land_use_by_nut_csv(land_use_distribution_src_nut, involved_land_uses) - tot_land_use_by_nut = self.get_tot_land_use_by_nut(involved_land_uses) - - crop_by_nut = self.land_use_to_crop_by_nut(land_use_by_nut) - tot_crop_by_nut = self.land_use_to_crop_by_nut( - tot_land_use_by_nut, nuts=list(np.unique(land_use_by_nut['NUT']))) - - crop_shape_by_nut = self.get_crop_shape_by_nut(crop_by_nut, tot_crop_by_nut) - crop_area_by_nut = self.get_crop_area_by_nut(crop_shape_by_nut) - - crop_distribution_src = self.calculate_crop_distribution_src( - crop_area_by_nut, land_use_distribution_src_nut) - - crop_distribution_dst = self.get_crop_distribution_in_dst_cells(crop_distribution_src) - - crop_distribution_dst = self.add_timezone(crop_distribution_dst) - IoShapefile(self.comm).write_shapefile_serial(crop_distribution_dst, file_path) + crop_distribution_dst = crop_distribution_dst.groupby('FID').sum() + crop_distribution_dst = GeoDataFrame( + crop_distribution_dst, + geometry=self.grid.shapefile.loc[crop_distribution_dst.index.get_level_values('FID'), + 'geometry'].values, + crs=self.grid.shapefile.crs) else: - self.logger.write_log('Waiting for the master process that creates the crop distribution shapefile.', - message_level=2) crop_distribution_dst = None - self.comm_agr.Barrier() - if self.comm.Get_rank() == 0 and self.comm_agr.Get_rank() != 0: - # Every master rank read the created crop distribution shapefile. 
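The rewritten get_crops_by_dst_cell in this hunk replaces the old master-only branch with a gather / aggregate / redistribute pattern: every rank computes partial per-cell values, rank 0 sums the rows that share the same FID and reattaches the grid geometry, and the result is split across ranks again. A hedged sketch of that pattern with plain mpi4py and pandas (gather_shapefile and split_shapefile are project helpers and are not reproduced here):

import numpy as np
import pandas as pd
from mpi4py import MPI


def aggregate_by_fid(comm, local_df):
    # comm is typically MPI.COMM_WORLD; local_df has an 'FID' column plus value columns.
    pieces = comm.gather(local_df, root=0)
    if comm.Get_rank() == 0:
        # Rank 0 sums duplicated FIDs, then re-chunks the result for scattering.
        full = pd.concat(pieces).groupby('FID').sum()
        chunks = np.array_split(full, comm.Get_size())
    else:
        chunks = None
    # Each rank receives one roughly balanced chunk of the aggregated table.
    return comm.scatter(chunks, root=0)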
- crop_distribution_dst = IoShapefile(self.comm).read_shapefile_serial(file_path) - self.comm.Barrier() - crop_distribution_dst = IoShapefile(self.comm).split_shapefile(crop_distribution_dst) - else: - crop_distribution_dst = IoShapefile(self.comm).read_shapefile_parallel(file_path) + self.logger.write_log('\tAdding timezone to the shapefile.', message_level=3) + crop_distribution_dst = IoShapefile(self.comm_agr).split_shapefile(crop_distribution_dst) + crop_distribution_dst = self.add_timezone(crop_distribution_dst) + + self.logger.write_log('\tWriting the crop distribution shapefile.', message_level=3) + IoShapefile(self.comm_agr).write_shapefile_parallel(crop_distribution_dst, file_path) + + crop_distribution_dst = IoShapefile(self.comm).read_shapefile_parallel(file_path) crop_distribution_dst.set_index('FID', inplace=True, drop=True) - # Filtering crops by used on the subsector (operations, fertilizers, machinery) + # Filtering crops by used on the sub-sector (operations, fertilizers, machinery) crop_distribution_dst = crop_distribution_dst.loc[:, self.crop_list + ['timezone', 'geometry']] self.logger.write_time_log('AgriculturalSector', 'get_crops_by_dst_cell', timeit.default_timer() - spent_time) @@ -482,7 +554,7 @@ class AgriculturalSector(Sector): """ rank_list = [] - for sector, sector_procs in sector_dict.iteritems(): + for sector, sector_procs in sector_dict.items(): if sector in ['crop_operations', 'crop_fertilizers', 'agricultural_machinery']: rank_list += sector_procs rank_list = sorted(rank_list) diff --git a/hermesv3_bu/sectors/aviation_sector.py b/hermesv3_bu/sectors/aviation_sector.py index a9718fd42da441ca2343a66888fdd8fbacee342c..b5a6101ed00a06f0ddd893a4d747464366baebf0 100755 --- a/hermesv3_bu/sectors/aviation_sector.py +++ b/hermesv3_bu/sectors/aviation_sector.py @@ -10,6 +10,8 @@ import geopandas as gpd from warnings import warn from hermesv3_bu.sectors.sector import Sector +from hermesv3_bu.grids.grid import Grid +from hermesv3_bu.tools.checker import check_files, error_exit PHASE_TYPE = {'taxi_out': 'departure', 'pre-taxi_out': 'departure', 'takeoff': 'departure', 'climbout': 'departure', 'approach': 'arrival', 'taxi_in': 'arrival', 'post-taxi_in': 'arrival', 'landing': 'arrival', @@ -34,7 +36,7 @@ class AviationSector(Sector): - Taxi in - Post-taxi in """ - def __init__(self, comm, logger, auxiliary_dir, grid_shp, clip, date_array, source_pollutants, vertical_levels, + def __init__(self, comm, logger, auxiliary_dir, grid, clip, date_array, source_pollutants, vertical_levels, airport_list, plane_list, airport_shapefile_path, airport_runways_shapefile_path, airport_runways_corners_shapefile_path, airport_trajectories_shapefile_path, operations_path, planes_path, times_path, ef_dir, weekly_profiles_path, hourly_profiles_path, speciation_map_path, @@ -50,8 +52,8 @@ class AviationSector(Sector): created yet. :type auxiliary_dir: str - :param grid_shp: Shapefile with the grid horizontal distribution. - :type grid_shp: GeoDataFrame + :param grid: Grid object. + :type grid: Grid :param date_array: List of datetimes. :type date_array: list(datetime.datetime, ...) @@ -123,19 +125,25 @@ class AviationSector(Sector): file must contain the 'Specie' and 'MW' columns. 
:type molecular_weights_path: str """ + spent_time = timeit.default_timer() + logger.write_log('===== AVIATION SECTOR =====') + check_files( + [airport_shapefile_path, airport_runways_shapefile_path, airport_runways_corners_shapefile_path, + airport_trajectories_shapefile_path, operations_path, planes_path, times_path, weekly_profiles_path, + hourly_profiles_path, speciation_map_path, speciation_profiles_path, molecular_weights_path] + + [os.path.join(ef_dir, PHASE_EF_FILE[phase]) for phase in PHASE_TYPE.keys()]) + + if 'nmvoc' in source_pollutants or 'ch4' in source_pollutants: + if 'hc' not in source_pollutants: + source_pollutants.append('hc') + super(AviationSector, self).__init__( - comm, logger, auxiliary_dir, grid_shp, clip, date_array, source_pollutants, vertical_levels, None, + comm, logger, auxiliary_dir, grid, clip, date_array, source_pollutants, vertical_levels, None, weekly_profiles_path, hourly_profiles_path, speciation_map_path, speciation_profiles_path, molecular_weights_path) - if 'hc' in self.source_pollutants: - for poll in ['nmvoc', 'ch4']: - if poll not in self.source_pollutants: - self.source_pollutants.append(poll) - self.source_pollutants.remove('hc') - # self.ef_dir = ef_dir self.ef_files = self.read_ef_files(ef_dir) @@ -228,7 +236,7 @@ class AviationSector(Sector): spent_time = timeit.default_timer() if self.comm.Get_rank() == 0: runway_shapefile = gpd.read_file(airport_runways_shapefile_path) - runway_shapefile.set_index('airport_id', inplace=True) + runway_shapefile.set_index(['airport_id', 'runway_id'], inplace=True) runway_shapefile = runway_shapefile.loc[self.airport_list_full, :] runway_shapefile = runway_shapefile.loc[runway_shapefile['cons'] == 1, ['approach_f', 'climbout_f', 'geometry']] @@ -286,20 +294,16 @@ class AviationSector(Sector): :rtype: DataFrame """ spent_time = timeit.default_timer() - check = False + operations = pd.read_csv(operations_csv_path) - if check: - for index, aux_operations in operations.groupby(['airport_id', 'plane_id', 'operation']): - if len(aux_operations) > 1: - print index, len(aux_operations) if self.plane_list is None: self.plane_list = list(np.unique(operations['plane_id'].values)) else: operations = operations.loc[operations['plane_id'].isin(self.plane_list), :] if len(operations) == 0: - raise NameError("The plane/s defined in the plane_list do not exist.") + error_exit("The plane/s defined in the plane_list do not exist.") operations = operations.loc[operations['airport_id'].isin(self.airport_list), :] operations.set_index(['airport_id', 'plane_id', 'operation'], inplace=True) operations.rename(columns={'1': 1, '2': 2, '3': 3, '4': 4, '5': 5, '6': 6, '7': 7, '8': 8, '9': 9, '10': 10, @@ -320,13 +324,10 @@ class AviationSector(Sector): :rtype: DataFrame """ spent_time = timeit.default_timer() - check = False + dataframe = pd.read_csv(planes_path) dataframe = dataframe.loc[dataframe['plane_id'].isin(self.plane_list)] - if check: - for index, aux_operations in dataframe.groupby('plane_id'): - if len(aux_operations) > 1: - print index, len(aux_operations) + dataframe.set_index('plane_id', inplace=True) self.logger.write_time_log('AviationSector', 'read_planes', timeit.default_timer() - spent_time) @@ -370,8 +371,9 @@ class AviationSector(Sector): spent_time = timeit.default_timer() if self.comm.Get_rank() == 0: airport_shapefile = airport_shapefile.reset_index() - airport_shapefile = gpd.sjoin(airport_shapefile.to_crs(self.grid_shp.crs), - self.clip.shapefile.to_crs(self.grid_shp.crs), how='inner', op='intersects') + 
airport_shapefile = gpd.sjoin(airport_shapefile.to_crs(self.grid.shapefile.crs), + self.clip.shapefile.to_crs(self.grid.shapefile.crs), how='inner', + op='intersects') shp_airport_list = list(np.unique(airport_shapefile['airport_id'].values)) @@ -379,15 +381,15 @@ class AviationSector(Sector): shp_airport_list = list(set(conf_airport_list).intersection(shp_airport_list)) if len(shp_airport_list) == 0: - raise NameError("No airports intersect with the defined domain or the defined aiport/s in the " + - "airport_list do no exist ") + error_exit("No airports intersect with the defined domain or the defined airport/s in the " + + "airport_list do not exist.") airports_with_operations = np.unique(pd.read_csv(operations_file, usecols=['airport_id']).values) new_list = list(set(shp_airport_list) & set(airports_with_operations)) if len(new_list) != len(shp_airport_list): warn('{0} airports have no operations. Ignoring them.'.format( - list(set(new_list) - set(shp_airport_list)))) + list(set(shp_airport_list) - set(new_list)))) max_len = len(new_list) # Only for master (rank == 0) @@ -397,9 +399,9 @@ class AviationSector(Sector): for i in range(self.comm.size)] for sublist in new_list: if len(sublist) == 0: - raise ValueError("ERROR: The selected number of processors is to high. " + - "The maximum number of processors accepted are {0}".format(max_len) + - "(Maximum number of airports included in the working domain") + error_exit("The selected number of processors is too high. " + + "The maximum number of processors accepted is {0} ".format(max_len) + + "(maximum number of airports included in the working domain).") else: new_list = None @@ -430,9 +432,9 @@ class AviationSector(Sector): airport_shapefile = airport_shapefile.loc[self.airport_list_full, :].copy() if not os.path.exists(os.path.dirname(airport_distribution_path)): os.makedirs(os.path.dirname(airport_distribution_path)) - airport_shapefile.to_crs(self.grid_shp.crs, inplace=True) + airport_shapefile.to_crs(self.grid.shapefile.crs, inplace=True) airport_shapefile['area'] = airport_shapefile.area - airport_distribution = self.spatial_overlays(airport_shapefile, self.grid_shp.reset_index(), + airport_distribution = self.spatial_overlays(airport_shapefile, self.grid.shapefile.reset_index(), how='intersection') airport_distribution['fraction'] = airport_distribution.area / airport_distribution['area'] airport_distribution.drop(columns=['idx2', 'area', 'geometry', 'cons'], inplace=True) @@ -477,6 +479,7 @@ class AviationSector(Sector): def normalize(df): total_fraction = df['{0}_f'.format(phase_type)].values.sum() df['{0}_f'.format(phase_type)] = df['{0}_f'.format(phase_type)] / total_fraction + return df.loc[:, ['{0}_f'.format(phase_type)]] self.logger.write_log('\t\tCalculating runway distribution for {0}'.format(phase_type), message_level=2) @@ -487,17 +490,14 @@ class AviationSector(Sector): if not os.path.exists(runway_distribution_path): if self.comm.rank == 0: runway_shapefile['{0}_f'.format(phase_type)] = runway_shapefile.groupby('airport_id').apply(normalize) - if not os.path.exists(os.path.dirname(runway_distribution_path)): - os.makedirs(os.path.dirname(runway_distribution_path)) - runway_shapefile.reset_index(inplace=True) - runway_shapefile.to_crs(self.grid_shp.crs, inplace=True) + + runway_shapefile.to_crs(self.grid.shapefile.crs, inplace=True) runway_shapefile['length'] = runway_shapefile.length # duplicating each runway by involved cell - runway_shapefile = gpd.sjoin(runway_shapefile, self.grid_shp.reset_index(), how="inner", -
op='intersects') + runway_shapefile = gpd.sjoin(runway_shapefile.reset_index(), self.grid.shapefile.reset_index(), + how="inner", op='intersects') # Adding cell geometry - runway_shapefile = runway_shapefile.merge(self.grid_shp.reset_index().loc[:, ['FID', 'geometry']], - on='FID', how='left') + runway_shapefile = runway_shapefile.merge(self.grid.shapefile.reset_index(), on='FID', how='left') # Intersection between line (roadway) and polygon (cell) # runway_shapefile['geometry'] = runway_shapefile.apply(do_intersection, axis=1) runway_shapefile['mini_length'] = runway_shapefile.apply(get_intersection_length, axis=1) @@ -511,6 +511,8 @@ class AviationSector(Sector): runway_shapefile = runway_shapefile[['airport_id', 'FID', 'layer', 'fraction']] runway_shapefile = runway_shapefile.groupby(['airport_id', 'FID', 'layer']).sum() # runway_shapefile.set_index(['airport_id', 'FID', 'layer'], inplace=True) + if not os.path.exists(os.path.dirname(runway_distribution_path)): + os.makedirs(os.path.dirname(runway_distribution_path)) runway_shapefile.to_csv(runway_distribution_path) else: runway_shapefile = None @@ -601,8 +603,8 @@ class AviationSector(Sector): trajectories_distr.reset_index(inplace=True) # HORIZONTAL DISTRIBUTION - aux_grid = self.grid_shp.to_crs(trajectories_distr.crs).reset_index() - # trajectories_distr.to_crs(self.grid_shp.crs, inplace=True) + aux_grid = self.grid.shapefile.to_crs(trajectories_distr.crs).reset_index() + # trajectories_distr.to_crs(self.grid.shapefile.crs, inplace=True) # duplicating each runway by involved cell trajectories_distr = gpd.sjoin(trajectories_distr, aux_grid, how="inner", op='intersects') # Adding cell geometry @@ -1015,7 +1017,7 @@ class AviationSector(Sector): self.logger.write_log('\t\tTrajectory emissions distributed (approach, climb out)', message_level=2) emissions = pd.concat([airport_emissions, runway_departure_emissions, trajectory_arrival_emissions, - trajectory_departure_emisions, runway_arrival_emissions]) + trajectory_departure_emisions, runway_arrival_emissions], sort=False) emissions = emissions.groupby(['FID', 'layer', 'tstep']).sum() runway_arrival_emissions_wear = runway_arrival_emissions_wear.groupby(['FID', 'layer', 'tstep']).sum() @@ -1024,11 +1026,11 @@ class AviationSector(Sector): emissions['nmvoc'] = 0.9 * emissions['hc'] emissions['ch4'] = 0.1 * emissions['hc'] - # Speceiation + # Speciation runway_arrival_emissions_wear = self.speciate(runway_arrival_emissions_wear, 'landing_wear') emissions = self.speciate(emissions, 'default') - emissions = pd.concat([emissions, runway_arrival_emissions_wear]) + emissions = pd.concat([emissions, runway_arrival_emissions_wear], sort=False) emissions = emissions[(emissions.T != 0).any()] emissions = emissions.groupby(['FID', 'layer', 'tstep']).sum() diff --git a/hermesv3_bu/sectors/livestock_sector.py b/hermesv3_bu/sectors/livestock_sector.py index 1d403cfe5f4abfd17a9f4e80d30938240633bb1a..51c4c69b247184216159ff05df20c8f9e90cda9b 100755 --- a/hermesv3_bu/sectors/livestock_sector.py +++ b/hermesv3_bu/sectors/livestock_sector.py @@ -12,6 +12,10 @@ from hermesv3_bu.sectors.sector import Sector from hermesv3_bu.io_server.io_shapefile import IoShapefile from hermesv3_bu.io_server.io_raster import IoRaster from hermesv3_bu.io_server.io_netcdf import IoNetcdf +from hermesv3_bu.grids.grid import Grid +from hermesv3_bu.tools.checker import check_files, error_exit + +from geopandas import GeoDataFrame # Constants for grassing daily factor estimation SIGMA = 60 @@ -22,7 +26,7 @@ class 
LivestockSector(Sector): """ Class that contains all the information and methods to calculate the livestock emissions. """ - def __init__(self, comm, logger, auxiliary_dir, grid_shp, clip, date_array, source_pollutants, vertical_levels, + def __init__(self, comm, logger, auxiliary_dir, grid, clip, date_array, source_pollutants, vertical_levels, animal_list, gridded_livestock_path, correction_split_factors_path, temperature_dir, wind_speed_dir, denominator_yearly_factor_dir, ef_dir, monthly_profiles_path, weekly_profiles_path, hourly_profiles_path, speciation_map_path, speciation_profiles_path, molecular_weights_path, @@ -43,8 +47,8 @@ class LivestockSector(Sector): ['nox_no', 'nh3', 'nmvoc', 'pm10', 'pm25'] :type source_pollutants: list - :param grid_shp: Shapefile that contains the destination grid. It must contains the 'FID' (cell num). - :type grid_shp: GeoPandas.GeoDataframe + :param grid: Grid object. + :type grid: Grid :param clip: Clip. :type clip: Clip @@ -148,13 +152,26 @@ class LivestockSector(Sector): :type molecular_weights_path: str :param nut_shapefile_path: Path to the shapefile that contain the NUT polygons. The shapefile must contain - the 'ORDER07' information with the NUT_code. + the 'nuts3_id' information with the NUT_code. :type nut_shapefile_path: str """ spent_time = timeit.default_timer() logger.write_log('===== LIVESTOCK SECTOR =====') + + check_files( + [gridded_livestock_path.replace('<animal>', animal) for animal in animal_list] + + [correction_split_factors_path.replace('<animal>', animal) for animal in animal_list] + + [temperature_dir, wind_speed_dir, + denominator_yearly_factor_dir, monthly_profiles_path, weekly_profiles_path, hourly_profiles_path, + speciation_map_path, speciation_profiles_path, molecular_weights_path, nut_shapefile_path] + + [os.path.join(ef_dir, ef_file) for ef_file in + ['{0}.csv'.format(pol) for pol in source_pollutants if pol not in ['pm10', 'pm25']]]) + for pol in source_pollutants: + if pol in ['pm10', 'pm25']: + check_files(os.path.join(ef_dir, 'pm.csv')) + super(LivestockSector, self).__init__( - comm, logger, auxiliary_dir, grid_shp, clip, date_array, source_pollutants, vertical_levels, + comm, logger, auxiliary_dir, grid, clip, date_array, source_pollutants, vertical_levels, monthly_profiles_path, weekly_profiles_path, hourly_profiles_path, speciation_map_path, speciation_profiles_path, molecular_weights_path) @@ -187,37 +204,32 @@ class LivestockSector(Sector): :type gridded_livestock_path: str :param nut_shapefile_path: Path to the shapefile that contain the NUT polygons. The shapefile must contain - the 'ORDER07' information with the NUT ID. + the 'nuts3_id' information with the NUT ID. :type nut_shapefile_path: str :param correction_split_factors_path: Path to the CSV file that contains the correction factors and the splitting factors to discretizise each animal into theirs different animal types. '<animal>' will be replaced by each animal of the animal list. - The CSV file must contain the following columns ["NUT", "nut_code", "<animal>_fact", "<animal>_01", ...] + The CSV file must contain the following columns ["nuts3_na", "nuts3_id", "<animal>_fact", "<animal>_01",...] "nut_code" column must contain the NUT ID.
:type correction_split_factors_path: str - :return: GeoDataframe with the amount of each animal subtype by destiny cell (FID) + :return: GeoDataFrame with the amount of each animal subtype by destiny cell (FID) Columns: 'FID', 'cattle_01', 'cattle_02', 'cattle_03' 'cattle_04', 'cattle_05', 'cattle_06', 'cattle_07', 'cattle_08', 'cattle_09', 'cattle_10', 'cattle_11', 'chicken_01', 'chicken_02', 'goats_01', 'goats_02', 'goats_03', goats_04', 'goats_05', 'goats_06', 'pigs_01', 'pigs_02', 'pigs_03', 'pigs_04', 'pigs_05', 'pigs_06', 'pigs_07', 'pigs_08', 'pigs_09', 'pigs_10', 'timezone', 'geometry' - :rtype: geopandas.GeoDataframe + :rtype: GeoDataFrame """ spent_time = timeit.default_timer() self.logger.write_log('\tCreating animal distribution', message_level=2) - # Work for master MPI process - if self.comm.Get_rank() == 0: - animals_df = self.create_animals_shapefile(gridded_livestock_path) - animals_df = self.animal_distribution_by_category(animals_df, nut_shapefile_path, - correction_split_factors_path) - else: - animals_df = None - # Split distribution, in a balanced way, between MPI process - animals_df = IoShapefile(self.comm).split_shapefile(animals_df) + animals_df = self.create_animals_shapefile(gridded_livestock_path) + animals_df = self.animal_distribution_by_category(animals_df, nut_shapefile_path, + correction_split_factors_path) + self.logger.write_log('Animal distribution done', message_level=2) self.logger.write_time_log('LivestockSector', 'create_animals_distribution', timeit.default_timer() - spent_time) @@ -253,14 +265,14 @@ class LivestockSector(Sector): /livestock/animal_distribution//.shp Will be created also the clipped raster (TIFF) following the example path - /livestock/animal_distribution//_clip.tiff + /livestock/animal_distribution//_clip.tif :param gridded_livestock_path: Path to the Raster (TIFF) that contains the animal distribution. '' will be replaced by each animal of the animal list. :type gridded_livestock_path: str :return: Shapefile with the amount of each animal of the animal list in the source resolution. 
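create_animals_shapefile_src_resolution, reworked in this part of the livestock diff, vectorises each animal raster separately and then joins the per-animal columns into a single table keyed by CELL_ID. A simplified sketch of that column assembly, assuming each per-animal frame carries only a 'CELL_ID' and a 'data' column (the raster-to-shapefile step itself relies on the project IoRaster helpers and is not shown):

import pandas as pd


def merge_animal_columns(per_animal_frames):
    # per_animal_frames: {animal_name: DataFrame with 'CELL_ID' and 'data' columns}
    merged = None
    for animal, frame in per_animal_frames.items():
        frame = frame.rename(columns={'data': animal}).set_index('CELL_ID')
        if merged is None:
            merged = frame
        else:
            merged = merged.join(frame[[animal]], how='outer')
    # Drop cells where every animal count is zero, as the diff does.
    return merged.loc[(merged.fillna(0) != 0).any(axis=1), :]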
- :rtype: geopandas.GeoDataframe + :rtype: GeoDataFrame """ spent_time = timeit.default_timer() self.logger.write_log('\t\tCreating animal shapefile into source resolution', message_level=3) @@ -269,22 +281,22 @@ class LivestockSector(Sector): for animal in self.animal_list: self.logger.write_log('\t\t\t {0}'.format(animal), message_level=3) # Each one of the animal distributions will be stored separately - animal_distribution_path = os.path.join(self.auxiliary_dir, 'livestock', 'animal_distribution', animal, - '{0}.shp'.format(animal)) + animal_distribution_path = os.path.join(self.auxiliary_dir, 'livestock', animal, '{0}.shp'.format(animal)) if not os.path.exists(animal_distribution_path): # Create clipped raster file - clipped_raster_path = IoRaster(self.comm).clip_raster_with_shapefile_poly( - gridded_livestock_path.replace('', animal), self.clip.shapefile, - os.path.join(self.auxiliary_dir, 'livestock', 'animal_distribution', animal, - '{0}_clip.tiff'.format(animal))) - - animal_df = IoRaster(self.comm).to_shapefile_serie(clipped_raster_path, animal_distribution_path, - write=True) + clipped_raster_path = os.path.join( + self.auxiliary_dir, 'livestock', animal, '{0}_clip.tif'.format(animal)) + if self.comm.Get_rank() == 0: + clipped_raster_path = IoRaster(self.comm).clip_raster_with_shapefile_poly( + gridded_livestock_path.replace('', animal), self.clip.shapefile, clipped_raster_path) + + animal_df = IoRaster(self.comm).to_shapefile_parallel(clipped_raster_path) + animal_df.rename(columns={'data': animal}, inplace=True) + animal_df.set_index('CELL_ID', inplace=True) + IoShapefile(self.comm).write_shapefile_parallel(animal_df.reset_index(), animal_distribution_path) else: - animal_df = IoShapefile(self.comm).read_shapefile_serial(animal_distribution_path) - - animal_df.rename(columns={'data': animal}, inplace=True) - animal_df.set_index('CELL_ID', inplace=True) + animal_df = IoShapefile(self.comm).read_shapefile_parallel(animal_distribution_path) + animal_df.set_index('CELL_ID', inplace=True) # Creating full animal shapefile if animal_distribution is None: @@ -298,9 +310,9 @@ class LivestockSector(Sector): # Removing empty data animal_distribution = animal_distribution.loc[(animal_distribution[self.animal_list] != 0).any(axis=1), :] + self.logger.write_time_log('LivestockSector', 'create_animals_shapefile_src_resolution', timeit.default_timer() - spent_time) - return animal_distribution def animals_shapefile_to_dst_resolution(self, animal_distribution): @@ -308,21 +320,24 @@ class LivestockSector(Sector): Interpolates the source distribution into the destiny grid. :param animal_distribution: Animal distribution shapefile in the source resolution. - :type animal_distribution: geopandas.GeoDataframe + :type animal_distribution: GeoDataFrame :return: Animal distribution shapefile in the destiny resolution. 
- :rtype: geopandas.GeoDataframe + :rtype: GeoDataFrame """ spent_time = timeit.default_timer() self.logger.write_log('\t\tCreating animal shapefile into destiny resolution', message_level=3) - self.grid_shp.reset_index(inplace=True) + self.grid.shapefile.reset_index(inplace=True) + + animal_distribution = IoShapefile(self.comm).balance(animal_distribution) # Changing coordinates system to the grid one - animal_distribution.to_crs(self.grid_shp.crs, inplace=True) + animal_distribution.to_crs(self.grid.shapefile.crs, inplace=True) # Getting src area animal_distribution['src_inter_fraction'] = animal_distribution.geometry.area # Making the intersection between the src distribution and the destiny grid - animal_distribution = self.spatial_overlays(animal_distribution, self.grid_shp, how='intersection') + animal_distribution = self.spatial_overlays(animal_distribution.reset_index(), self.grid.shapefile, + how='intersection') # Getting proportion of intersection in the src cell (src_area/portion_area) animal_distribution['src_inter_fraction'] = \ animal_distribution.geometry.area / animal_distribution['src_inter_fraction'] @@ -332,11 +347,17 @@ class LivestockSector(Sector): # Sum by destiny cell animal_distribution = animal_distribution.loc[:, self.animal_list + ['FID']].groupby('FID').sum() - self.grid_shp.set_index('FID', drop=False, inplace=True) - # Adding geometry and coordinates system from the destiny grid shapefile - animal_distribution = gpd.GeoDataFrame(animal_distribution, crs=self.grid_shp.crs, - geometry=self.grid_shp.loc[animal_distribution.index, 'geometry']) - animal_distribution.reset_index(inplace=True) + animal_distribution = IoShapefile(self.comm).gather_shapefile(animal_distribution.reset_index()) + if self.comm.Get_rank() == 0: + animal_distribution = animal_distribution.groupby('FID').sum() + # Adding geometry and coordinates system from the destiny grid shapefile + animal_distribution = gpd.GeoDataFrame( + animal_distribution, crs=self.grid.shapefile.crs, + geometry=self.grid.shapefile.loc[animal_distribution.index, 'geometry']) + else: + animal_distribution = None + + animal_distribution = IoShapefile(self.comm).split_shapefile(animal_distribution) self.logger.write_time_log('LivestockSector', 'animals_shapefile_to_dst_resolution', timeit.default_timer() - spent_time) @@ -358,15 +379,15 @@ class LivestockSector(Sector): :return: """ spent_time = timeit.default_timer() - animal_distribution_path = os.path.join(self.auxiliary_dir, 'livestock', 'animal_distribution', - 'animal_distribution.shp') + animal_distribution_path = os.path.join(self.auxiliary_dir, 'livestock', 'animal_distribution') if not os.path.exists(animal_distribution_path): dataframe = self.create_animals_shapefile_src_resolution(gridded_livestock_path) dataframe = self.animals_shapefile_to_dst_resolution(dataframe) - IoShapefile(self.comm).write_shapefile_serial(dataframe, animal_distribution_path) + IoShapefile(self.comm).write_shapefile_parallel(dataframe.reset_index(), animal_distribution_path) else: - dataframe = IoShapefile(self.comm).read_shapefile_serial(animal_distribution_path) + dataframe = IoShapefile(self.comm).read_shapefile_parallel(animal_distribution_path) + dataframe.set_index('FID', inplace=True) self.logger.write_time_log('LivestockSector', 'create_animals_shapefile', timeit.default_timer() - spent_time) return dataframe @@ -382,7 +403,7 @@ class LivestockSector(Sector): splitting factors to discretizise each animal into theirs different animal types. 
'' will be replaced by each animal of the animal list. - The CSV file must contain the following columns ["NUT", "nut_code", "_fact", "_01", ...] + The CSV file must contain the following columns ["nuts3_na", "nuts3_id", "_fact", "_01",...] "nut_code" column must contain the NUT ID. :type correction_split_factors_path: str @@ -393,40 +414,41 @@ class LivestockSector(Sector): splitting_factors_list = [] for animal in self.animal_list: correction_split_factors = pd.read_csv(correction_split_factors_path.replace('', animal)) - correction_split_factors.set_index('nut_code', inplace=True) + correction_split_factors.set_index('nuts3_id', inplace=True) categories = list(correction_split_factors.columns.values) - categories = [e for e in categories if e not in ['NUT', 'nut_code', '{0}_fact'.format(animal)]] + categories = [e for e in categories if e not in ['nuts3_na', 'nuts3_id', '{0}_fact'.format(animal)]] correction_split_factors[categories] = correction_split_factors.loc[:, categories].multiply( correction_split_factors['{0}_fact'.format(animal)], axis='index') - correction_split_factors.drop(columns=['NUT', '{0}_fact'.format(animal)], inplace=True) + + correction_split_factors.drop(columns=['nuts3_na', '{0}_fact'.format(animal)], inplace=True) splitting_factors_list.append(correction_split_factors) splitting_factors = pd.concat(splitting_factors_list, axis=1) splitting_factors.reset_index(inplace=True) - splitting_factors['nut_code'] = splitting_factors['nut_code'].astype(np.int16) + splitting_factors['nuts3_id'] = splitting_factors['nuts3_id'].astype(np.int16) self.logger.write_time_log('LivestockSector', 'get_splitting_factors', timeit.default_timer() - spent_time) return splitting_factors - def animal_distribution_by_category(self, dataframe, nut_shapefile_path, correction_split_factors_path): + def animal_distribution_by_category(self, animal_distribution, nut_shapefile_path, correction_split_factors_path): """ Split the animal categories into as many categories as each animal type has. - :param dataframe: GeoDataframe with the animal distribution by animal type. - :type dataframe: geopandas.GeoDataframe + :param animal_distribution: GeoDataFrame with the animal distribution by animal type. + :type animal_distribution: GeoDataFrame :param nut_shapefile_path: Path to the shapefile that contain the NUT polygons. The shapefile must contain - the 'ORDER07' information with the NUT_code. + the 'nuts3_id' information with the NUT_code. :type nut_shapefile_path: str :param correction_split_factors_path: Path to the CSV file that contains the correction factors and the splitting factors to discretizise each animal into theirs different animal types. '' will be replaced by each animal of the animal list. - The CSV file must contain the following columns ["NUT", "nut_code", "_fact", "_01", + The CSV file must contain the following columns ["nuts3_na", "nuts3_id", "_fact", "_01", ...] 
- "nut_code" column must contain the NUT ID + "nuts3_id" column must contain the NUT ID :type correction_split_factors_path: str :return: GeoDataframe with the amount of each animal subtype by destiny cell (FID) @@ -435,36 +457,44 @@ class LivestockSector(Sector): 'cattle_08', 'cattle_09', 'cattle_10', 'cattle_11', 'chicken_01', 'chicken_02', 'goats_01', 'goats_02', 'goats_03', goats_04', 'goats_05', 'goats_06', 'pigs_01', 'pigs_02', 'pigs_03', 'pigs_04', 'pigs_05', 'pigs_06', 'pigs_07', 'pigs_08', 'pigs_09', 'pigs_10', 'timezone', 'geometry' - :rtype: geopandas.GeoDataframe + :rtype: GeoDataFrame """ spent_time = timeit.default_timer() - animal_distribution_path = os.path.join(self.auxiliary_dir, 'livestock', 'animal_distribution', - 'animal_distribution_by_cat.shp') + animal_distribution_path = os.path.join(self.auxiliary_dir, 'livestock', 'animal_distribution_by_cat') if not os.path.exists(animal_distribution_path): - dataframe = self.add_nut_code(dataframe, nut_shapefile_path, nut_value='ORDER07') + animal_distribution = self.add_nut_code(animal_distribution.reset_index(), nut_shapefile_path, + nut_value='nuts3_id') + animal_distribution.rename(columns={'nut_code': 'nuts3_id'}, inplace=True) + animal_distribution = animal_distribution[animal_distribution['nuts3_id'] != -999] + animal_distribution = IoShapefile(self.comm).balance(animal_distribution) + animal_distribution.set_index('FID', inplace=True) splitting_factors = self.get_splitting_factors(correction_split_factors_path) # Adding the splitting factors by NUT code - dataframe = pd.merge(dataframe, splitting_factors, how='left', on='nut_code') - - dataframe.drop(columns=['nut_code'], inplace=True) + animal_distribution = pd.merge(animal_distribution.reset_index(), splitting_factors, how='left', + on='nuts3_id') + animal_distribution.set_index('FID', inplace=True) + animal_distribution.drop(columns=['nuts3_id'], inplace=True) for animal in self.animal_list: - animal_types = [i for i in list(dataframe.columns.values) if i.startswith(animal)] - dataframe.loc[:, animal_types] = dataframe.loc[:, animal_types].multiply(dataframe[animal], - axis='index') - dataframe.drop(columns=[animal], inplace=True) + animal_types = [i for i in list(animal_distribution.columns.values) if i.startswith(animal)] + animal_distribution.loc[:, animal_types] = animal_distribution.loc[:, animal_types].multiply( + animal_distribution[animal], axis='index') + animal_distribution.drop(columns=[animal], inplace=True) + + animal_distribution = self.add_timezone(animal_distribution) + animal_distribution.set_index('FID', inplace=True) - dataframe = self.add_timezone(dataframe) - IoShapefile(self.comm).write_shapefile_serial(dataframe, animal_distribution_path) + IoShapefile(self.comm).write_shapefile_parallel(animal_distribution.reset_index(), animal_distribution_path) else: - dataframe = IoShapefile(self.comm).read_shapefile_serial(animal_distribution_path) + animal_distribution = IoShapefile(self.comm).read_shapefile_parallel(animal_distribution_path) + animal_distribution.set_index('FID', inplace=True) self.logger.write_time_log('LivestockSector', 'animal_distribution_by_category', timeit.default_timer() - spent_time) - return dataframe + return animal_distribution def get_daily_factors(self, animal_shp, day): """ @@ -484,13 +514,13 @@ class LivestockSector(Sector): 'cattle_08', 'cattle_09', 'cattle_10', 'cattle_11', 'chicken_01', 'chicken_02', 'goats_01', 'goats_02', 'goats_03', goats_04', 'goats_05', 'goats_06', 'pigs_01', 'pigs_02', 'pigs_03', 'pigs_04', 
'pigs_05', 'pigs_06', 'pigs_07', 'pigs_08', 'pigs_09', 'pigs_10', 'timezone', 'geometry' - :type animal_shp: geopandas.GeoDataframe + :type animal_shp: GeoDataFrame :param day: Date of the day to generate. :type day: datetime.date :return: Shapefile with the daily factors. - :rtype: geopandas.GeoDataframe + :rtype: GeoDataFrame """ import math spent_time = timeit.default_timer() @@ -645,15 +675,15 @@ class LivestockSector(Sector): 'cattle_08', 'cattle_09', 'cattle_10', 'cattle_11', 'chicken_01', 'chicken_02', 'goats_01', 'goats_02', 'goats_03', goats_04', 'goats_05', 'goats_06', 'pigs_01', 'pigs_02', 'pigs_03', 'pigs_04', 'pigs_05', 'pigs_06', 'pigs_07', 'pigs_08', 'pigs_09', 'pigs_10', 'timezone', 'geometry' - :type animals_df: geopandas.GeoDataframe + :type animals_df: GeoDataFrame :param daily_factors: GeoDataframe with the daily factors. Columns: 'REC', 'geometry', 'FD_housing_open', 'FD_housing_closed, 'FD_storage', 'FD_grassing' - :type daily_factors: geopandas.GeoDataframe + :type daily_factors: GeoDataFrame :return: Animal distribution with the daily factors. - :rtype: geopandas.GeoDataframe + :rtype: GeoDataFrame """ spent_time = timeit.default_timer() animals_df = animals_df.to_crs({'init': 'epsg:4326'}) @@ -675,21 +705,23 @@ class LivestockSector(Sector): """ Calculate the emissions, already speciated, corresponding to the given day. - :param animals_df: GeoDataframe with the amount of each animal subtype by destiny cell (FID) + :param animals_df: GeoDataFrame with the amount of each animal subtype by destiny cell (FID) Columns: 'FID', 'cattle_01', 'cattle_02', 'cattle_03' 'cattle_04', 'cattle_05', 'cattle_06', 'cattle_07', 'cattle_08', 'cattle_09', 'cattle_10', 'cattle_11', 'chicken_01', 'chicken_02', 'goats_01', 'goats_02', 'goats_03', goats_04', 'goats_05', 'goats_06', 'pigs_01', 'pigs_02', 'pigs_03', 'pigs_04', 'pigs_05', 'pigs_06', 'pigs_07', 'pigs_08', 'pigs_09', 'pigs_10', 'timezone', 'geometry' - :type animals_df: geopandas.GeoDataframe + :type animals_df: GeoDataFrame :param day: Date of the day to generate. :type day: datetime.date - :return: GeoDataframe with the daily emissions by destiny cell. - :rtype: geopandas.GeoDataframe + :return: GeoDataFrame with the daily emissions by destiny cell. + :rtype: GeoDataFrame """ spent_time = timeit.default_timer() + + animals_df.reset_index(inplace=True) daily_factors = self.get_daily_factors(animals_df.loc[:, ['FID', 'geometry']], day) animals_df = self.add_daily_factors_to_animal_distribution(animals_df, daily_factors) @@ -719,7 +751,7 @@ class LivestockSector(Sector): (animals_df[animal['Code']] * animals_df['FD_housing_closed']).multiply( animal['EF_housing']) else: - raise KeyError('Animal {0} not found on the nh3 emission factors file.'.format(animal.Code)) + error_exit('Animal {0} not found on the nh3 emission factors file.'.format(animal.Code)) # Storage emissions out_df.loc[:, out_p] += \ (animals_df[animal['Code']] * animals_df['FD_storage']).multiply(animal['EF_yarding']) @@ -794,7 +826,7 @@ class LivestockSector(Sector): out_df.loc[:, out_p] = out_df.loc[:, out_p].multiply(1000. * (1. / self.molecular_weights['pm10'])) # Preparing PM10 for PMC - if 'pmc' in [x.lower() for x in self.speciation_map.iterkeys()]: + if 'pmc' in [x.lower() for x in self.speciation_map.keys()]: out_df['aux_pm10'] = 0 for i, animal in pd.read_csv(os.path.join(self.ef_dir, 'pm.csv')).iterrows(): # Iterating by animal subtype @@ -846,7 +878,7 @@ class LivestockSector(Sector): out_df.loc[:, out_p] = out_df.loc[:, out_p].multiply(1000. * (1. 
/ self.molecular_weights['pm25'])) # Preparing PM2.5 for PMC - if 'pmc' in [x.lower() for x in self.speciation_map.iterkeys()]: + if 'pmc' in [x.lower() for x in self.speciation_map.keys()]: out_df['aux_pm25'] = 0 for i, animal in pd.read_csv(os.path.join(self.ef_dir, 'pm.csv')).iterrows(): if animal.Code.startswith(tuple(self.animal_list)): @@ -890,7 +922,7 @@ class LivestockSector(Sector): (30. / 14.) * 1000. * (1. / self.molecular_weights['nox_no'])) # ===== PMC ===== - if 'pmc' in [x.lower() for x in self.speciation_map.iterkeys()]: + if 'pmc' in [x.lower() for x in self.speciation_map.keys()]: pmc_name = 'PMC' self.logger.write_log('\t\t\tCalculating {0} emissions'.format(pmc_name), message_level=3) if all(x in [x.lower() for x in self.source_pollutants] for x in ['pm10', 'pm25']): @@ -921,7 +953,7 @@ class LivestockSector(Sector): 'cattle_08', 'cattle_09', 'cattle_10', 'cattle_11', 'chicken_01', 'chicken_02', 'goats_01', 'goats_02', 'goats_03', goats_04', 'goats_05', 'goats_06', 'pigs_01', 'pigs_02', 'pigs_03', 'pigs_04', 'pigs_05', 'pigs_06', 'pigs_07', 'pigs_08', 'pigs_09', 'pigs_10', 'timezone', 'geometry' - :type animals_df: geopandas.GeoDataframe + :type animals_df: GeoDataFrame :return: Dictionary with the day as key (same key as self.day_dict) and the daily emissions as value. :rtype: dict @@ -943,7 +975,7 @@ class LivestockSector(Sector): :type df_by_day: dict :return: GeoDataframe with all the time steps (each time step have the daily emission) - :rtype: geopandas.GeoDataframe + :rtype: GeoDataFrame """ spent_time = timeit.default_timer() df_list = [] @@ -970,7 +1002,7 @@ class LivestockSector(Sector): :type dict_by_day: dict :return: GeoDataframe with the hourly distribution. - :rtype: geopandas.GeoDataframe + :rtype: GeoDataFrame """ spent_time = timeit.default_timer() @@ -1024,7 +1056,7 @@ class LivestockSector(Sector): Calculate the livestock emissions hourly distributed. :return: GeoDataframe with all the emissions. - :rtype: geopandas.GeoDataframe + :rtype: GeoDataFrame """ spent_time = timeit.default_timer() self.logger.write_log('\tCalculating emissions') diff --git a/hermesv3_bu/sectors/point_source_sector.py b/hermesv3_bu/sectors/point_source_sector.py index d313d068f532ec6b7113f1864c3e3f1b9a40144b..2b7d1b010dd7aefd4d126d10f2506ff1ceb43f83 100755 --- a/hermesv3_bu/sectors/point_source_sector.py +++ b/hermesv3_bu/sectors/point_source_sector.py @@ -1,5 +1,6 @@ #!/usr/bin/env python +import sys import os import timeit import numpy as np @@ -10,6 +11,7 @@ from hermesv3_bu.sectors.sector import Sector from hermesv3_bu.io_server.io_shapefile import IoShapefile # from hermesv3_bu.io_server.io_netcdf import IoNetcdf from hermesv3_bu.logger.log import Log +from hermesv3_bu.tools.checker import check_files, error_exit INTERPOLATION_TYPE = 'linear' # GRAVITI m/s-2 @@ -22,8 +24,8 @@ class PointSourceSector(Sector): """ Class to calculate the Point Source emissions - :param grid_shp: Grid of the destination domain - :type grid_shp: Grid + :param grid: Grid of the destination domain + :type grid: Grid :param catalog_path: Path to the fine that contains all the information for each point source. :type catalog_path: str @@ -46,27 +48,60 @@ class PointSourceSector(Sector): :param sector_list: List os sectors (SNAPS) to take into account. 
01, 03, 04, 09 :type sector_list: list """ - def __init__(self, comm, logger, auxiliary_dir, grid_shp, clip, date_array, source_pollutants, vertical_levels, + def __init__(self, comm, logger, auxiliary_dir, grid, clip, date_array, source_pollutants, vertical_levels, catalog_path, monthly_profiles_path, weekly_profiles_path, hourly_profiles_path, speciation_map_path, speciation_profiles_path, sector_list, measured_emission_path, molecular_weights_path, plume_rise=False, plume_rise_pahts=None): spent_time = timeit.default_timer() - + logger.write_log('===== POINT SOURCES SECTOR =====') + check_files( + [catalog_path, monthly_profiles_path, weekly_profiles_path, hourly_profiles_path, speciation_map_path, + speciation_profiles_path]) super(PointSourceSector, self).__init__( - comm, logger, auxiliary_dir, grid_shp, clip, date_array, source_pollutants, vertical_levels, + comm, logger, auxiliary_dir, grid, clip, date_array, source_pollutants, vertical_levels, monthly_profiles_path, weekly_profiles_path, hourly_profiles_path, speciation_map_path, speciation_profiles_path, molecular_weights_path) self.plume_rise = plume_rise - self.catalog = self.read_catalog(catalog_path, sector_list) - + self.catalog = self.read_catalog_shapefile(catalog_path, sector_list) + self.check_catalog() self.catalog_measured = self.read_catalog_for_measured_emissions(catalog_path, sector_list) self.measured_path = measured_emission_path self.plume_rise_pahts = plume_rise_pahts self.logger.write_time_log('PointSourceSector', '__init__', timeit.default_timer() - spent_time) - def read_catalog(self, catalog_path, sector_list): + def check_catalog(self): + # Checking monthly profiles IDs + links_month = set(np.unique(self.catalog['P_month'].dropna().values)) + month = set(self.monthly_profiles.index.values) + month_res = links_month - month + if len(month_res) > 0: + error_exit("The following monthly profile IDs reported in the point sources shapefile do not appear " + + "in the monthly profiles file. {0}".format(month_res)) + # Checking weekly profiles IDs + links_week = set(np.unique(self.catalog['P_week'].dropna().values)) + week = set(self.weekly_profiles.index.values) + week_res = links_week - week + if len(week_res) > 0: + error_exit("The following weekly profile IDs reported in the point sources shapefile do not appear " + + "in the weekly profiles file. {0}".format(week_res)) + # Checking hourly profiles IDs + links_hour = set(np.unique(self.catalog['P_hour'].dropna().values)) + hour = set(self.hourly_profiles.index.values) + hour_res = links_hour - hour + if len(hour_res) > 0: + error_exit("The following hourly profile IDs reported in the point sources shapefile do not appear " + + "in the hourly profiles file. {0}".format(hour_res)) + # Checking specly profiles IDs + links_spec = set(np.unique(self.catalog['P_spec'].dropna().values)) + spec = set(self.speciation_profile.index.values) + spec_res = links_spec - spec + if len(spec_res) > 0: + error_exit("The following speciation profile IDs reported in the point sources shapefile do not appear " + + "in the speciation profiles file. 
{0}".format(spec_res)) + + def read_catalog_csv(self, catalog_path, sector_list): """ Read the catalog @@ -121,7 +156,64 @@ class PointSourceSector(Sector): self.logger.write_time_log('PointSourceSector', 'read_catalog', timeit.default_timer() - spent_time) return catalog_df - def read_catalog_for_measured_emissions(self, catalog_path, sector_list): + def read_catalog_shapefile(self, catalog_path, sector_list): + """ + Read the catalog + + :param catalog_path: path to the catalog + :type catalog_path: str + + :param sector_list: List of sectors to take into account + :type sector_list: list + + :return: catalog + :rtype: DataFrame + """ + spent_time = timeit.default_timer() + + if self.comm.Get_rank() == 0: + if self.plume_rise: + columns = {"Code": np.str, "Cons": np.bool, "SNAP": np.str, "Height": np.float64, + "Diameter": np.float64, "Speed": np.float64, "Temp": np.float64, "AF": np.float64, + "P_month": np.str, "P_week": np.str, "P_hour": np.str, "P_spec": np.str} + else: + columns = {"Code": np.str, "Cons": np.bool, "SNAP": np.str, "Height": np.float64, "AF": np.float64, + "P_month": np.str, "P_week": np.str, "P_hour": np.str, "P_spec": np.str} + for pollutant in self.source_pollutants: + # EF in Kg / Activity factor + columns['EF_{0}'.format(pollutant)] = np.float64 + + catalog_df = gpd.read_file(catalog_path) + + columns_to_drop = list(set(catalog_df.columns.values) - set(list(columns.keys()) + ['geometry'])) + + if len(columns_to_drop) > 0: + catalog_df.drop(columns=columns_to_drop, inplace=True) + for col, typ in columns.items(): + catalog_df[col] = catalog_df[col].astype(typ) + + # Filtering + catalog_df = catalog_df.loc[catalog_df['Cons'] == 1, :] + catalog_df.drop('Cons', axis=1, inplace=True) + + # Filtering + catalog_df = catalog_df.loc[catalog_df['AF'] != -1, :] + + if sector_list is not None: + catalog_df = catalog_df.loc[catalog_df['SNAP'].str[:2].isin(sector_list)] + catalog_df.drop('SNAP', axis=1, inplace=True) + + catalog_df = gpd.sjoin(catalog_df, self.clip.shapefile.to_crs(catalog_df.crs), how='inner') + catalog_df.drop(columns=['index_right'], inplace=True) + + else: + catalog_df = None + self.comm.Barrier() + catalog_df = IoShapefile(self.comm).split_shapefile(catalog_df) + self.logger.write_time_log('PointSourceSector', 'read_catalog', timeit.default_timer() - spent_time) + return catalog_df + + def read_catalog_for_measured_emissions_csv(self, catalog_path, sector_list): """ Read the catalog @@ -164,6 +256,56 @@ class PointSourceSector(Sector): timeit.default_timer() - spent_time) return catalog_df + def read_catalog_for_measured_emissions(self, catalog_path, sector_list): + """ + Read the catalog + + :param catalog_path: path to the catalog + :type catalog_path: str + + :param sector_list: List of sectors to take into account + :type sector_list: list + + :return: catalog + :rtype: DataFrame + """ + spent_time = timeit.default_timer() + + if self.plume_rise: + columns = {"Code": np.str, "Cons": np.bool, "SNAP": np.str, "Lon": np.float64, "Lat": np.float64, + "Height": np.float64, "Diameter": np.float64, "Speed": np.float64, "Temp": np.float64, + "AF": np.float64, "P_spec": np.str} + else: + columns = {"Code": np.str, "Cons": np.bool, "SNAP": np.str, "Lon": np.float64, "Lat": np.float64, + "Height": np.float64, "AF": np.float64, "P_spec": np.str} + # for pollutant in self.pollutant_list: + # columns['EF_{0}'.format(pollutant)] = settings.precision + + catalog_df = gpd.read_file(catalog_path) + + columns_to_drop = list(set(catalog_df.columns.values) - 
set(list(columns.keys()) + ['geometry'])) + + if len(columns_to_drop) > 0: + catalog_df.drop(columns=columns_to_drop, inplace=True) + for col, typ in columns.items(): + catalog_df[col] = catalog_df[col].astype(typ) + + # Filtering + catalog_df = catalog_df.loc[catalog_df['Cons'] == 1, :] + catalog_df.drop('Cons', axis=1, inplace=True) + + # Filtering + catalog_df = catalog_df.loc[catalog_df['AF'] == -1, :] + catalog_df.drop('AF', axis=1, inplace=True) + + if sector_list is not None: + catalog_df = catalog_df.loc[catalog_df['SNAP'].str[:2].isin(sector_list)] + catalog_df.drop('SNAP', axis=1, inplace=True) + + self.logger.write_time_log('PointSourceSector', 'read_catalog_for_measured_emissions', + timeit.default_timer() - spent_time) + return catalog_df + def to_geodataframe(self, catalog): """ Convert a simple DataFrame with Lat, Lon columns into a GeoDataFrame as a shape @@ -277,8 +419,9 @@ class PointSourceSector(Sector): timeit.default_timer() - spent_time) return catalog - @staticmethod - def get_meteo_xy(dataframe, netcdf_path): + def get_meteo_xy(self, dataframe, netcdf_path): + spent_time = timeit.default_timer() + def nearest(row, geom_union, df1, df2, geom1_col='geometry', geom2_col='geometry', src_column=None): """Finds the nearest point and return the corresponding value from specified column. https://automating-gis-processes.github.io/2017/lessons/L3/nearest-neighbour.html @@ -295,16 +438,19 @@ class PointSourceSector(Sector): import numpy as np import pandas as pd import geopandas as gpd - + check_files(netcdf_path) nc = Dataset(netcdf_path, mode='r') - lats = nc.variables['lat'][:] - lons = nc.variables['lon'][:] + try: + lats = nc.variables['lat'][:] + lons = nc.variables['lon'][:] + except KeyError as e: + error_exit("{0} variable not found in {1} file.".format(str(e), netcdf_path)) x = np.array([np.arange(lats.shape[1])] * lats.shape[0]) y = np.array([np.arange(lats.shape[0]).T] * lats.shape[1]).T nc_dataframe = pd.DataFrame.from_dict({'X': x.flatten(), 'Y': y.flatten()}) nc_dataframe = gpd.GeoDataFrame(nc_dataframe, - geometry=[Point(xy) for xy in zip(lons.flatten(), lats.flatten())], + geometry=[Point(xy) for xy in list(zip(lons.flatten(), lats.flatten()))], crs={'init': 'epsg:4326'}) nc_dataframe['index'] = nc_dataframe.index @@ -315,6 +461,7 @@ class PointSourceSector(Sector): dataframe['X'] = nc_dataframe.loc[dataframe['meteo_index'], 'X'].values dataframe['Y'] = nc_dataframe.loc[dataframe['meteo_index'], 'Y'].values + self.logger.write_time_log('PointSourceSector', 'get_meteo_xy', timeit.default_timer() - spent_time) return dataframe[['X', 'Y']] def get_plumerise_meteo(self, catalog): @@ -322,16 +469,23 @@ class PointSourceSector(Sector): from netCDF4 import Dataset, num2date nc_path = os.path.join(dir_path, '{0}_{1}.nc'.format(var_name, dataframe.name.replace(hour=0).strftime("%Y%m%d%H"))) + check_files(nc_path) netcdf = Dataset(nc_path, mode='r') # time_index - time = netcdf.variables['time'] + try: + time = netcdf.variables['time'] + except KeyError as e: + error_exit("{0} variable not found in {1} file.".format(str(e), nc_path)) nc_times = [x.replace(minute=0, second=0, microsecond=0) for x in num2date(time[:], time.units, time.calendar)] time_index = nc_times.index(dataframe.name.to_pydatetime().replace(tzinfo=None)) - var = netcdf.variables[var_name][time_index, 0, :] + try: + var = netcdf.variables[var_name][time_index, 0, :] + except KeyError as e: + error_exit("{0} variable not found in {1} file.".format(str(e), nc_path)) netcdf.close() - dataframe[var_name] 
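get_meteo_xy() above turns the meteo file's lat/lon arrays into one point per grid node, each carrying its column (X) and row (Y) index, and then assigns every point source the indices of its nearest node, following the nearest-neighbour recipe cited in its docstring. A compact, self-contained sketch of that lookup on a toy 2x2 grid (coordinates and the stack location are illustrative):

    import numpy as np
    import pandas as pd
    import geopandas as gpd
    from shapely.geometry import Point
    from shapely.ops import nearest_points

    lons = np.array([[0.0, 1.0], [0.0, 1.0]])            # toy meteo grid coordinates
    lats = np.array([[40.0, 40.0], [41.0, 41.0]])
    x = np.array([np.arange(lats.shape[1])] * lats.shape[0])
    y = np.array([np.arange(lats.shape[0]).T] * lats.shape[1]).T
    nodes = gpd.GeoDataFrame(
        pd.DataFrame({'X': x.flatten(), 'Y': y.flatten()}),
        geometry=[Point(xy) for xy in zip(lons.flatten(), lats.flatten())],
        crs={'init': 'epsg:4326'})

    source = Point(0.9, 40.9)                            # an illustrative stack location
    nearest_node = nearest_points(source, nodes.geometry.unary_union)[1]
    print(nodes.loc[nodes.geometry == nearest_node, ['X', 'Y']])   # -> X=1, Y=1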
= var[dataframe['X'], dataframe['Y']] + dataframe[var_name] = var[dataframe['Y'], dataframe['X']] return dataframe[[var_name]] @@ -339,16 +493,23 @@ class PointSourceSector(Sector): from netCDF4 import Dataset, num2date nc_path = os.path.join(dir_path, '{0}_{1}.nc'.format(var_name, dataframe.name.replace(hour=0).strftime("%Y%m%d%H"))) + check_files(nc_path) netcdf = Dataset(nc_path, mode='r') # time_index - time = netcdf.variables['time'] + try: + time = netcdf.variables['time'] + except KeyError as e: + error_exit("{0} variable not found in {1} file.".format(str(e), nc_path)) nc_times = [x.replace(minute=0, second=0, microsecond=0) for x in num2date(time[:], time.units, time.calendar)] time_index = nc_times.index(dataframe.name.to_pydatetime().replace(tzinfo=None)) - var = np.flipud(netcdf.variables[var_name][time_index, :, :, :]) + try: + var = np.flipud(netcdf.variables[var_name][time_index, :, :, :]) + except KeyError as e: + error_exit("{0} variable not found in {1} file.".format(str(e), nc_path)) netcdf.close() - var = var[:, dataframe['X'], dataframe['Y']] + var = var[:, dataframe['Y'], dataframe['X']] pre_t_lay = 0 lay_list = [] @@ -370,16 +531,23 @@ class PointSourceSector(Sector): nc_path = os.path.join(dir_path, '{0}_{1}.nc'.format(var_name, dataframe.name.replace(hour=0).strftime("%Y%m%d%H"))) + check_files(nc_path) netcdf = Dataset(nc_path, mode='r') # time_index - time = netcdf.variables['time'] + try: + time = netcdf.variables['time'] + except KeyError as e: + error_exit("{0} variable not found in {1} file.".format(str(e), nc_path)) nc_times = [x.replace(minute=0, second=0, microsecond=0) for x in num2date(time[:], time.units, time.calendar)] time_index = nc_times.index(dataframe.name.to_pydatetime().replace(tzinfo=None)) - var = np.flipud(netcdf.variables[var_name][time_index, :, :, :]) + try: + var = np.flipud(netcdf.variables[var_name][time_index, :, :, :]) + except KeyError as e: + error_exit("{0} variable not found in {1} file.".format(str(e), nc_path)) netcdf.close() - var = var[:, dataframe['X'], dataframe['Y']] + var = var[:, dataframe['Y'], dataframe['X']] lay_list = ['temp_sfc'] for i, t_lay in enumerate(var): @@ -403,25 +571,36 @@ class PointSourceSector(Sector): # === u10 === u10_nc_path = os.path.join( u_dir_path, '{0}_{1}.nc'.format(u_var_name, dataframe.name.replace(hour=0).strftime("%Y%m%d%H"))) + check_files(u10_nc_path) u10_netcdf = Dataset(u10_nc_path, mode='r') # time_index - time = u10_netcdf.variables['time'] + try: + time = u10_netcdf.variables['time'] + except KeyError as e: + error_exit("{0} variable not found in {1} file.".format(str(e), u10_nc_path)) nc_times = [x.replace(minute=0, second=0, microsecond=0) for x in num2date(time[:], time.units, time.calendar)] time_index = nc_times.index(dataframe.name.to_pydatetime().replace(tzinfo=None)) - var = u10_netcdf.variables[u_var_name][time_index, 0, :] + try: + var = u10_netcdf.variables[u_var_name][time_index, 0, :] + except KeyError as e: + error_exit("{0} variable not found in {1} file.".format(str(e), u10_nc_path)) u10_netcdf.close() - dataframe['u10'] = var[dataframe['X'], dataframe['Y']] + dataframe['u10'] = var[dataframe['Y'], dataframe['X']] # === v10 === v10_nc_path = os.path.join( v_dir_path, '{0}_{1}.nc'.format(v_var_name, dataframe.name.replace(hour=0).strftime("%Y%m%d%H"))) + check_files(v10_nc_path) v10_netcdf = Dataset(v10_nc_path, mode='r') - var = v10_netcdf.variables[v_var_name][time_index, 0, :] + try: + var = v10_netcdf.variables[v_var_name][time_index, 0, :] + except KeyError as e: 
+ error_exit("{0} variable not found in {1} file.".format(str(e), v10_nc_path)) v10_netcdf.close() - dataframe['v10'] = var[dataframe['X'], dataframe['Y']] + dataframe['v10'] = var[dataframe['Y'], dataframe['X']] # === wind speed === dataframe['wSpeed_10'] = np.linalg.norm(dataframe[['u10', 'v10']].values, axis=1) @@ -434,16 +613,24 @@ class PointSourceSector(Sector): # === u10 === u10_nc_path = os.path.join( u_dir_path, '{0}_{1}.nc'.format(u_var_name, dataframe.name.replace(hour=0).strftime("%Y%m%d%H"))) + check_files(u10_nc_path) u10_netcdf = Dataset(u10_nc_path, mode='r') # time_index - time = u10_netcdf.variables['time'] + try: + time = u10_netcdf.variables['time'] + except KeyError as e: + error_exit("{0} variable not found in {1} file.".format(str(e), u10_nc_path)) nc_times = [x.replace(minute=0, second=0, microsecond=0) for x in num2date(time[:], time.units, time.calendar)] time_index = nc_times.index(dataframe.name.to_pydatetime().replace(tzinfo=None)) - var = np.flipud(u10_netcdf.variables[u_var_name][time_index, :, :, :]) + try: + var = np.flipud(u10_netcdf.variables[u_var_name][time_index, :, :, :]) + except KeyError as e: + error_exit("{0} variable not found in {1} file.".format(str(e), u10_nc_path)) + u10_netcdf.close() - var = var[:, dataframe['X'], dataframe['Y']] + var = var[:, dataframe['Y'], dataframe['X']] for i, t_lay in enumerate(var): dataframe['u_{0}'.format(i)] = t_lay @@ -451,11 +638,15 @@ class PointSourceSector(Sector): # === v10 === v10_nc_path = os.path.join( v_dir_path, '{0}_{1}.nc'.format(v_var_name, dataframe.name.replace(hour=0).strftime("%Y%m%d%H"))) + check_files(v10_nc_path) v10_netcdf = Dataset(v10_nc_path, mode='r') - var = np.flipud(v10_netcdf.variables[v_var_name][time_index, :, :, :]) + try: + var = np.flipud(v10_netcdf.variables[v_var_name][time_index, :, :, :]) + except KeyError as e: + error_exit("{0} variable not found in {1} file.".format(str(e), v10_nc_path)) v10_netcdf.close() - var = var[:, dataframe['X'], dataframe['Y']] + var = var[:, dataframe['Y'], dataframe['X']] ws_lay_list = ['wSpeed_10'] for i, t_lay in enumerate(var): @@ -475,7 +666,7 @@ class PointSourceSector(Sector): # TODO Use IoNetCDF spent_time = timeit.default_timer() - # Adding meteo X, Y array index to the catalog + meteo_xy = self.get_meteo_xy(catalog.groupby('Code').first(), os.path.join( self.plume_rise_pahts['temperature_sfc_dir'], 't2_{0}.nc'.format(self.date_array[0].replace(hour=0).strftime("%Y%m%d%H")))) @@ -525,7 +716,7 @@ class PointSourceSector(Sector): def get_plume_rise_top_bot(self, catalog): spent_time = timeit.default_timer() - catalog = self.get_plumerise_meteo(catalog) + catalog = self.get_plumerise_meteo(catalog).reset_index() # Step 1: Bouyancy flux catalog.loc[catalog['Temp'] <= catalog['temp_top'], 'Fb'] = 0 @@ -539,7 +730,7 @@ class PointSourceSector(Sector): 0.047 / catalog['temp_top']) # Step 3: Plume thickness - catalog.reset_index(inplace=True) + # catalog.reset_index(inplace=True) neutral_atm = (catalog['obukhov_len'] > 2. 
* catalog['Height']) | ( catalog['obukhov_len'] < -0.25 * catalog['Height']) stable_atm = ((catalog['obukhov_len'] > 0) & (catalog['obukhov_len'] < 2 * catalog['Height'])) | ( @@ -673,7 +864,7 @@ class PointSourceSector(Sector): try: test.set_index(x.index, inplace=True) except ValueError: - raise IOError('No measured emissions for the selected dates: {0}'.format(x.values)) + error_exit('No measured emissions for the selected dates: {0}'.format(x.values)) return test[pollutant] @@ -690,7 +881,7 @@ class PointSourceSector(Sector): catalog = None else: catalog = self.to_geodataframe(catalog) - catalog = self.add_dates(catalog) + catalog = self.add_dates(catalog, drop_utc=False) catalog = self.add_measured_emissions(catalog) catalog.set_index(['Code', 'tstep'], inplace=True) @@ -701,7 +892,8 @@ class PointSourceSector(Sector): def merge_catalogs(self, catalog_list): spent_time = timeit.default_timer() - catalog = pd.concat(catalog_list) + catalog = pd.concat(catalog_list).reset_index() + catalog.set_index(['Code', 'tstep'], inplace=True) self.logger.write_time_log('PointSourceSector', 'merge_catalogs', timeit.default_timer() - spent_time) return catalog @@ -747,9 +939,9 @@ class PointSourceSector(Sector): def point_source_to_fid(self, catalog): catalog.reset_index(inplace=True) - catalog = catalog.to_crs(self.grid_shp.crs) + catalog = catalog.to_crs(self.grid.shapefile.crs) - catalog = gpd.sjoin(catalog, self.grid_shp.reset_index(), how="inner", op='intersects') + catalog = gpd.sjoin(catalog, self.grid.shapefile.reset_index(), how="inner", op='intersects') # Drops duplicates when the point source is on the boundary of the cell catalog = catalog[~catalog.index.duplicated(keep='first')] diff --git a/hermesv3_bu/sectors/recreational_boats_sector.py b/hermesv3_bu/sectors/recreational_boats_sector.py index 59177fcfa3cf83a97ce0f5c2d911fd443383eb79..4b7c5c7cce7ecf40c9b78cdfc762d7e9583f0b81 100755 --- a/hermesv3_bu/sectors/recreational_boats_sector.py +++ b/hermesv3_bu/sectors/recreational_boats_sector.py @@ -10,18 +10,21 @@ import geopandas as gpd from hermesv3_bu.sectors.sector import Sector from hermesv3_bu.io_server.io_shapefile import IoShapefile from hermesv3_bu.io_server.io_raster import IoRaster -from hermesv3_bu.logger.log import Log +from hermesv3_bu.tools.checker import check_files class RecreationalBoatsSector(Sector): - def __init__(self, comm, logger, auxiliary_dir, grid_shp, clip, date_array, source_pollutants, vertical_levels, + def __init__(self, comm, logger, auxiliary_dir, grid, clip, date_array, source_pollutants, vertical_levels, boat_list, density_map_path, boats_data_path, ef_file_path, monthly_profiles_path, weekly_profiles_path, hourly_profiles_path, speciation_map_path, speciation_profiles_path, molecular_weights_path): spent_time = timeit.default_timer() - + logger.write_log('===== RECREATIONAL BOATS SECTOR =====') + check_files( + [density_map_path, boats_data_path, ef_file_path, monthly_profiles_path, weekly_profiles_path, + hourly_profiles_path, speciation_map_path, speciation_profiles_path, molecular_weights_path]) super(RecreationalBoatsSector, self).__init__( - comm, logger, auxiliary_dir, grid_shp, clip, date_array, source_pollutants, vertical_levels, + comm, logger, auxiliary_dir, grid, clip, date_array, source_pollutants, vertical_levels, monthly_profiles_path, weekly_profiles_path, hourly_profiles_path, speciation_map_path, speciation_profiles_path, molecular_weights_path) @@ -42,9 +45,9 @@ class RecreationalBoatsSector(Sector): src_density_map = 
IoRaster(self.comm).to_shapefile_serie(density_map_path, nodata=0) src_density_map = src_density_map.loc[src_density_map['data'] > 0] src_density_map['data'] = src_density_map['data'] / src_density_map['data'].sum() - src_density_map.to_crs(self.grid_shp.crs, inplace=True) + src_density_map.to_crs(self.grid.shapefile.crs, inplace=True) src_density_map['src_inter_fraction'] = src_density_map.area - src_density_map = self.spatial_overlays(src_density_map, self.grid_shp.reset_index(), + src_density_map = self.spatial_overlays(src_density_map, self.grid.shapefile.reset_index(), how='intersection') src_density_map['src_inter_fraction'] = src_density_map.area / src_density_map['src_inter_fraction'] @@ -52,8 +55,8 @@ class RecreationalBoatsSector(Sector): axis="index") src_density_map = src_density_map.loc[:, ['FID', 'data']].groupby('FID').sum() - src_density_map = gpd.GeoDataFrame(src_density_map, crs=self.grid_shp.crs, - geometry=self.grid_shp.loc[src_density_map.index, 'geometry']) + src_density_map = gpd.GeoDataFrame(src_density_map, crs=self.grid.shapefile.crs, + geometry=self.grid.shapefile.loc[src_density_map.index, 'geometry']) src_density_map.reset_index(inplace=True) IoShapefile(self.comm).write_shapefile_serial(src_density_map, density_map_auxpath) @@ -115,7 +118,7 @@ class RecreationalBoatsSector(Sector): new_dataframe = self.density_map.copy() new_dataframe.drop(columns='data', inplace=True) - for pollutant, annual_value in annual_emissions.iteritems(): + for pollutant, annual_value in annual_emissions.items(): new_dataframe[pollutant] = self.density_map['data'] * annual_value self.logger.write_time_log('RecreationalBoatsSector', 'calculate_yearly_emissions', @@ -160,15 +163,15 @@ class RecreationalBoatsSector(Sector): dataframe['date_as_date'] = dataframe['date'].dt.date dataframe['MF'] = dataframe.groupby('month').apply(get_mf) - dataframe[self.output_pollutants] = dataframe[self.output_pollutants].multiply(dataframe['MF'], axis=0) + dataframe[self.output_pollutants] = dataframe[self.output_pollutants].mul(dataframe['MF'], axis=0) dataframe.drop(columns=['month', 'MF'], inplace=True) dataframe['WF'] = dataframe.groupby('date_as_date').apply(get_wf) - dataframe[self.output_pollutants] = dataframe[self.output_pollutants].multiply(dataframe['WF'], axis=0) + dataframe[self.output_pollutants] = dataframe[self.output_pollutants].mul(dataframe['WF'], axis=0) dataframe.drop(columns=['weekday', 'date', 'date_as_date', 'WF'], inplace=True) dataframe['HF'] = dataframe.groupby('hour').apply(get_hf) - dataframe[self.output_pollutants] = dataframe[self.output_pollutants].multiply(dataframe['HF'], axis=0) + dataframe[self.output_pollutants] = dataframe[self.output_pollutants].mul(dataframe['HF'], axis=0) dataframe.drop(columns=['hour', 'HF'], inplace=True) self.logger.write_time_log('RecreationalBoatsSector', 'calculate_hourly_emissions', diff --git a/hermesv3_bu/sectors/residential_sector.py b/hermesv3_bu/sectors/residential_sector.py index fdd12479b6002f20f3da2d05dcea02d313dcd2fc..4f34981bdbbec04c4cb752671111287a899ffc39 100755 --- a/hermesv3_bu/sectors/residential_sector.py +++ b/hermesv3_bu/sectors/residential_sector.py @@ -7,25 +7,31 @@ import timeit import numpy as np import pandas as pd import geopandas as gpd +from warnings import warn from hermesv3_bu.sectors.sector import Sector from hermesv3_bu.io_server.io_raster import IoRaster from hermesv3_bu.io_server.io_shapefile import IoShapefile from hermesv3_bu.io_server.io_netcdf import IoNetcdf -from hermesv3_bu.logger.log import Log +from 
hermesv3_bu.tools.checker import check_files class ResidentialSector(Sector): - def __init__(self, comm, logger, auxiliary_dir, grid_shp, clip, date_array, source_pollutants, vertical_levels, + def __init__(self, comm, logger, auxiliary_dir, grid, clip, date_array, source_pollutants, vertical_levels, fuel_list, prov_shapefile, ccaa_shapefile, population_density_map, population_type_map, - population_type_by_ccaa, population_type_by_prov, energy_consumption_by_prov, - energy_consumption_by_ccaa, residential_spatial_proxies, residential_ef_files_path, + population_type_nuts2, population_type_nuts3, energy_consumption_nuts3, + energy_consumption_nuts2, residential_spatial_proxies, residential_ef_files_path, heating_degree_day_path, temperature_path, hourly_profiles_path, speciation_map_path, speciation_profiles_path, molecular_weights_path): spent_time = timeit.default_timer() - + logger.write_log('===== RESIDENTIAL COMBUSTION SECTOR =====') + check_files( + [prov_shapefile, ccaa_shapefile, population_density_map, population_type_map, population_type_nuts2, + population_type_nuts3, energy_consumption_nuts3, energy_consumption_nuts2, residential_spatial_proxies, + residential_ef_files_path, temperature_path, hourly_profiles_path, speciation_map_path, + speciation_profiles_path, molecular_weights_path]) super(ResidentialSector, self).__init__( - comm, logger, auxiliary_dir, grid_shp, clip, date_array, source_pollutants, vertical_levels, + comm, logger, auxiliary_dir, grid, clip, date_array, source_pollutants, vertical_levels, None, None, hourly_profiles_path, speciation_map_path, speciation_profiles_path, molecular_weights_path) @@ -37,24 +43,18 @@ class ResidentialSector(Sector): self.fuel_list = fuel_list self.day_dict = self.calculate_num_days() - self.pop_type_by_prov = population_type_by_prov - self.pop_type_by_ccaa = population_type_by_ccaa + self.pop_type_by_prov = population_type_nuts3 + self.pop_type_by_ccaa = population_type_nuts2 - self.energy_consumption_by_prov = pd.read_csv(energy_consumption_by_prov) - self.energy_consumption_by_ccaa = pd.read_csv(energy_consumption_by_ccaa) + self.energy_consumption_nuts3 = pd.read_csv(energy_consumption_nuts3) + self.energy_consumption_nuts2 = pd.read_csv(energy_consumption_nuts2) self.residential_spatial_proxies = self.read_residential_spatial_proxies(residential_spatial_proxies) self.ef_profiles = self.read_ef_file(residential_ef_files_path) - if self.comm.Get_rank() == 0: - self.fuel_distribution = self.get_fuel_distribution(prov_shapefile, ccaa_shapefile, population_density_map, - population_type_map, create_pop_csv=False) - else: - self.fuel_distribution = None - self.fuel_distribution = IoShapefile(self.comm).split_shapefile(self.fuel_distribution) - + self.fuel_distribution = self.get_fuel_distribution( + prov_shapefile, ccaa_shapefile, population_density_map, population_type_map, create_pop_csv=False) self.heating_degree_day_path = heating_degree_day_path self.temperature_path = temperature_path - self.logger.write_time_log('ResidentialSector', '__init__', timeit.default_timer() - spent_time) def read_ef_file(self, path): @@ -125,11 +125,14 @@ class ResidentialSector(Sector): def to_dst_resolution(self, src_distribution): spent_time = timeit.default_timer() - src_distribution.to_crs(self.grid_shp.crs, inplace=True) - src_distribution.to_file(os.path.join(self.auxiliary_dir, 'residential', 'fuel_distribution_src.shp')) + src_distribution.to_crs(self.grid.shapefile.crs, inplace=True) + # src_distribution.reset_index().to_file( + # 
os.path.join(self.auxiliary_dir, 'residential', 'fuel_distribution_src.shp')) src_distribution['src_inter_fraction'] = src_distribution.geometry.area - src_distribution = self.spatial_overlays(src_distribution, self.grid_shp.reset_index(), how='intersection') - src_distribution.to_file(os.path.join(self.auxiliary_dir, 'residential', 'fuel_distribution_raw.shp')) + src_distribution = self.spatial_overlays(src_distribution, self.grid.shapefile.reset_index(), + how='intersection') + # src_distribution.reset_index().to_file( + # os.path.join(self.auxiliary_dir, 'residential', 'fuel_distribution_raw.shp')) src_distribution['src_inter_fraction'] = src_distribution.geometry.area / src_distribution[ 'src_inter_fraction'] @@ -137,8 +140,8 @@ class ResidentialSector(Sector): src_distribution["src_inter_fraction"], axis="index") src_distribution = src_distribution.loc[:, self.fuel_list + ['FID']].groupby('FID').sum() - src_distribution = gpd.GeoDataFrame(src_distribution, crs=self.grid_shp.crs, - geometry=self.grid_shp.loc[src_distribution.index, 'geometry']) + src_distribution = gpd.GeoDataFrame(src_distribution, crs=self.grid.shapefile.crs, + geometry=self.grid.shapefile.loc[src_distribution.index, 'geometry']) src_distribution.reset_index(inplace=True) self.logger.write_time_log('ResidentialSector', 'to_dst_resolution', timeit.default_timer() - spent_time) @@ -151,30 +154,33 @@ class ResidentialSector(Sector): fuel_distribution_path = os.path.join(self.auxiliary_dir, 'residential', 'fuel_distribution.shp') if not os.path.exists(fuel_distribution_path): - - population_density = IoRaster(self.comm).clip_raster_with_shapefile_poly( - population_density_map, self.clip.shapefile, - os.path.join(self.auxiliary_dir, 'residential', 'population_density.tif')) - population_density = IoRaster(self.comm).to_shapefile_serie(population_density) + population_density = os.path.join(self.auxiliary_dir, 'residential', 'population_density.tif') + if self.comm.Get_rank() == 0: + population_density = IoRaster(self.comm).clip_raster_with_shapefile_poly( + population_density_map, self.clip.shapefile, population_density) + population_density = IoRaster(self.comm).to_shapefile_parallel(population_density) population_density.rename(columns={'data': 'pop'}, inplace=True) - population_type = IoRaster(self.comm).clip_raster_with_shapefile_poly( - population_type_map, self.clip.shapefile, - os.path.join(self.auxiliary_dir, 'residential', 'population_type.tif')) - population_type = IoRaster(self.comm).to_shapefile_serie(population_type) + population_type = os.path.join(self.auxiliary_dir, 'residential', 'population_type.tif') + if self.comm.Get_rank() == 0: + population_type = IoRaster(self.comm).clip_raster_with_shapefile_poly( + population_type_map, self.clip.shapefile, population_type) + population_type = IoRaster(self.comm).to_shapefile_parallel(population_type) population_type.rename(columns={'data': 'type'}, inplace=True) + population_type['type'] = population_type['type'].astype(np.int16) + population_type.loc[population_type['type'] == 2, 'type'] = 3 + population_density['type'] = population_type['type'] - population_density.loc[population_density['type'] == 2, 'type'] = 3 - population_density = self.add_nut_code(population_density, prov_shapefile, nut_value='ORDER07') + population_density = self.add_nut_code(population_density, prov_shapefile, nut_value='nuts3_id') population_density.rename(columns={'nut_code': 'prov'}, inplace=True) - population_density = population_density.loc[population_density['prov'] != -999, :] - 
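The recreational-boats density map and ResidentialSector.to_dst_resolution above follow the same area-weighting recipe when moving a source layer onto the destination grid: reproject to the grid CRS, remember each source polygon's full area, intersect with the grid cells, scale values by the intersected fraction, and sum per destination cell (FID). A minimal sketch of that recipe, using geopandas.overlay in place of the project's spatial_overlays helper (the function name is illustrative):

    import geopandas as gpd

    def to_grid(src, grid_shapefile, value_columns):
        src = src.to_crs(grid_shapefile.crs)
        src['src_inter_fraction'] = src.geometry.area                      # full polygon area
        inter = gpd.overlay(src, grid_shapefile.reset_index(), how='intersection')
        fraction = inter.geometry.area / inter['src_inter_fraction']       # overlapped share
        inter[value_columns] = inter[value_columns].multiply(fraction, axis='index')
        return inter.loc[:, value_columns + ['FID']].groupby('FID').sum()  # one row per grid cell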
population_density = self.add_nut_code(population_density, ccaa_shapefile, nut_value='ORDER06') + population_density = self.add_nut_code(population_density, ccaa_shapefile, nut_value='nuts2_id') population_density.rename(columns={'nut_code': 'ccaa'}, inplace=True) population_density = population_density.loc[population_density['ccaa'] != -999, :] + population_density = IoShapefile(self.comm).balance(population_density) if create_pop_csv: population_density.loc[:, ['prov', 'pop', 'type']].groupby(['prov', 'type']).sum().reset_index().to_csv( @@ -182,10 +188,13 @@ class ResidentialSector(Sector): population_density.loc[:, ['ccaa', 'pop', 'type']].groupby(['ccaa', 'type']).sum().reset_index().to_csv( self.pop_type_by_ccaa) - self.pop_type_by_ccaa = pd.read_csv(self.pop_type_by_ccaa).set_index(['ccaa', 'type']) - self.pop_type_by_prov = pd.read_csv(self.pop_type_by_prov).set_index(['prov', 'type']) + self.pop_type_by_ccaa = pd.read_csv(self.pop_type_by_ccaa).rename( + columns={'nuts2_id': 'ccaa'}).set_index(['ccaa', 'type']) + self.pop_type_by_prov = pd.read_csv(self.pop_type_by_prov).rename( + columns={'nuts3_id': 'prov'}).set_index(['prov', 'type']) - fuel_distribution = population_density.loc[:, ['CELL_ID', 'geometry']].copy() + fuel_distribution = population_density[['geometry']].copy() + fuel_distribution.index.name = 'CELL_ID' for fuel in self.fuel_list: fuel_distribution[fuel] = 0 @@ -197,50 +206,73 @@ class ResidentialSector(Sector): if spatial_proxy['proxy_type'] == 'all': total_pop = self.pop_type_by_ccaa.loc[ self.pop_type_by_ccaa.index.get_level_values('ccaa') == ccaa, 'pop'].sum() - energy_consumption = self.energy_consumption_by_ccaa.loc[ - self.energy_consumption_by_ccaa['code'] == ccaa, fuel].values[0] + energy_consumption = self.energy_consumption_nuts2.loc[ + self.energy_consumption_nuts2['nuts2_id'] == ccaa, fuel].values[0] fuel_distribution.loc[ population_density['ccaa'] == ccaa, fuel] = population_density['pop'].multiply( energy_consumption / total_pop) else: - total_pop = self.pop_type_by_ccaa.loc[ - (self.pop_type_by_ccaa.index.get_level_values('ccaa') == ccaa) & - (self.pop_type_by_ccaa.index.get_level_values('type') == spatial_proxy['proxy_type']), - 'pop'].values[0] - energy_consumption = self.energy_consumption_by_ccaa.loc[ - self.energy_consumption_by_ccaa['code'] == ccaa, fuel].values[0] - - fuel_distribution.loc[(population_density['ccaa'] == ccaa) & - (population_density['type'] == spatial_proxy['proxy_type']), - fuel] = population_density['pop'].multiply( - energy_consumption / total_pop) + try: + total_pop = self.pop_type_by_ccaa.loc[ + (self.pop_type_by_ccaa.index.get_level_values('ccaa') == ccaa) & + (self.pop_type_by_ccaa.index.get_level_values('type') == spatial_proxy[ + 'proxy_type']), + 'pop'].values[0] + try: + energy_consumption = self.energy_consumption_nuts2.loc[ + self.energy_consumption_nuts2['nuts2_id'] == ccaa, fuel].values[0] + except IndexError: + warn("*WARNING*: NUT2_ID {0} not found in the ".format(ccaa) + + "energy_consumption_nuts2 file. Setting it to 0.") + energy_consumption = 0.0 + fuel_distribution.loc[(population_density['ccaa'] == ccaa) & + (population_density['type'] == spatial_proxy['proxy_type']), + fuel] = population_density['pop'].multiply( + energy_consumption / total_pop) + except IndexError: + warn("*WARNING*: NUT2_ID {0} not found in the ".format(ccaa) + + "population_type_nuts2 file. 
Setting it to 0.") + fuel_distribution.loc[(population_density['ccaa'] == ccaa) & + (population_density['type'] == spatial_proxy['proxy_type']), + fuel] = 0.0 if spatial_proxy['nut_level'] == 'prov': for prov in np.unique(population_density['prov']): if spatial_proxy['proxy_type'] == 'all': total_pop = self.pop_type_by_prov.loc[self.pop_type_by_prov.index.get_level_values( 'prov') == prov, 'pop'].sum() - energy_consumption = self.energy_consumption_by_prov.loc[ - self.energy_consumption_by_prov['code'] == prov, fuel].values[0] - - fuel_distribution.loc[population_density['prov'] == prov, fuel] = population_density[ - 'pop'].multiply(energy_consumption / total_pop) + try: + energy_consumption = self.energy_consumption_nuts3.loc[ + self.energy_consumption_nuts3['nuts3_id'] == prov, fuel].values[0] + fuel_distribution.loc[population_density['prov'] == prov, fuel] = population_density[ + 'pop'].multiply(energy_consumption / total_pop) + except IndexError: + warn("*WARNING*: NUT3_ID {0} not found in the ".format(prov) + + "energy_consumption_nuts3 file. Setting it to 0.") + fuel_distribution.loc[population_density['prov'] == prov, fuel] = 0.0 else: total_pop = self.pop_type_by_prov.loc[ (self.pop_type_by_prov.index.get_level_values('prov') == prov) & (self.pop_type_by_prov.index.get_level_values('type') == spatial_proxy['proxy_type']), 'pop'].values[0] - energy_consumption = self.energy_consumption_by_prov.loc[ - self.energy_consumption_by_prov['code'] == prov, fuel].values[0] + energy_consumption = self.energy_consumption_nuts3.loc[ + self.energy_consumption_nuts3['nuts3_id'] == prov, fuel].values[0] fuel_distribution.loc[(population_density['prov'] == prov) & (population_density['type'] == spatial_proxy['proxy_type']), fuel] = population_density['pop'].multiply( energy_consumption / total_pop) fuel_distribution = self.to_dst_resolution(fuel_distribution) - IoShapefile(self.comm).write_shapefile_serial(fuel_distribution, fuel_distribution_path) + fuel_distribution = IoShapefile(self.comm).gather_shapefile(fuel_distribution, rank=0) + if self.comm.Get_rank() == 0: + fuel_distribution.groupby('FID').sum() + IoShapefile(self.comm).write_shapefile_serial(fuel_distribution, fuel_distribution_path) + else: + fuel_distribution = None + fuel_distribution = IoShapefile(self.comm).split_shapefile(fuel_distribution) else: - fuel_distribution = IoShapefile(self.comm).read_shapefile_serial(fuel_distribution_path) + fuel_distribution = IoShapefile(self.comm).read_shapefile_parallel(fuel_distribution_path) + fuel_distribution.set_index('FID', inplace=True) self.logger.write_time_log('ResidentialSector', 'get_fuel_distribution', timeit.default_timer() - spent_time) return fuel_distribution diff --git a/hermesv3_bu/sectors/sector.py b/hermesv3_bu/sectors/sector.py index b2dc6ab862f3f20bf8fc5899c63ed51221a5772b..bac090adb382ad76e84f6b7f94ebf6eb29375cfb 100755 --- a/hermesv3_bu/sectors/sector.py +++ b/hermesv3_bu/sectors/sector.py @@ -3,16 +3,22 @@ import sys import os import timeit -from hermesv3_bu.logger.log import Log import numpy as np import pandas as pd import geopandas as gpd from mpi4py import MPI +from hermesv3_bu.io_server.io_raster import IoRaster +from hermesv3_bu.io_server.io_shapefile import IoShapefile +from geopandas import GeoDataFrame +from hermesv3_bu.logger.log import Log +from hermesv3_bu.grids.grid import Grid +from geopandas import GeoDataFrame + class Sector(object): - def __init__(self, comm, logger, auxiliary_dir, grid_shp, clip, date_array, source_pollutants, vertical_levels, + def 
__init__(self, comm, logger, auxiliary_dir, grid, clip, date_array, source_pollutants, vertical_levels, monthly_profiles_path, weekly_profiles_path, hourly_profiles_path, speciation_map_path, speciation_profiles_path, molecular_weights_path): """ @@ -28,8 +34,8 @@ class Sector(object): created yet. :type auxiliary_dir: str - :param grid_shp: Shapefile with the grid horizontal distribution. - :type grid_shp: GeoDataFrame + :param grid: Grid object + :type grid: Grid :param date_array: List of datetimes. :type date_array: list(datetime.datetime, ...) @@ -73,7 +79,7 @@ class Sector(object): self.comm = comm self.logger = logger self.auxiliary_dir = auxiliary_dir - self.grid_shp = grid_shp + self.grid = grid self.clip = clip self.date_array = date_array self.source_pollutants = source_pollutants @@ -90,7 +96,7 @@ class Sector(object): self.speciation_profile = self.read_speciation_profiles(speciation_profiles_path) self.molecular_weights = self.read_molecular_weights(molecular_weights_path) - self.output_pollutants = self.speciation_map.keys() + self.output_pollutants = list(self.speciation_map.keys()) self.logger.write_time_log('Sector', '__init__', timeit.default_timer() - spent_time) @@ -320,11 +326,11 @@ class Sector(object): spent_time = timeit.default_timer() weekdays_factors = 0 num_days = 0 - for day in xrange(7): + for day in range(7): weekdays_factors += profile[day] * weekdays[day] num_days += weekdays[day] increment = float(num_days - weekdays_factors) / num_days - for day in xrange(7): + for day in range(7): profile[day] = (increment + profile[day]) / num_days self.logger.write_time_log('Sector', 'calculate_weekday_factor_full_month', timeit.default_timer() - spent_time) @@ -343,7 +349,7 @@ class Sector(object): from calendar import monthrange, weekday, MONDAY, TUESDAY, WEDNESDAY, THURSDAY, FRIDAY, SATURDAY, SUNDAY spent_time = timeit.default_timer() weekdays = [MONDAY, TUESDAY, WEDNESDAY, THURSDAY, FRIDAY, SATURDAY, SUNDAY] - days = [weekday(date.year, date.month, d + 1) for d in xrange(monthrange(date.year, date.month)[1])] + days = [weekday(date.year, date.month, d + 1) for d in range(monthrange(date.year, date.month)[1])] weekdays_dict = {} for i, day in enumerate(weekdays): @@ -420,7 +426,7 @@ class Sector(object): return dataframe - def add_nut_code(self, shapefile, nut_shapefile_path, nut_value='ORDER06'): + def add_nut_code(self, shapefile, nut_shapefile_path, nut_value='nuts2_id'): """ Add 'nut_code' column into the shapefile based on the 'nut_value' column of the 'nut_shapefile_path' shapefile. @@ -438,18 +444,18 @@ class Sector(object): :type nut_value: str :return: Shapefile with the 'nut_code' column set. 
- :rtype: geopandas.GeoDataframe + :rtype: GeoDataFrame """ spent_time = timeit.default_timer() nut_shapefile = gpd.read_file(nut_shapefile_path).to_crs(shapefile.crs) shapefile = gpd.sjoin(shapefile, nut_shapefile.loc[:, [nut_value, 'geometry']], how='left', op='intersects') - + del nut_shapefile shapefile = shapefile[~shapefile.index.duplicated(keep='first')] shapefile.drop('index_right', axis=1, inplace=True) shapefile.rename(columns={nut_value: 'nut_code'}, inplace=True) shapefile.loc[shapefile['nut_code'].isna(), 'nut_code'] = -999 - shapefile['nut_code'] = shapefile['nut_code'].astype(np.int16) + shapefile['nut_code'] = shapefile['nut_code'].astype(np.int64) self.logger.write_time_log('Sector', 'add_nut_code', timeit.default_timer() - spent_time) return shapefile @@ -465,6 +471,8 @@ class Sector(object): :param how: Operation to do :return: GeoDataFrame """ + from functools import reduce + spent_time = timeit.default_timer() df1 = df1.copy() df2 = df2.copy() @@ -556,6 +564,77 @@ class Sector(object): def get_output_pollutants(self, input_pollutant): spent_time = timeit.default_timer() - return_value = [outs for outs, ints in self.speciation_map.iteritems() if ints == input_pollutant] + return_value = [outs for outs, ints in self.speciation_map.items() if ints == input_pollutant] self.logger.write_time_log('Sector', 'get_output_pollutants', timeit.default_timer() - spent_time) return return_value + + def calculate_land_use_by_nut(self, land_use_raster_path, nut_shapefile_path, out_land_use_by_nut_path): + # 1st Clip the raster + lu_raster_path = os.path.join(self.auxiliary_dir, 'clipped_land_use.tif') + + if self.comm.Get_rank() == 0: + if not os.path.exists(lu_raster_path): + lu_raster_path = IoRaster(self.comm).clip_raster_with_shapefile_poly( + land_use_raster_path, self.clip.shapefile, lu_raster_path) + + # 2nd Raster to shapefile + land_use_shp = IoRaster(self.comm).to_shapefile_parallel(lu_raster_path, gather=False, bcast=False) + + # 3rd Add NUT code + land_use_shp.drop(columns='CELL_ID', inplace=True) + land_use_shp.rename(columns={'data': 'land_use'}, inplace=True) + land_use_shp = self.add_nut_code(land_use_shp, nut_shapefile_path, nut_value='nuts2_id') + land_use_shp = land_use_shp[land_use_shp['nut_code'] != -999] + land_use_shp = IoShapefile(self.comm).balance(land_use_shp) + + # 4th Calculate land_use percent + land_use_shp['area'] = land_use_shp.geometry.area + + land_use_by_nut = GeoDataFrame(index=pd.MultiIndex.from_product( + [np.unique(land_use_shp['nut_code']), np.unique(land_use_shp['land_use'])], names=['nuts2_id', 'land_use'])) + + for nut_code in np.unique(land_use_shp['nut_code']): + for land_use in np.unique(land_use_shp['land_use']): + land_use_by_nut.loc[(nut_code, land_use), 'area'] = land_use_shp.loc[ + (land_use_shp['land_use'] == land_use) & (land_use_shp['nut_code'] == nut_code), 'area'].sum() + + land_use_by_nut.reset_index(inplace=True) + land_use_by_nut = IoShapefile(self.comm).gather_shapefile(land_use_by_nut, rank=0) + + if self.comm.Get_rank() == 0: + land_use_by_nut = land_use_by_nut.groupby(['nuts2_id', 'land_use']).sum() + land_use_by_nut.to_csv(out_land_use_by_nut_path) + print('DONE -> {0}'.format(out_land_use_by_nut_path)) + self.comm.Barrier() + + def create_population_by_nut(self, population_raster_path, nut_shapefile_path, output_path, nut_column='nuts3_id'): + # 1st Clip the raster + self.logger.write_log("\t\tCreating clipped population raster", message_level=3) + if self.comm.Get_rank() == 0: + clipped_population_path = 
IoRaster(self.comm).clip_raster_with_shapefile_poly( + population_raster_path, self.clip.shapefile, + os.path.join(self.auxiliary_dir, 'traffic_area', 'pop.tif')) + else: + clipped_population_path = None + + # 2nd Raster to shapefile + self.logger.write_log("\t\tRaster to shapefile", message_level=3) + pop_shp = IoRaster(self.comm).to_shapefile_parallel( + clipped_population_path, gather=False, bcast=False, crs={'init': 'epsg:4326'}) + + # 3rd Add NUT code + self.logger.write_log("\t\tAdding nut codes to the shapefile", message_level=3) + # if self.comm.Get_rank() == 0: + pop_shp.drop(columns='CELL_ID', inplace=True) + pop_shp.rename(columns={'data': 'population'}, inplace=True) + pop_shp = self.add_nut_code(pop_shp, nut_shapefile_path, nut_value=nut_column) + pop_shp = pop_shp[pop_shp['nut_code'] != -999] + pop_shp.rename(columns={'nut_code': nut_column}, inplace=True) + + pop_shp = IoShapefile(self.comm).gather_shapefile(pop_shp) + if self.comm.Get_rank() == 0: + popu_dist = pop_shp.groupby(nut_column).sum() + popu_dist.to_csv(output_path) + self.comm.Barrier() + + return True diff --git a/hermesv3_bu/sectors/sector_manager.py b/hermesv3_bu/sectors/sector_manager.py index f36f32c1ed19a93eae922540e67489579c60166d..7bb7d0ec036239170532c18719bccc8931915dab 100755 --- a/hermesv3_bu/sectors/sector_manager.py +++ b/hermesv3_bu/sectors/sector_manager.py @@ -2,9 +2,10 @@ import timeit from hermesv3_bu.logger.log import Log +from hermesv3_bu.tools.checker import error_exit SECTOR_LIST = ['traffic', 'traffic_area', 'aviation', 'point_sources', 'recreational_boats', 'shipping_port', - 'residential', 'livestock', 'crop_operations', 'crop_fertilizers', 'agricultural_machinery'] + 'residential', 'livestock', 'crop_operations', 'crop_fertilizers', 'agricultural_machinery', 'solvents'] class SectorManager(object): @@ -25,20 +26,20 @@ class SectorManager(object): :type arguments: NameSpace """ spent_time = timeit.default_timer() - self.logger = logger + self.__logger = logger self.sector_list = self.make_sector_list(arguments, comm_world.Get_size()) - self.logger.write_log('Sector process distribution:') - for sect, procs in self.sector_list.iteritems(): - self.logger.write_log('\t{0}: {1}'.format(sect, procs)) + self.__logger.write_log('Sector process distribution:') + for sect, procs in self.sector_list.items(): + self.__logger.write_log('\t{0}: {1}'.format(sect, procs)) color = 10 agg_color = 99 - for sector, sector_procs in self.sector_list.iteritems(): + for sector, sector_procs in self.sector_list.items(): if sector == 'aviation' and comm_world.Get_rank() in sector_procs: from hermesv3_bu.sectors.aviation_sector import AviationSector self.sector = AviationSector( - comm_world.Split(color, sector_procs.index(comm_world.Get_rank())), self.logger, - arguments.auxiliary_files_path, grid.shapefile, clip, date_array, + comm_world.Split(color, sector_procs.index(comm_world.Get_rank())), self.__logger, + arguments.auxiliary_files_path, grid, clip, date_array, arguments.aviation_source_pollutants, grid.vertical_desctiption, arguments.airport_list, arguments.plane_list, arguments.airport_shapefile_path, arguments.airport_runways_shapefile_path, arguments.airport_runways_corners_shapefile_path, arguments.airport_trajectories_shapefile_path, @@ -49,8 +50,8 @@ class SectorManager(object): elif sector == 'shipping_port' and comm_world.Get_rank() in sector_procs: from hermesv3_bu.sectors.shipping_port_sector import ShippingPortSector self.sector = ShippingPortSector( - comm_world.Split(color, 
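create_population_by_nut() above reduces to: turn the clipped population raster into polygons, tag each polygon with its NUTS code through a spatial join, and sum the population per region. A minimal single-process sketch of that aggregation (the rank-0 clipping and the gather step are left out, and the helper name is illustrative):

    import geopandas as gpd

    def population_by_nut(pop_shp, nut_shapefile_path, nut_column='nuts3_id'):
        # pop_shp: GeoDataFrame with one polygon per raster cell and a 'population' column
        nuts = gpd.read_file(nut_shapefile_path).to_crs(pop_shp.crs)
        pop_shp = gpd.sjoin(pop_shp, nuts.loc[:, [nut_column, 'geometry']], how='left', op='intersects')
        pop_shp = pop_shp[~pop_shp.index.duplicated(keep='first')]   # a cell on a boundary keeps one match
        pop_shp = pop_shp[pop_shp[nut_column].notna()]               # drop cells outside every region
        return pop_shp.groupby(nut_column)['population'].sum()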
sector_procs.index(comm_world.Get_rank())), self.logger, - arguments.auxiliary_files_path, grid.shapefile, clip, date_array, + comm_world.Split(color, sector_procs.index(comm_world.Get_rank())), self.__logger, + arguments.auxiliary_files_path, grid, clip, date_array, arguments.shipping_port_source_pollutants, grid.vertical_desctiption, arguments.vessel_list, arguments.port_list, arguments.hoteling_shapefile_path, arguments.maneuvering_shapefile_path, arguments.shipping_port_ef_path, arguments.shipping_port_engine_percent_path, @@ -62,15 +63,15 @@ class SectorManager(object): elif sector == 'livestock' and comm_world.Get_rank() in sector_procs: from hermesv3_bu.sectors.livestock_sector import LivestockSector self.sector = LivestockSector( - comm_world.Split(color, sector_procs.index(comm_world.Get_rank())), self.logger, - arguments.auxiliary_files_path, grid.shapefile, clip, date_array, + comm_world.Split(color, sector_procs.index(comm_world.Get_rank())), self.__logger, + arguments.auxiliary_files_path, grid, clip, date_array, arguments.livestock_source_pollutants, grid.vertical_desctiption, arguments.animal_list, arguments.gridded_livestock, arguments.correction_split_factors, arguments.temperature_daily_files_path, arguments.wind_speed_daily_files_path, arguments.denominator_yearly_factor_dir, arguments.livestock_ef_files_dir, arguments.livestock_monthly_profiles, arguments.livestock_weekly_profiles, arguments.livestock_hourly_profiles, arguments.speciation_map, - arguments.livestock_speciation_profiles, arguments.molecular_weights, arguments.nut_shapefile_prov) + arguments.livestock_speciation_profiles, arguments.molecular_weights, arguments.nuts3_shapefile) elif sector == 'crop_operations' and comm_world.Get_rank() in sector_procs: from hermesv3_bu.sectors.agricultural_crop_operations_sector import AgriculturalCropOperationsSector @@ -78,14 +79,14 @@ class SectorManager(object): comm_agr = comm_world.Split(agg_color, agg_procs.index(comm_world.Get_rank())) comm = comm_agr.Split(color, sector_procs.index(comm_world.Get_rank())) self.sector = AgriculturalCropOperationsSector( - comm_agr, comm, logger, arguments.auxiliary_files_path, grid.shapefile, clip, date_array, + comm_agr, comm, logger, arguments.auxiliary_files_path, grid, clip, date_array, arguments.crop_operations_source_pollutants, - grid.vertical_desctiption, arguments.crop_operations_list, arguments.nut_shapefile_ccaa, + grid.vertical_desctiption, arguments.crop_operations_list, arguments.nuts2_shapefile, arguments.land_uses_path, arguments.crop_operations_ef_files_dir, arguments.crop_operations_monthly_profiles, arguments.crop_operations_weekly_profiles, arguments.crop_operations_hourly_profiles, arguments.speciation_map, arguments.crop_operations_speciation_profiles, arguments.molecular_weights, - arguments.land_use_by_nut_path, arguments.crop_by_nut_path, arguments.crop_from_landuse_path) + arguments.land_uses_nuts2_path, arguments.crop_by_nut_path, arguments.crop_from_landuse_path) elif sector == 'crop_fertilizers' and comm_world.Get_rank() in sector_procs: from hermesv3_bu.sectors.agricultural_crop_fertilizers_sector import AgriculturalCropFertilizersSector @@ -93,12 +94,12 @@ class SectorManager(object): comm_agr = comm_world.Split(agg_color, agg_procs.index(comm_world.Get_rank())) comm = comm_agr.Split(color, sector_procs.index(comm_world.Get_rank())) self.sector = AgriculturalCropFertilizersSector( - comm_agr, comm, logger, arguments.auxiliary_files_path, grid.shapefile, clip, date_array, + comm_agr, comm, logger, 
arguments.auxiliary_files_path, grid, clip, date_array, arguments.crop_fertilizers_source_pollutants, grid.vertical_desctiption, - arguments.crop_fertilizers_list, arguments.nut_shapefile_ccaa, arguments.land_uses_path, + arguments.crop_fertilizers_list, arguments.nuts2_shapefile, arguments.land_uses_path, arguments.crop_fertilizers_hourly_profiles, arguments.speciation_map, arguments.crop_fertilizers_speciation_profiles, arguments.molecular_weights, - arguments.land_use_by_nut_path, arguments.crop_by_nut_path, arguments.crop_from_landuse_path, + arguments.land_uses_nuts2_path, arguments.crop_by_nut_path, arguments.crop_from_landuse_path, arguments.cultivated_ratio, arguments.fertilizers_rate, arguments.crop_f_parameter, arguments.crop_f_fertilizers, arguments.gridded_ph, arguments.gridded_cec, arguments.fertilizers_denominator_yearly_factor_path, arguments.crop_calendar, @@ -111,28 +112,28 @@ class SectorManager(object): comm_agr = comm_world.Split(agg_color, agg_procs.index(comm_world.Get_rank())) comm = comm_agr.Split(color, sector_procs.index(comm_world.Get_rank())) self.sector = AgriculturalMachinerySector( - comm_agr, comm, logger, arguments.auxiliary_files_path, grid.shapefile, clip, date_array, + comm_agr, comm, logger, arguments.auxiliary_files_path, grid, clip, date_array, arguments.crop_machinery_source_pollutants, grid.vertical_desctiption, - arguments.crop_machinery_list, arguments.nut_shapefile_ccaa, arguments.machinery_list, + arguments.crop_machinery_list, arguments.nuts2_shapefile, arguments.machinery_list, arguments.land_uses_path, arguments.crop_machinery_ef_path, arguments.crop_machinery_monthly_profiles, arguments.crop_machinery_weekly_profiles, arguments.crop_machinery_hourly_profiles, arguments.speciation_map, arguments.crop_machinery_speciation_profiles, arguments.molecular_weights, - arguments.land_use_by_nut_path, arguments.crop_by_nut_path, arguments.crop_from_landuse_path, - arguments.nut_shapefile_prov, arguments.crop_machinery_deterioration_factor_path, + arguments.land_uses_nuts2_path, arguments.crop_by_nut_path, arguments.crop_from_landuse_path, + arguments.nuts3_shapefile, arguments.crop_machinery_deterioration_factor_path, arguments.crop_machinery_load_factor_path, arguments.crop_machinery_vehicle_ratio_path, arguments.crop_machinery_vehicle_units_path, arguments.crop_machinery_vehicle_workhours_path, - arguments.crop_machinery_vehicle_power_path, arguments.crop_machinery_by_nut) + arguments.crop_machinery_vehicle_power_path, arguments.crop_machinery_nuts3) elif sector == 'residential' and comm_world.Get_rank() in sector_procs: from hermesv3_bu.sectors.residential_sector import ResidentialSector self.sector = ResidentialSector( - comm_world.Split(color, sector_procs.index(comm_world.Get_rank())), self.logger, - arguments.auxiliary_files_path, grid.shapefile, clip, date_array, + comm_world.Split(color, sector_procs.index(comm_world.Get_rank())), self.__logger, + arguments.auxiliary_files_path, grid, clip, date_array, arguments.residential_source_pollutants, grid.vertical_desctiption, arguments.fuel_list, - arguments.nut_shapefile_prov, arguments.nut_shapefile_ccaa, arguments.population_density_map, - arguments.population_type_map, arguments.population_type_by_ccaa, arguments.population_type_by_prov, - arguments.energy_consumption_by_prov, arguments.energy_consumption_by_ccaa, + arguments.nuts3_shapefile, arguments.nuts2_shapefile, arguments.population_density_map, + arguments.population_type_map, arguments.population_type_nuts2, 
arguments.population_type_nuts3, + arguments.energy_consumption_nuts3, arguments.energy_consumption_nuts2, arguments.residential_spatial_proxies, arguments.residential_ef_files_path, arguments.residential_heating_degree_day_path, arguments.temperature_daily_files_path, arguments.residential_hourly_profiles, arguments.speciation_map, @@ -141,8 +142,8 @@ class SectorManager(object): elif sector == 'recreational_boats' and comm_world.Get_rank() in sector_procs: from hermesv3_bu.sectors.recreational_boats_sector import RecreationalBoatsSector self.sector = RecreationalBoatsSector( - comm_world.Split(color, sector_procs.index(comm_world.Get_rank())), self.logger, - arguments.auxiliary_files_path, grid.shapefile, clip, date_array, + comm_world.Split(color, sector_procs.index(comm_world.Get_rank())), self.__logger, + arguments.auxiliary_files_path, grid, clip, date_array, arguments.recreational_boats_source_pollutants, grid.vertical_desctiption, arguments.recreational_boats_list, arguments.recreational_boats_density_map, arguments.recreational_boats_by_type, arguments.recreational_boats_ef_path, @@ -153,8 +154,8 @@ class SectorManager(object): elif sector == 'point_sources' and comm_world.Get_rank() in sector_procs: from hermesv3_bu.sectors.point_source_sector import PointSourceSector self.sector = PointSourceSector( - comm_world.Split(color, sector_procs.index(comm_world.Get_rank())), self.logger, - arguments.auxiliary_files_path, grid.shapefile, clip, date_array, + comm_world.Split(color, sector_procs.index(comm_world.Get_rank())), self.__logger, + arguments.auxiliary_files_path, grid, clip, date_array, arguments.point_source_pollutants, grid.vertical_desctiption, arguments.point_source_catalog, arguments.point_source_monthly_profiles, arguments.point_source_weekly_profiles, arguments.point_source_hourly_profiles, @@ -174,8 +175,8 @@ class SectorManager(object): elif sector == 'traffic' and comm_world.Get_rank() in sector_procs: from hermesv3_bu.sectors.traffic_sector import TrafficSector self.sector = TrafficSector( - comm_world.Split(color, sector_procs.index(comm_world.Get_rank())), self.logger, - arguments.auxiliary_files_path, grid.shapefile, clip, date_array, arguments.traffic_pollutants, + comm_world.Split(color, sector_procs.index(comm_world.Get_rank())), self.__logger, + arguments.auxiliary_files_path, grid, clip, date_array, arguments.traffic_pollutants, grid.vertical_desctiption, arguments.road_link_path, arguments.fleet_compo_path, arguments.traffic_speed_hourly_path, arguments.traffic_monthly_profiles, arguments.traffic_weekly_profiles, arguments.traffic_hourly_profiles_mean, @@ -192,26 +193,38 @@ class SectorManager(object): elif sector == 'traffic_area' and comm_world.Get_rank() in sector_procs: from hermesv3_bu.sectors.traffic_area_sector import TrafficAreaSector self.sector = TrafficAreaSector( - comm_world.Split(color, sector_procs.index(comm_world.Get_rank())), self.logger, - arguments.auxiliary_files_path, grid.shapefile, clip, date_array, arguments.traffic_area_pollutants, + comm_world.Split(color, sector_procs.index(comm_world.Get_rank())), self.__logger, + arguments.auxiliary_files_path, grid, clip, date_array, arguments.traffic_area_pollutants, grid.vertical_desctiption, arguments.population_density_map, arguments.speciation_map, arguments.molecular_weights, arguments.do_evaporative, arguments.traffic_area_gas_path, - arguments.popullation_by_municipality, arguments.nut_shapefile_prov, + arguments.population_nuts3, arguments.nuts3_shapefile, 
arguments.traffic_area_speciation_profiles_evaporative, arguments.traffic_area_evaporative_ef_file, arguments.temperature_hourly_files_path, arguments.do_small_cities, arguments.traffic_area_small_cities_path, arguments.traffic_area_speciation_profiles_small_cities, arguments.traffic_area_small_cities_ef_file, arguments.small_cities_monthly_profile, arguments.small_cities_weekly_profile, arguments.small_cities_hourly_profile ) + elif sector == 'solvents' and comm_world.Get_rank() in sector_procs: + from hermesv3_bu.sectors.solvents_sector import SolventsSector + self.sector = SolventsSector( + comm_world.Split(color, sector_procs.index(comm_world.Get_rank())), self.__logger, + arguments.auxiliary_files_path, grid, clip, date_array, arguments.solvents_pollutants, + grid.vertical_desctiption, arguments.speciation_map, arguments.molecular_weights, + arguments.solvents_speciation_profiles, arguments.solvents_monthly_profile, + arguments.solvents_weekly_profile, arguments.solvents_hourly_profile, + arguments.solvents_proxies_path, arguments.solvents_yearly_emissions_by_nut2_path, + arguments.solvents_point_sources_shapefile, arguments.solvents_point_sources_weight_by_nut2_path, + arguments.population_density_map, arguments.population_nuts2, arguments.land_uses_path, + arguments.land_uses_nuts2_path, arguments.nuts2_shapefile) color += 1 - self.logger.write_time_log('SectorManager', '__init__', timeit.default_timer() - spent_time) + self.__logger.write_time_log('SectorManager', '__init__', timeit.default_timer() - spent_time) def run(self): spent_time = timeit.default_timer() emis = self.sector.calculate_emissions() - self.logger.write_time_log('SectorManager', 'run', timeit.default_timer() - spent_time) + self.__logger.write_time_log('SectorManager', 'run', timeit.default_timer() - spent_time) return emis def make_sector_list(self, arguments, max_procs): @@ -221,13 +234,13 @@ class SectorManager(object): for sector in SECTOR_LIST: if vars(arguments)['do_{0}'.format(sector)]: n_procs = vars(arguments)['{0}_processors'.format(sector)] - sector_dict[sector] = [accum + x for x in xrange(n_procs)] + sector_dict[sector] = [accum + x for x in range(n_procs)] accum += n_procs if accum != max_procs: - raise ValueError("The selected number of processors '{0}' does not fit ".format(max_procs) + - "with the sum of processors dedicated for all the sectors " + - "'{0}': {1}".format(accum, {sector: len(sector_procs) - for sector, sector_procs in sector_dict.iteritems()})) + error_exit("The selected number of processors '{0}' does not fit ".format(max_procs) + + "with the sum of processors dedicated for all the sectors " + + "'{0}': {1}".format( + accum, {sector: len(sector_procs) for sector, sector_procs in sector_dict.items()})) - self.logger.write_time_log('SectorManager', 'make_sector_list', timeit.default_timer() - spent_time) + self.__logger.write_time_log('SectorManager', 'make_sector_list', timeit.default_timer() - spent_time) return sector_dict diff --git a/hermesv3_bu/sectors/shipping_port_sector.py b/hermesv3_bu/sectors/shipping_port_sector.py index bb6823bd1373cee9970276815873482073f4e4b3..88759a2bc2ae2cde8aa395f63acb6ab6d5a0c464 100755 --- a/hermesv3_bu/sectors/shipping_port_sector.py +++ b/hermesv3_bu/sectors/shipping_port_sector.py @@ -7,16 +7,18 @@ import numpy as np import timeit from hermesv3_bu.logger.log import Log from hermesv3_bu.io_server.io_shapefile import IoShapefile +from hermesv3_bu.grids.grid import Grid +from hermesv3_bu.tools.checker import check_files, error_exit class 
ShippingPortSector(Sector): - def __init__(self, comm, logger, auxiliary_dir, grid_shp, clip, date_array, source_pollutants, vertical_levels, + def __init__(self, comm, logger, auxiliary_dir, grid, clip, date_array, source_pollutants, vertical_levels, vessel_list, port_list, hoteling_shapefile_path, maneuvering_shapefile_path, ef_dir, engine_percent_path, tonnage_path, load_factor_path, power_path, monthly_profiles_path, weekly_profiles_path, hourly_profiles_path, speciation_map_path, speciation_profiles_path, molecular_weights_path): """ - Initialise the Shipping port sectopr class + Initialise the Shipping port sector class :param comm: Communicator for the sector calculation. :type comm: MPI.COMM @@ -28,8 +30,8 @@ class ShippingPortSector(Sector): created yet. :type auxiliary_dir: str - :param grid_shp: Shapefile with the grid horizontal distribution. - :type grid_shp: GeoDataFrame + :param grid: Grid object. + :type grid: Grid :param date_array: List of datetimes. :type date_array: list(datetime.datetime, ...) @@ -82,8 +84,13 @@ class ShippingPortSector(Sector): spent_time = timeit.default_timer() logger.write_log('===== SHIPPING PORT SECTOR =====') + check_files( + [hoteling_shapefile_path, maneuvering_shapefile_path, engine_percent_path, tonnage_path, load_factor_path, + power_path, monthly_profiles_path, weekly_profiles_path, hourly_profiles_path, speciation_map_path, + speciation_profiles_path, molecular_weights_path, ef_dir]) + super(ShippingPortSector, self).__init__( - comm, logger, auxiliary_dir, grid_shp, clip, date_array, source_pollutants, vertical_levels, + comm, logger, auxiliary_dir, grid, clip, date_array, source_pollutants, vertical_levels, monthly_profiles_path, weekly_profiles_path, hourly_profiles_path, speciation_map_path, speciation_profiles_path, molecular_weights_path) @@ -110,9 +117,8 @@ class ShippingPortSector(Sector): port_shp = gpd.sjoin(port_shp, self.clip.shapefile.to_crs(port_shp.crs), how='inner', op='intersects') port_list = np.unique(port_shp['code'].values) - print port_list if len(port_list) < self.comm.Get_size(): - raise ValueError("The chosen number of processors {0} exceeds the number of involved ports {1}.".format( + error_exit("The chosen number of processors {0} exceeds the number of involved ports {1}.".format( self.comm.Get_size(), len(port_list)) + " Set {0} at shipping_port_processors value.".format( len(port_list))) port_list = np.array_split(port_list, self.comm.Get_size()) @@ -121,8 +127,6 @@ class ShippingPortSector(Sector): port_list = self.comm.scatter(port_list, root=0) - if len(port_list) == 0: - raise ValueError("The number ") return list(port_list) def read_monthly_profiles(self, path): @@ -580,9 +584,9 @@ class ShippingPortSector(Sector): dataframe.reset_index(inplace=True) dataframe.drop(columns=['code'], inplace=True) - dataframe.to_crs(self.grid_shp.crs, inplace=True) + dataframe.to_crs(self.grid.shapefile.crs, inplace=True) dataframe['src_inter_fraction'] = dataframe.geometry.area - dataframe = self.spatial_overlays(dataframe, self.grid_shp, how='intersection') + dataframe = self.spatial_overlays(dataframe, self.grid.shapefile, how='intersection') dataframe['src_inter_fraction'] = dataframe.geometry.area / dataframe['src_inter_fraction'] dataframe[self.source_pollutants] = dataframe[self.source_pollutants].multiply(dataframe["src_inter_fraction"], @@ -609,8 +613,7 @@ class ShippingPortSector(Sector): self.logger.write_log('\t\tCalculating yearly emissions', message_level=2) manoeuvring, hoteling = 
self.calculate_yearly_emissions_by_port_vessel()
-        # print manoeuvring.reset_index().groupby('code').sum()
-        # print hoteling.reset_index().groupby('code').sum()
+
         manoeuvring = self.add_timezone(manoeuvring, self.maneuvering_shapefile_path)
         hoteling = self.add_timezone(hoteling, self.hoteling_shapefile_path)
diff --git a/hermesv3_bu/sectors/solvents_sector.py b/hermesv3_bu/sectors/solvents_sector.py
new file mode 100755
index 0000000000000000000000000000000000000000..8468fb7d9ece0e72a99452acd50e39c2ec902381
--- /dev/null
+++ b/hermesv3_bu/sectors/solvents_sector.py
@@ -0,0 +1,747 @@
+#!/usr/bin/env python
+
+import sys
+import os
+import timeit
+import geopandas as gpd
+import pandas as pd
+import numpy as np
+from warnings import warn
+from hermesv3_bu.sectors.sector import Sector
+from hermesv3_bu.io_server.io_shapefile import IoShapefile
+from hermesv3_bu.io_server.io_raster import IoRaster
+from hermesv3_bu.tools.checker import check_files, error_exit
+from pandas import DataFrame
+from geopandas import GeoDataFrame
+from hermesv3_bu.grids.grid import Grid
+from hermesv3_bu.logger.log import Log
+from hermesv3_bu.clipping.clip import Clip
+
+PROXY_NAMES = {'boat_building': 'boat',
+               'automobile_manufacturing': 'automobile',
+               'car_repairing': 'car_repair',
+               'dry_cleaning': 'dry_clean',
+               'rubber_processing': 'rubber',
+               'paints_manufacturing': 'paints',
+               'inks_manufacturing': 'ink',
+               'glues_manufacturing': 'glues',
+               'pharmaceutical_products_manufacturing': 'pharma',
+               'leather_taning': 'leather',
+               'printing': 'printing',
+               }
+
+
+class SolventsSector(Sector):
+    """
+    Sector class that calculates the solvent emissions.
+
+    It first calculates the horizontal distribution for the different sources and stores them in an auxiliary file
+    during the initialization part.
+
+    Once the initialization is finished, it distributes the emissions of the different sub-sectors on the grid to
+    start the temporal disaggregation.
+    """
+    def __init__(self, comm, logger, auxiliary_dir, grid, clip, date_array, source_pollutants, vertical_levels,
+                 speciation_map_path, molecular_weights_path, speciation_profiles_path, monthly_profile_path,
+                 weekly_profile_path, hourly_profile_path, proxies_map_path, yearly_emissions_by_nut2_path,
+                 point_sources_shapefile_path, point_sources_weight_by_nut2_path, population_raster_path,
+                 population_nuts2_path, land_uses_raster_path, land_uses_nuts2_path, nut2_shapefile_path):
+        """
+        :param comm: Communicator for the sector calculation.
+        :type comm: MPI.COMM
+
+        :param logger: Logger
+        :type logger: Log
+
+        :param auxiliary_dir: Path to the directory where the necessary auxiliary files will be created if they are
+            not created yet.
+        :type auxiliary_dir: str
+
+        :param grid: Grid object.
+        :type grid: Grid
+
+        :param clip: Clip object
+        :type clip: Clip
+
+        :param date_array: List of datetimes.
+        :type date_array: list(datetime.datetime, ...)
+
+        :param source_pollutants: List of input pollutants to take into account.
+        :type source_pollutants: list
+
+        :param vertical_levels: List of top level of each vertical layer.
+        :type vertical_levels: list
+
+        :param speciation_map_path: Path to the CSV file that contains the speciation map. The CSV file must contain
+            the following columns [dst, src, description]
+            The 'dst' column will be used as output pollutant list and the 'src' column as their own input pollutant
+            to be used as a fraction in the speciation profiles.
+        :type speciation_map_path: str
+
+        :param molecular_weights_path: Path to the CSV file that contains all the molecular weights needed. The CSV
+            file must contain the 'Specie' and 'MW' columns.
+        :type molecular_weights_path: str
+
+        :param speciation_profiles_path: Path to the file that contains all the speciation profiles. The CSV file
+            must contain the "Code" column with the value of each P_spec code used in the proxies map. The rest of
+            the columns have to be the same as the column 'dst' of the 'speciation_map_path' file.
+        :type speciation_profiles_path: str
+
+        :param monthly_profile_path: Path to the CSV file that contains all the monthly profiles. The CSV file must
+            contain the following columns [P_month, January, February, ..., November, December]
+            The P_month code has to match with the proxies_map_path file.
+        :type monthly_profile_path: str
+
+        :param weekly_profile_path: Path to the CSV file that contains all the weekly profiles. The CSV file must
+            contain the following columns [P_week, Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday]
+            The P_week code has to match with the proxies_map_path file.
+        :type weekly_profile_path: str
+
+        :param hourly_profile_path: Path to the CSV file that contains all the hourly profiles. The CSV file must
+            contain the following columns [P_hour, 0, 1, 2, 3, ..., 22, 23]
+            The P_hour code has to match with the proxies_map_path file.
+        :type hourly_profile_path: str
+
+        :param proxies_map_path: Path to the CSV file that contains the proxies map.
+        :type proxies_map_path: str
+
+        :param yearly_emissions_by_nut2_path: Path to the CSV file that contains the yearly emissions by subsector
+            and nuts2 level.
+        :type yearly_emissions_by_nut2_path: str
+
+        :param point_sources_shapefile_path: Path to the shapefile that contains the point sources for solvents.
+        :type point_sources_shapefile_path: str
+
+        :param point_sources_weight_by_nut2_path: Path to the CSV file that contains the weight for each proxy and nut2.
+        :type point_sources_weight_by_nut2_path: str
+
+        :param population_raster_path: Path to the population raster.
+        :type population_raster_path: str
+
+        :param population_nuts2_path: Path to the CSV file that contains the amount of population for each nut2.
+        :type population_nuts2_path: str
+
+        :param land_uses_raster_path: Path to the land use raster.
+        :type land_uses_raster_path: str
+
+        :param land_uses_nuts2_path: Path to the CSV file that contains the amount of land use for each nut2.
+        :type land_uses_nuts2_path: str
+
+        :param nut2_shapefile_path: Path to the shapefile that contains the nut2.
+ :type nut2_shapefile_path: str + """ + spent_time = timeit.default_timer() + logger.write_log('===== SOLVENTS SECTOR =====') + + check_files([speciation_map_path, molecular_weights_path, speciation_profiles_path, monthly_profile_path, + weekly_profile_path, hourly_profile_path, proxies_map_path, yearly_emissions_by_nut2_path, + point_sources_shapefile_path, point_sources_weight_by_nut2_path, population_raster_path, + population_nuts2_path, land_uses_raster_path, land_uses_nuts2_path, nut2_shapefile_path]) + + super(SolventsSector, self).__init__( + comm, logger, auxiliary_dir, grid, clip, date_array, source_pollutants, vertical_levels, + monthly_profile_path, weekly_profile_path, hourly_profile_path, speciation_map_path, + speciation_profiles_path, molecular_weights_path) + + self.proxies_map = self.read_proxies(proxies_map_path) + self.check_profiles() + + self.proxy = self.get_proxy_shapefile( + population_raster_path, population_nuts2_path, land_uses_raster_path, land_uses_nuts2_path, + nut2_shapefile_path, point_sources_shapefile_path, point_sources_weight_by_nut2_path) + + self.yearly_emissions_path = yearly_emissions_by_nut2_path + self.logger.write_time_log('SolventsSector', '__init__', timeit.default_timer() - spent_time) + + def read_proxies(self, path): + """ + Read the proxy map. + + It will filter the CONS == '1' snaps and add the 'spatial_proxy' column that the content will match with some + column of the proxy shapefile. + + :param path: path to the CSV file that have the proxy map. + :type path: str + + :return: Proxy map. + :rtype: DataFrame + """ + spent_time = timeit.default_timer() + proxies_df = pd.read_csv(path, dtype=str) + + proxies_df.set_index('snap', inplace=True) + proxies_df = proxies_df.loc[proxies_df['CONS'] == '1'] + proxies_df.drop(columns=['activity', 'CONS', 'gnfr'], inplace=True) + + proxies_df.loc[proxies_df['spatial_proxy'] == 'population', 'proxy_name'] = 'population' + proxies_df.loc[proxies_df['spatial_proxy'] == 'land_use', 'proxy_name'] = \ + 'lu_' + proxies_df['land_use_code'].replace(' ', '_', regex=True) + proxies_df.loc[proxies_df['spatial_proxy'] == 'shapefile', 'proxy_name'] = \ + proxies_df['industry_code'].map(PROXY_NAMES) + + self.logger.write_time_log('SolventsSector', 'read_proxies', timeit.default_timer() - spent_time) + return proxies_df + + def check_profiles(self): + """ + Check that the profiles appear on the profile files. + + It will check the content of the proxies map. + Check that the 'P_month' content appears on the monthly profiles. + Check that the 'P_week' content appears on the weekly profiles. + Check that the 'P_hour' content appears on the hourly profiles. + Check that the 'P_spec' content appears on the speciation profiles. + + It will stop teh execution if the requirements are not satisfied. + + :return: True when everything is OK. + :rtype: bool + """ + spent_time = timeit.default_timer() + # Checking monthly profiles IDs + links_month = set(np.unique(self.proxies_map['P_month'].dropna().values)) + month = set(self.monthly_profiles.index.values) + month_res = links_month - month + if len(month_res) > 0: + error_exit("The following monthly profile IDs reported in the solvent proxies CSV file do not appear " + + "in the monthly profiles file. 
{0}".format(month_res)) + # Checking weekly profiles IDs + links_week = set(np.unique(self.proxies_map['P_week'].dropna().values)) + week = set(self.weekly_profiles.index.values) + week_res = links_week - week + if len(week_res) > 0: + error_exit("The following weekly profile IDs reported in the solvent proxies CSV file do not appear " + + "in the weekly profiles file. {0}".format(week_res)) + # Checking hourly profiles IDs + links_hour = set(np.unique(self.proxies_map['P_hour'].dropna().values)) + hour = set(self.hourly_profiles.index.values) + hour_res = links_hour - hour + if len(hour_res) > 0: + error_exit("The following hourly profile IDs reported in the solvent proxies CSV file do not appear " + + "in the hourly profiles file. {0}".format(hour_res)) + # Checking speciation profiles IDs + links_spec = set(np.unique(self.proxies_map['P_spec'].dropna().values)) + spec = set(self.speciation_profile.index.values) + spec_res = links_spec - spec + if len(spec_res) > 0: + error_exit("The following speciation profile IDs reported in the solvent proxies CSV file do not appear " + + "in the speciation profiles file. {0}".format(spec_res)) + + self.logger.write_time_log('SolventsSector', 'check_profiles', timeit.default_timer() - spent_time) + return True + + def read_yearly_emissions(self, path, nut_list): + """ + Read the yearly emission by snap and nuts2. + + Select only the nuts2 IDs that appear in the selected domain. + + Emissions are provided in T/year -> g/year + + :param path: Path to the CSV file that contains the yearly emissions by snap and nuts2. + :type path: str + + :param nut_list: List of nut codes + :type nut_list: list + + :return: Dataframe with thew amount of NMVOC for each snap and nut2 + :rtype: DataFrame + """ + spent_time = timeit.default_timer() + + year_emis = pd.read_csv(path, dtype={'nuts2_id': int, 'snap': str, 'nmvoc': np.float64}) + # T/year -> g/year + year_emis['nmvoc'] = year_emis['nmvoc'] * 1000000 + year_emis = year_emis[year_emis['nuts2_id'].isin(nut_list)] + year_emis.set_index(['nuts2_id', 'snap'], inplace=True) + year_emis.drop(columns=['gnfr_description', 'gnfr', 'snap_description', 'nuts2_na'], inplace=True) + + self.logger.write_time_log('SolventsSector', 'read_yearly_emissions', timeit.default_timer() - spent_time) + return year_emis + + def get_population_by_nut2(self, path): + """ + Read the CSV file that contains the amount of population by nut2. + + :param path: Path to the CSV file that contains the amount of population by nut2. + :type path: str + + :return: Dataframe with the amount of population by nut2. + :rtype: DataFrame + """ + spent_time = timeit.default_timer() + + pop_by_nut2 = pd.read_csv(path) + pop_by_nut2.set_index('nuts2_id', inplace=True) + pop_by_nut2 = pop_by_nut2.to_dict()['pop'] + + self.logger.write_time_log('SolventsSector', 'get_pop_by_nut2', timeit.default_timer() - spent_time) + return pop_by_nut2 + + def get_point_sources_weights_by_nut2(self, path, proxy_name): + """ + Read the CSV file that contains the amount of weight by industry and nut2. + + :param path: Path to the CSV file that contains the amount of weight by industry and nut2. + :type path: str + + :param proxy_name: Proxy to calculate. + :type proxy_name: str + + :return: DataFrame with the amount of weight by industry and nut2. 
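As a worked example, the unit handling in read_yearly_emissions above can be reproduced on a toy table; the column names follow the patch, while the NUTS2 codes, SNAP codes and tonnages are invented (the real CSV also carries the descriptive columns that the method drops):

import pandas as pd

# Invented sample of the yearly-emissions table (T/year of NMVOC per NUTS2 and SNAP).
year_emis = pd.DataFrame({
    'nuts2_id': [9, 9, 10],
    'snap': ['060102', '060201', '060102'],
    'nmvoc': [12.5, 3.1, 7.8],
})

nut_list = [9]                                        # NUTS2 codes present in the domain

year_emis['nmvoc'] = year_emis['nmvoc'] * 1000000     # T/year -> g/year
year_emis = year_emis[year_emis['nuts2_id'].isin(nut_list)]
year_emis.set_index(['nuts2_id', 'snap'], inplace=True)

print(year_emis)
# nmvoc is now expressed in g/year and only the NUTS2 regions inside the domain remain.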
+ :rtype: DataFrame + """ + spent_time = timeit.default_timer() + + weights_by_nut2 = pd.read_csv(path) + weights_by_nut2['nuts2_id'] = weights_by_nut2['nuts2_id'].astype(int) + weights_by_nut2 = weights_by_nut2[weights_by_nut2['industry_c'] == proxy_name] + weights_by_nut2.drop(columns=['industry_c'], inplace=True) + weights_by_nut2.set_index("nuts2_id", inplace=True) + weights_by_nut2 = weights_by_nut2.to_dict()['weight'] + + self.logger.write_time_log('SolventsSector', 'get_point_sources_weights_by_nut2', + timeit.default_timer() - spent_time) + return weights_by_nut2 + + def get_land_use_by_nut2(self, path, land_uses, nut_codes): + """ + Read the CSV file that contains the amount of land use by nut2. + + :param path: Path to the CSV file that contains the amount of land use by nut2. + :type path: str + + :param land_uses: List of land uses to take into account. + :type land_uses: list + + :param nut_codes: List of nut2 codes to take into account. + :type nut_codes: list + + :return: DataFrame with the amount of land use by nut2. + :rtype: DataFrame + """ + spent_time = timeit.default_timer() + + land_use_by_nut2 = pd.read_csv(path) + land_use_by_nut2 = land_use_by_nut2[land_use_by_nut2['nuts2_id'].isin(nut_codes)] + land_use_by_nut2 = land_use_by_nut2[land_use_by_nut2['land_use'].isin(land_uses)] + land_use_by_nut2.set_index(['nuts2_id', 'land_use'], inplace=True) + + self.logger.write_time_log('SolventsSector', 'get_land_use_by_nut2', timeit.default_timer() - spent_time) + return land_use_by_nut2 + + def get_population_proxy(self, pop_raster_path, pop_by_nut2_path, nut2_shapefile_path): + """ + Calculate the distribution based on the amount of population. + + :param pop_raster_path: Path to the raster file that contains the population information. + :type pop_raster_path: str + + :param pop_by_nut2_path: Path to the CSV file that contains the amount of population by nut2. + :type pop_by_nut2_path: str + + :param nut2_shapefile_path: Path to the shapefile that contains the nut2. + :type nut2_shapefile_path: str + + :return: GeoDataFrame with the population distribution by destiny cell. 
+ :rtype: GeoDataFrame + """ + spent_time = timeit.default_timer() + + # 1st Clip the raster + self.logger.write_log("\t\tCreating clipped population raster", message_level=3) + if self.comm.Get_rank() == 0: + pop_raster_path = IoRaster(self.comm).clip_raster_with_shapefile_poly( + pop_raster_path, self.clip.shapefile, os.path.join(self.auxiliary_dir, 'solvents', 'pop.tif')) + + # 2nd Raster to shapefile + self.logger.write_log("\t\tRaster to shapefile", message_level=3) + pop_shp = IoRaster(self.comm).to_shapefile_parallel( + pop_raster_path, gather=False, bcast=False, crs={'init': 'epsg:4326'}) + + # 3rd Add NUT code + self.logger.write_log("\t\tAdding nut codes to the shapefile", message_level=3) + # if self.comm.Get_rank() == 0: + pop_shp.drop(columns='CELL_ID', inplace=True) + pop_shp.rename(columns={'data': 'population'}, inplace=True) + pop_shp = self.add_nut_code(pop_shp, nut2_shapefile_path, nut_value='nuts2_id') + pop_shp = pop_shp[pop_shp['nut_code'] != -999] + pop_shp = IoShapefile(self.comm).balance(pop_shp) + # pop_shp = IoShapefile(self.comm).split_shapefile(pop_shp) + + # 4th Calculate population percent + self.logger.write_log("\t\tCalculating population percentage on source resolution", message_level=3) + pop_by_nut2 = self.get_population_by_nut2(pop_by_nut2_path) + pop_shp['tot_pop'] = pop_shp['nut_code'].map(pop_by_nut2) + pop_shp['pop_percent'] = pop_shp['population'] / pop_shp['tot_pop'] + pop_shp.drop(columns=['tot_pop', 'population'], inplace=True) + + # 5th Calculate percent by destiny cell + self.logger.write_log("\t\tCalculating population percentage on destiny resolution", message_level=3) + pop_shp.to_crs(self.grid.shapefile.crs, inplace=True) + pop_shp['src_inter_fraction'] = pop_shp.geometry.area + pop_shp = self.spatial_overlays(pop_shp.reset_index(), self.grid.shapefile.reset_index()) + pop_shp.drop(columns=['idx1', 'idx2', 'index'], inplace=True) + pop_shp['src_inter_fraction'] = pop_shp.geometry.area / pop_shp['src_inter_fraction'] + pop_shp['pop_percent'] = pop_shp['pop_percent'] * pop_shp['src_inter_fraction'] + pop_shp.drop(columns=['src_inter_fraction'], inplace=True) + + popu_dist = pop_shp.groupby(['FID', 'nut_code']).sum() + popu_dist.rename(columns={'pop_percent': 'population'}, inplace=True) + + self.logger.write_time_log('SolventsSector', 'get_population_proxie', timeit.default_timer() - spent_time) + return popu_dist + + def get_land_use_proxy(self, land_use_raster, land_use_by_nut2_path, land_uses, nut2_shapefile_path): + """ + Calculate the distribution based on the amount of land use. + + :param land_use_raster: Path to the raster file that contains the land use information. + :type land_use_raster: str + + :param land_use_by_nut2_path: Path to the CSV file that contains the amount of land use by nut2. + :type land_use_by_nut2_path: str + + :param land_uses: List of land uses to take into account on the distribution. + :type land_uses: list + + :param nut2_shapefile_path: Path to the shapefile that contains the nut2. + :type nut2_shapefile_path: str + + :return: GeoDataFrame with the land use distribution for the selected land uses by destiny cell. 
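The area-weighting used by get_population_proxy above (and repeated by get_land_use_proxy below) can be sketched without any geometry, assuming invented populations and intersection areas: each source pixel's share of its NUTS2 total is scaled by the fraction of its area falling inside each destination cell and then summed per cell:

import pandas as pd

# Source pixels: population, the NUTS2 total they belong to and their area (invented numbers).
src = pd.DataFrame({
    'nut_code':   [9, 9],
    'population': [300.0, 700.0],
    'tot_pop':    [1000.0, 1000.0],
    'src_area':   [1.0, 1.0],
})
src['pop_percent'] = src['population'] / src['tot_pop']

# Intersections of each source pixel with the destination grid cells (FID):
# the area of the piece of the pixel that falls inside each cell.
inter = pd.DataFrame({
    'src':        [0, 0, 1],
    'FID':        [10, 11, 11],
    'inter_area': [0.25, 0.75, 1.0],
})
inter = inter.join(src, on='src')
inter['pop_percent'] *= inter['inter_area'] / inter['src_area']

proxy = inter.groupby(['FID', 'nut_code'])['pop_percent'].sum()
print(proxy)   # per-cell share of the NUTS2 population; it sums to 1 over the NUTS2 region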
+ :rtype: GeoDataFrame + """ + spent_time = timeit.default_timer() + # 1st Clip the raster + self.logger.write_log("\t\tCreating clipped land use raster", message_level=3) + lu_raster_path = os.path.join(self.auxiliary_dir, 'solvents', 'lu_{0}.tif'.format( + '_'.join([str(x) for x in land_uses]))) + + if self.comm.Get_rank() == 0: + if not os.path.exists(lu_raster_path): + lu_raster_path = IoRaster(self.comm).clip_raster_with_shapefile_poly( + land_use_raster, self.clip.shapefile, lu_raster_path, values=land_uses) + + # 2nd Raster to shapefile + self.logger.write_log("\t\tRaster to shapefile", message_level=3) + land_use_shp = IoRaster(self.comm).to_shapefile_parallel(lu_raster_path, gather=False, bcast=False) + + # 3rd Add NUT code + self.logger.write_log("\t\tAdding nut codes to the shapefile", message_level=3) + # if self.comm.Get_rank() == 0: + land_use_shp.drop(columns='CELL_ID', inplace=True) + land_use_shp.rename(columns={'data': 'land_use'}, inplace=True) + land_use_shp = self.add_nut_code(land_use_shp, nut2_shapefile_path, nut_value='nuts2_id') + land_use_shp = land_use_shp[land_use_shp['nut_code'] != -999] + land_use_shp = IoShapefile(self.comm).balance(land_use_shp) + # land_use_shp = IoShapefile(self.comm).split_shapefile(land_use_shp) + + # 4th Calculate land_use percent + self.logger.write_log("\t\tCalculating land use percentage on source resolution", message_level=3) + + land_use_shp['area'] = land_use_shp.geometry.area + land_use_by_nut2 = self.get_land_use_by_nut2( + land_use_by_nut2_path, land_uses, np.unique(land_use_shp['nut_code'])) + land_use_shp.drop(columns=['land_use'], inplace=True) + + land_use_shp['fraction'] = land_use_shp.apply( + lambda row: row['area'] / land_use_by_nut2.xs(row['nut_code'], level='nuts2_id').sum(), axis=1) + land_use_shp.drop(columns='area', inplace=True) + + # 5th Calculate percent by dest_cell + self.logger.write_log("\t\tCalculating land use percentage on destiny resolution", message_level=3) + + land_use_shp.to_crs(self.grid.shapefile.crs, inplace=True) + land_use_shp['src_inter_fraction'] = land_use_shp.geometry.area + land_use_shp = self.spatial_overlays(land_use_shp.reset_index(), self.grid.shapefile.reset_index()) + land_use_shp.drop(columns=['idx1', 'idx2', 'index'], inplace=True) + land_use_shp['src_inter_fraction'] = land_use_shp.geometry.area / land_use_shp['src_inter_fraction'] + land_use_shp['fraction'] = land_use_shp['fraction'] * land_use_shp['src_inter_fraction'] + land_use_shp.drop(columns=['src_inter_fraction'], inplace=True) + + land_use_dist = land_use_shp.groupby(['FID', 'nut_code']).sum() + land_use_dist.rename(columns={'fraction': 'lu_{0}'.format('_'.join([str(x) for x in land_uses]))}, inplace=True) + + self.logger.write_time_log('SolventsSector', 'get_land_use_proxy', timeit.default_timer() - spent_time) + return land_use_dist + + def get_point_shapefile_proxy(self, proxy_name, point_shapefile_path, point_sources_weight_by_nut2_path, + nut2_shapefile_path): + """ + Calculate the distribution for the solvent sub sector in the destiny grid cell. + + :param proxy_name: Name of the proxy to be calculated. + :type proxy_name: str + + :param point_shapefile_path: Path to the shapefile that contains all the point sources ant their weights. + :type point_shapefile_path: str + + :param point_sources_weight_by_nut2_path: Path to the CSV file that contains the amount of weight by industry + and nut2. + :type point_sources_weight_by_nut2_path: str + + :param nut2_shapefile_path: Path to the shapefile that contains the nut2. 
+ :type nut2_shapefile_path: str + + :return: GeoDataFrame with the distribution of the selected proxy on the destiny grid cells. + :rtype: GeoDataFrame + """ + spent_time = timeit.default_timer() + + point_shapefile = IoShapefile(self.comm).read_shapefile_parallel(point_shapefile_path) + point_shapefile.drop(columns=['Empresa', 'Empleados', 'Ingresos', 'Consumos', 'LON', 'LAT'], inplace=True) + point_shapefile = point_shapefile[point_shapefile['industry_c'] == + [key for key, value in PROXY_NAMES.items() if value == proxy_name][0]] + point_shapefile = IoShapefile(self.comm).balance(point_shapefile) + point_shapefile.drop(columns=['industry_c'], inplace=True) + point_shapefile = self.add_nut_code(point_shapefile, nut2_shapefile_path, nut_value='nuts2_id') + point_shapefile = point_shapefile[point_shapefile['nut_code'] != -999] + + point_shapefile = IoShapefile(self.comm).gather_shapefile(point_shapefile, rank=0) + if self.comm.Get_rank() == 0: + weight_by_nut2 = self.get_point_sources_weights_by_nut2( + point_sources_weight_by_nut2_path, + [key for key, value in PROXY_NAMES.items() if value == proxy_name][0]) + point_shapefile[proxy_name] = point_shapefile.apply( + lambda row: row['weight'] / weight_by_nut2[row['nut_code']], axis=1) + point_shapefile.drop(columns=['weight'], inplace=True) + # print(point_shapefile.groupby('nut_code')['weight'].sum()) + + point_shapefile = IoShapefile(self.comm).split_shapefile(point_shapefile) + point_shapefile = gpd.sjoin(point_shapefile.to_crs(self.grid.shapefile.crs), self.grid.shapefile.reset_index()) + point_shapefile.drop(columns=['geometry', 'index_right'], inplace=True) + point_shapefile = point_shapefile.groupby(['FID', 'nut_code']).sum() + + self.logger.write_time_log('SolventsSector', 'get_point_shapefile_proxy', timeit.default_timer() - spent_time) + return point_shapefile + + def get_proxy_shapefile(self, population_raster_path, population_nuts2_path, land_uses_raster_path, + land_uses_nuts2_path, nut2_shapefile_path, point_sources_shapefile_path, + point_sources_weight_by_nut2_path): + """ + Calcualte (or read) the proxy shapefile. + + It will split the entire shapoefile into as many processors as selected to split the calculation part. + + :param population_raster_path: Path to the raster file that contains the population information. + :type population_raster_path: str + + :param population_nuts2_path: Path to the CSV file that contains the amount of population by nut2. + :type population_nuts2_path: str + + :param land_uses_raster_path: Path to the raster file that contains the land use information. + :type land_uses_raster_path: str + + :param land_uses_nuts2_path: Path to the CSV file that contains the amount of land use by nut2. + :type land_uses_nuts2_path: str + + :param nut2_shapefile_path: Path to the shapefile that contains the nut2. + :type nut2_shapefile_path: str + + :param point_sources_shapefile_path: Path to the shapefile that contains all the point sources ant their + weights. + :type point_sources_shapefile_path: str + + :param point_sources_weight_by_nut2_path: Path to the CSV file that contains the amount of weight by industry + and nut2. 
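A minimal sketch of the per-NUTS2 normalisation performed in get_point_shapefile_proxy above, where every point source ends up carrying weight / sum(weights of its NUTS2 region); the weights below are invented:

import pandas as pd

# Point sources with an activity weight and their NUTS2 code (invented values).
points = pd.DataFrame({'nut_code': [9, 9, 10], 'weight': [30.0, 70.0, 50.0]})

# Total weight per NUTS2, analogous to the CSV read by get_point_sources_weights_by_nut2().
weight_by_nut2 = points.groupby('nut_code')['weight'].sum().to_dict()

points['share'] = points.apply(lambda r: r['weight'] / weight_by_nut2[r['nut_code']], axis=1)
print(points)   # shares sum to 1 within each NUTS2 region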
+ :type point_sources_weight_by_nut2_path: str + + :return: GeoDataFrame with all the proxies + :rtype: GeoDataFrame + """ + spent_time = timeit.default_timer() + + self.logger.write_log("Getting proxies shapefile", message_level=1) + proxy_names_list = np.unique(self.proxies_map['proxy_name']) + proxy_path = os.path.join(self.auxiliary_dir, 'solvents', 'proxy_distributions.shp') + if not os.path.exists(proxy_path): + proxy_list = [] + for proxy_name in proxy_names_list: + self.logger.write_log("\tGetting proxy for {0}".format(proxy_name), message_level=2) + if proxy_name == 'population': + proxy = self.get_population_proxy(population_raster_path, population_nuts2_path, + nut2_shapefile_path) + elif proxy_name[:3] == 'lu_': + land_uses = [int(x) for x in proxy_name[3:].split('_')] + + proxy = self.get_land_use_proxy(land_uses_raster_path, land_uses_nuts2_path, land_uses, + nut2_shapefile_path) + else: + proxy = self.get_point_shapefile_proxy(proxy_name, point_sources_shapefile_path, + point_sources_weight_by_nut2_path, nut2_shapefile_path) + proxy = IoShapefile(self.comm).gather_shapefile(proxy.reset_index()) + if self.comm.Get_rank() == 0: + proxy_list.append(proxy) + if self.comm.Get_rank() == 0: + proxies = pd.concat(proxy_list, sort=False) + proxies['FID'] = proxies['FID'].astype(int) + proxies['nut_code'] = proxies['nut_code'].astype(int) + proxies = proxies.groupby(['FID', 'nut_code']).sum() + proxies = GeoDataFrame(proxies) + # print(self.grid.shapefile.loc[proxies.index.get_level_values('FID'), 'geometry'].values) + # exit() + proxies = GeoDataFrame( + proxies, geometry=self.grid.shapefile.loc[proxies.index.get_level_values('FID'), 'geometry'].values, + crs=self.grid.shapefile.crs) + IoShapefile(self.comm).write_shapefile_serial(proxies.reset_index(), proxy_path) + else: + proxies = None + else: + if self.comm.Get_rank() == 0: + proxies = IoShapefile(self.comm).read_shapefile_serial(proxy_path) + proxies.set_index(['FID', 'nut_code'], inplace=True) + else: + proxies = None + proxies = IoShapefile(self.comm).split_shapefile(proxies) + + self.logger.write_time_log('SolventsSector', 'get_proxy_shapefile', timeit.default_timer() - spent_time) + return proxies + + def calculate_hourly_emissions(self, yearly_emissions): + """ + Disaggrate to hourly level the yearly emissions. + + :param yearly_emissions: GeoDataFrame with the yearly emissions by destiny cell ID and snap code. + :type yearly_emissions: GeoDataFrame + + :return: GeoDataFrame with the hourly distribution by FID, snap code and time step. 
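The temporal factor built below can be illustrated with flat, invented profiles; only the MF * WF * HF structure is shown, while the real factors come from the monthly, weekly and hourly profile files selected through P_month, P_week and P_hour (and the weekly profile is additionally rebalanced per calendar month):

import datetime

# Toy flat profiles keyed like the CSV profiles (month 1-12, weekday 0-6, hour 0-23).
monthly = {month: 1.0 / 12.0 for month in range(1, 13)}
weekly = {weekday: 1.0 / 7.0 for weekday in range(7)}
hourly = {hour: 1.0 / 24.0 for hour in range(24)}


def temporal_factor(when):
    """Return the MF * WF * HF factor for a single time step."""
    return monthly[when.month] * weekly[when.weekday()] * hourly[when.hour]


yearly_nmvoc = 1.0e6                                   # g/year for one cell and SNAP code
step = datetime.datetime(2016, 11, 29, 13)
hourly_nmvoc = yearly_nmvoc * temporal_factor(step)    # amount assigned to that hour
print(hourly_nmvoc)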
+ :rtype: GeoDataFrame + """ + def get_mf(df): + month_factor = self.monthly_profiles.loc[df.name[1], df.name[0]] + + df['MF'] = month_factor + return df.loc[:, ['MF']] + + def get_wf(df): + weekly_profile = self.calculate_rebalanced_weekly_profile(self.weekly_profiles.loc[df.name[1], :].to_dict(), + df.name[0]) + df['WF'] = weekly_profile[df.name[0].weekday()] + return df.loc[:, ['WF']] + + def get_hf(df): + hourly_profile = self.hourly_profiles.loc[df.name[1], :].to_dict() + hour_factor = hourly_profile[df.name[0]] + + df['HF'] = hour_factor + return df.loc[:, ['HF']] + + spent_time = timeit.default_timer() + + self.logger.write_log('\tHourly disaggregation', message_level=2) + emissions = self.add_dates(yearly_emissions, drop_utc=True) + + emissions['month'] = emissions['date'].dt.month + emissions['weekday'] = emissions['date'].dt.weekday + emissions['hour'] = emissions['date'].dt.hour + emissions['date_as_date'] = emissions['date'].dt.date + + emissions['MF'] = emissions.groupby(['month', 'P_month']).apply(get_mf) + emissions['WF'] = emissions.groupby(['date_as_date', 'P_week']).apply(get_wf) + emissions['HF'] = emissions.groupby(['hour', 'P_hour']).apply(get_hf) + + emissions['temp_factor'] = emissions['MF'] * emissions['WF'] * emissions['HF'] + emissions.drop(columns=['MF', 'P_month', 'month', 'WF', 'P_week', 'weekday', 'HF', 'P_hour', 'hour', 'date', + 'date_as_date'], inplace=True) + emissions['nmvoc'] = emissions['nmvoc'] * emissions['temp_factor'] + emissions.drop(columns=['temp_factor'], inplace=True) + emissions.set_index(['FID', 'snap', 'tstep'], inplace=True) + + self.logger.write_time_log('SolventsSector', 'calculate_hourly_emissions', timeit.default_timer() - spent_time) + return emissions + + def distribute_yearly_emissions(self): + """ + Calcualte the yearly emission by destiny grid cell and snap code. + + :return: GeoDataFrame with the yearly emissions by snap code. + :rtype: GeoDataFrame + """ + spent_time = timeit.default_timer() + self.logger.write_log('\t\tYearly distribution', message_level=2) + + yearly_emis = self.read_yearly_emissions( + self.yearly_emissions_path, np.unique(self.proxy.index.get_level_values('nut_code'))) + year_nuts = np.unique(yearly_emis.index.get_level_values('nuts2_id')) + proxy_nuts = np.unique(self.proxy.index.get_level_values('nut_code')) + unknow_nuts = list(set(proxy_nuts) - set(year_nuts)) + if len(unknow_nuts) > 0: + warn("*WARNING* The {0} nuts2_id have no emissions in the solvents_yearly_emissions_by_nut2_path.".format( + str(unknow_nuts))) + self.proxy.drop(unknow_nuts, level='nut_code', inplace=True) + emis_list = [] + for snap, snap_df in self.proxies_map.iterrows(): + emis = self.proxy.reset_index() + emis['snap'] = snap + emis['P_month'] = snap_df['P_month'] + emis['P_week'] = snap_df['P_week'] + emis['P_hour'] = snap_df['P_hour'] + emis['P_spec'] = snap_df['P_spec'] + + emis['nmvoc'] = emis.apply(lambda row: yearly_emis.loc[(row['nut_code'], snap), 'nmvoc'] * row[ + self.proxies_map.loc[snap, 'proxy_name']], axis=1) + + emis.set_index(['FID', 'snap'], inplace=True) + emis_list.append(emis[['P_month', 'P_week', 'P_hour', 'P_spec', 'nmvoc', 'geometry']]) + emis = pd.concat(emis_list).sort_index() + emis = emis[emis['nmvoc'] > 0] + + self.logger.write_time_log('SolventsSector', 'distribute_yearly_emissions', timeit.default_timer() - spent_time) + return emis + + def speciate(self, dataframe, code='default'): + """ + Spectiate the NMVOC pollutant into as many pollutants as the speciation map indicates. 
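The per-species arithmetic applied in the body below reduces to one multiplication per output pollutant; the profile values, species names and units here are invented, while the real ones come from the speciation profiles file selected through P_spec:

# Invented speciation-profile row: 'VOCtoTOG' converts the NMVOC amount and each
# output pollutant column holds its split factor.
profile = {'VOCtoTOG': 1.15, 'ALD2': 0.002, 'TOL': 0.004}

nmvoc = 500.0                           # NMVOC in one cell and time step
for out_pollutant in ('ALD2', 'TOL'):
    speciated = nmvoc * profile['VOCtoTOG'] * profile[out_pollutant]
    print(out_pollutant, speciated)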
+ + :param dataframe: Emissions to be speciated. + :type dataframe: DataFrame + + :param code: NOt used. + + :return: Speciated emissions. + :rtype: DataFrame + """ + + def calculate_new_pollutant(x, out_p): + sys.stdout.flush() + profile = self.speciation_profile.loc[x.name, ['VOCtoTOG', out_p]] + x[out_p] = x['nmvoc'] * (profile['VOCtoTOG'] * profile[out_p]) + return x[[out_p]] + + spent_time = timeit.default_timer() + self.logger.write_log('\tSpeciation emissions', message_level=2) + + new_dataframe = gpd.GeoDataFrame(index=dataframe.index, data=None, crs=dataframe.crs, + geometry=dataframe.geometry) + for out_pollutant in self.output_pollutants: + self.logger.write_log('\t\tSpeciating {0}'.format(out_pollutant), message_level=3) + new_dataframe[out_pollutant] = dataframe.groupby('P_spec').apply( + lambda x: calculate_new_pollutant(x, out_pollutant)) + new_dataframe.reset_index(inplace=True) + + new_dataframe.drop(columns=['snap', 'geometry'], inplace=True) + new_dataframe.set_index(['FID', 'tstep'], inplace=True) + + self.logger.write_time_log('SolventsSector', 'speciate', timeit.default_timer() - spent_time) + return new_dataframe + + def calculate_emissions(self): + """ + Main function to calculate the emissions. + + :return: Solvent emissions. + :rtype: DataFrame + """ + spent_time = timeit.default_timer() + self.logger.write_log('\tCalculating emissions') + + emissions = self.distribute_yearly_emissions() + emissions = self.calculate_hourly_emissions(emissions) + emissions = self.speciate(emissions) + + emissions.reset_index(inplace=True) + emissions['layer'] = 0 + emissions = emissions.groupby(['FID', 'layer', 'tstep']).sum() + + self.logger.write_time_log('SolventsSector', 'calculate_emissions', timeit.default_timer() - spent_time) + return emissions diff --git a/hermesv3_bu/sectors/traffic_area_sector.py b/hermesv3_bu/sectors/traffic_area_sector.py index 023f2405c502ccb6ca0b0ea0c6f7f206d77e2fb3..44e9ae0ad7b212321dcd6bd88335cb143425f712 100755 --- a/hermesv3_bu/sectors/traffic_area_sector.py +++ b/hermesv3_bu/sectors/traffic_area_sector.py @@ -7,33 +7,51 @@ import geopandas as gpd import pandas as pd import numpy as np from hermesv3_bu.sectors.sector import Sector +from hermesv3_bu.io_server.io_raster import IoRaster from hermesv3_bu.io_server.io_shapefile import IoShapefile from hermesv3_bu.io_server.io_netcdf import IoNetcdf +from hermesv3_bu.tools.checker import check_files, error_exit + +from pandas import DataFrame +from geopandas import GeoDataFrame + pmc_list = ['pmc', 'PMC'] class TrafficAreaSector(Sector): - def __init__(self, comm, logger, auxiliary_dir, grid_shp, clip, date_array, source_pollutants, vertical_levels, - population_tiff_path, speciation_map_path, molecular_weights_path, - do_evaporative, gasoline_path, total_pop_by_prov, nuts_shapefile, speciation_profiles_evaporative, + def __init__(self, comm, logger, auxiliary_dir, grid, clip, date_array, source_pollutants, vertical_levels, + population_tif_path, speciation_map_path, molecular_weights_path, + do_evaporative, gasoline_path, population_nuts3, nuts_shapefile, speciation_profiles_evaporative, evaporative_ef_file, temperature_dir, do_small_cities, small_cities_shp, speciation_profiles_small_cities, small_cities_ef_file, small_cities_monthly_profile, small_cities_weekly_profile, small_cities_hourly_profile): spent_time = timeit.default_timer() logger.write_log('===== TRAFFIC AREA SECTOR =====') + if do_evaporative: + check_files([population_tif_path, speciation_map_path, molecular_weights_path, + 
gasoline_path, population_nuts3, nuts_shapefile, speciation_profiles_evaporative, + evaporative_ef_file, temperature_dir]) + if do_small_cities: + check_files([population_tif_path, speciation_map_path, molecular_weights_path, + small_cities_shp, speciation_profiles_small_cities, small_cities_ef_file, + small_cities_monthly_profile, small_cities_weekly_profile, small_cities_hourly_profile]) super(TrafficAreaSector, self).__init__( - comm, logger, auxiliary_dir, grid_shp, clip, date_array, source_pollutants, vertical_levels, + comm, logger, auxiliary_dir, grid, clip, date_array, source_pollutants, vertical_levels, None, None, None, speciation_map_path, None, molecular_weights_path) self.do_evaporative = do_evaporative self.temperature_dir = temperature_dir self.speciation_profiles_evaporative = self.read_speciation_profiles(speciation_profiles_evaporative) self.evaporative_ef_file = evaporative_ef_file + + # self.create_population_by_nut(population_tif_path, nuts_shapefile, population_nuts3, nut_column='nuts3_id') + if do_evaporative: - self.evaporative = self.init_evaporative(population_tiff_path, nuts_shapefile, gasoline_path, - total_pop_by_prov) + logger.write_log('\tInitialising evaporative emissions.', message_level=2) + self.population_percent = self.get_population_percent(population_tif_path, population_nuts3, nuts_shapefile) + self.evaporative = self.init_evaporative(gasoline_path) else: self.evaporative = None @@ -44,159 +62,240 @@ class TrafficAreaSector(Sector): self.small_cities_weekly_profile = self.read_weekly_profiles(small_cities_weekly_profile) self.small_cities_hourly_profile = self.read_hourly_profiles(small_cities_hourly_profile) if do_small_cities: - self.small_cities = self.init_small_cities(population_tiff_path, small_cities_shp) + logger.write_log('\tInitialising small cities emissions.', message_level=2) + self.small_cities = self.init_small_cities(population_tif_path, small_cities_shp) else: self.small_cities = None self.logger.write_time_log('TrafficAreaSector', '__init__', timeit.default_timer() - spent_time) - def init_evaporative(self, global_path, provinces_shapefile, gasoline_path, total_pop_by_prov): - spent_time = timeit.default_timer() - - if self.comm.Get_rank() == 0: - if not os.path.exists(os.path.join(self.auxiliary_dir, 'traffic_area', 'vehicle_by_cell.shp')): - pop = self.get_clipped_population( - global_path, os.path.join(self.auxiliary_dir, 'traffic_area', 'population.shp')) - pop = self.make_population_by_nuts( - pop, provinces_shapefile, os.path.join(self.auxiliary_dir, 'traffic_area', 'pop_NUT.shp'), - write_file=False) - pop = self.make_population_by_nuts_cell( - pop, os.path.join(self.auxiliary_dir, 'traffic_area', 'pop_NUT_cell.shp')) - - veh_cell = self.make_vehicles_by_cell( - pop, gasoline_path, pd.read_csv(total_pop_by_prov), - os.path.join(self.auxiliary_dir, 'traffic_area', 'vehicle_by_cell.shp')) - else: - veh_cell = IoShapefile(self.comm).read_shapefile_serial( - os.path.join(self.auxiliary_dir, 'traffic_area', 'vehicle_by_cell.shp')) - else: - veh_cell = None - - veh_cell = IoShapefile(self.comm).split_shapefile(veh_cell) + def get_population_by_nut2(self, path): + """ + Read the CSV file that contains the amount of population by nut3. - self.logger.write_time_log('TrafficAreaSector', 'init_evaporative', timeit.default_timer() - spent_time) - return veh_cell + :param path: Path to the CSV file that contains the amount of population by nut3. 
+ :type path: str - def init_small_cities(self, global_path, small_cities_shapefile): + :return: DataFrame with the amount of population by nut3. + :rtype: DataFrame + """ spent_time = timeit.default_timer() - if self.comm.Get_rank() == 0: - if not os.path.exists(os.path.join(self.auxiliary_dir, 'traffic_area', 'pop_SMALL_cell.shp')): - pop = self.get_clipped_population( - global_path, os.path.join(self.auxiliary_dir, 'traffic_area', 'population.shp')) - pop = self.make_population_by_nuts( - pop, small_cities_shapefile, os.path.join(self.auxiliary_dir, 'traffic_area', 'pop_SMALL.shp'), - write_file=False) - pop = self.make_population_by_nuts_cell( - pop, os.path.join(self.auxiliary_dir, 'traffic_area', 'pop_SMALL_cell.shp')) - else: - pop = IoShapefile(self.comm).read_shapefile_serial( - os.path.join(self.auxiliary_dir, 'traffic_area', 'pop_SMALL_cell.shp')) - else: - pop = None - pop = IoShapefile(self.comm).split_shapefile(pop) - self.logger.write_time_log('TrafficAreaSector', 'init_small_cities', timeit.default_timer() - spent_time) - return pop + pop_by_nut3 = pd.read_csv(path) + pop_by_nut3.set_index('nuts3_id', inplace=True) + pop_by_nut3 = pop_by_nut3.to_dict()['population'] - def get_clipped_population(self, global_path, population_shapefile_path): - from hermesv3_bu.io_server.io_raster import IoRaster - spent_time = timeit.default_timer() + self.logger.write_time_log('TrafficAreaSector', 'get_pop_by_nut3', timeit.default_timer() - spent_time) + return pop_by_nut3 - if not os.path.exists(population_shapefile_path): - population_density = IoRaster(self.comm).clip_raster_with_shapefile_poly( - global_path, self.clip.shapefile, - os.path.join(self.auxiliary_dir, 'traffic_area', 'population.tif')) - population_density = IoRaster(self.comm).to_shapefile_serie(population_density) - else: - population_density = IoShapefile(self.comm).read_shapefile_serial(population_shapefile_path) + def get_population_percent(self, pop_raster_path, pop_by_nut_path, nut_shapefile_path): + """ + Calculate the percentage based on the amount of population. + + :param pop_raster_path: Path to the raster file that contains the population information. + :type pop_raster_path: str - self.logger.write_time_log('TrafficAreaSector', 'get_clipped_population', timeit.default_timer() - spent_time) + :param pop_by_nut_path: Path to the CSV file that contains the amount of population by nut3. + :type pop_by_nut_path: str - return population_density + :param nut_shapefile_path: Path to the shapefile that contains the nut3. + :type nut_shapefile_path: str - def make_population_by_nuts(self, population_shape, nut_shp, pop_by_nut_path, write_file=True, csv_path=None, - column_id='ORDER07'): + :return: DataFrame with the population distribution by destiny cell. 
+ :rtype: DataFrame + """ spent_time = timeit.default_timer() - if not os.path.exists(pop_by_nut_path): - nut_df = IoShapefile(self.comm).read_shapefile_serial(nut_shp) - population_shape['area_in'] = population_shape.geometry.area - df = gpd.overlay(population_shape, nut_df.to_crs(population_shape.crs), how='intersection') - df.crs = population_shape.crs - df.loc[:, 'data'] = df['data'] * (df.geometry.area / df['area_in']) - del df['area_in'] - if write_file: - IoShapefile(self.comm).write_shapefile_serial(df, pop_by_nut_path) - if csv_path is not None: - df = df.loc[:, ['data', column_id]].groupby(column_id).sum() - df.to_csv(csv_path) + pop_percent_path = os.path.join(self.auxiliary_dir, 'traffic_area', 'population_percent') + if not os.path.exists(pop_percent_path): + # 1st Clip the raster + self.logger.write_log("\t\tCreating clipped population raster", message_level=3) + if self.comm.Get_rank() == 0: + pop_raster_path = IoRaster(self.comm).clip_raster_with_shapefile_poly( + pop_raster_path, self.clip.shapefile, os.path.join(self.auxiliary_dir, 'traffic_area', 'pop.tif')) + + # 2nd Raster to shapefile + self.logger.write_log("\t\tRaster to shapefile", message_level=3) + pop_shp = IoRaster(self.comm).to_shapefile_parallel( + pop_raster_path, gather=False, bcast=False, crs={'init': 'epsg:4326'}) + + # 3rd Add NUT code + self.logger.write_log("\t\tAdding nut codes to the shapefile", message_level=3) + # if self.comm.Get_rank() == 0: + pop_shp.drop(columns='CELL_ID', inplace=True) + pop_shp.rename(columns={'data': 'population'}, inplace=True) + pop_shp = self.add_nut_code(pop_shp, nut_shapefile_path, nut_value='nuts3_id') + pop_shp = pop_shp[pop_shp['nut_code'] != -999] + pop_shp = IoShapefile(self.comm).balance(pop_shp) + + # 4th Calculate population percent + self.logger.write_log("\t\tCalculating population percentage on source resolution", message_level=3) + pop_by_nut2 = self.get_population_by_nut2(pop_by_nut_path) + pop_shp['tot_pop'] = pop_shp['nut_code'].map(pop_by_nut2) + pop_shp['pop_percent'] = pop_shp['population'] / pop_shp['tot_pop'] + pop_shp.drop(columns=['tot_pop', 'population'], inplace=True) + + # 5th Calculate percent by destiny cell + self.logger.write_log("\t\tCalculating population percentage on destiny resolution", message_level=3) + pop_shp.to_crs(self.grid.shapefile.crs, inplace=True) + pop_shp['src_inter_fraction'] = pop_shp.geometry.area + pop_shp = self.spatial_overlays(pop_shp.reset_index(), self.grid.shapefile.reset_index()) + pop_shp.drop(columns=['idx1', 'idx2', 'index'], inplace=True) + pop_shp['src_inter_fraction'] = pop_shp.geometry.area / pop_shp['src_inter_fraction'] + pop_shp['pop_percent'] = pop_shp['pop_percent'] * pop_shp['src_inter_fraction'] + pop_shp.drop(columns=['src_inter_fraction'], inplace=True) + + pop_shp = IoShapefile(self.comm).gather_shapefile(pop_shp) + if self.comm.Get_rank() == 0: + popu_dist = pop_shp.groupby(['FID', 'nut_code']).sum() + popu_dist = GeoDataFrame( + popu_dist, + geometry=self.grid.shapefile.loc[popu_dist.index.get_level_values('FID'), 'geometry'].values, + crs=self.grid.shapefile.crs) + IoShapefile(self.comm).write_shapefile_serial(popu_dist.reset_index(), pop_percent_path) + else: + popu_dist = None + popu_dist = IoShapefile(self.comm).split_shapefile(popu_dist) else: - df = IoShapefile(self.comm).read_shapefile_serial(pop_by_nut_path) + popu_dist = IoShapefile(self.comm).read_shapefile_parallel(pop_percent_path) + popu_dist.set_index(['FID', 'nut_code'], inplace=True) - 
self.logger.write_time_log('TrafficAreaSector', 'make_population_by_nuts', timeit.default_timer() - spent_time) - return df - - def make_population_by_nuts_cell(self, pop_by_nut, pop_nut_cell_path, write_file=True): - spent_time = timeit.default_timer() + self.logger.write_time_log('TrafficAreaSector', 'get_population_percent', timeit.default_timer() - spent_time) + return popu_dist - if not os.path.exists(pop_nut_cell_path): + def get_population(self, pop_raster_path, nut_shapefile_path): + """ + Calculate the amount of population. - pop_by_nut = pop_by_nut.to_crs(self.grid_shp.crs) + :param pop_raster_path: Path to the raster file that contains the population information. + :type pop_raster_path: str - del pop_by_nut['NAME'] - pop_by_nut['area_in'] = pop_by_nut.geometry.area + :param nut_shapefile_path: Path to the shapefile that contains the small cities. + :type nut_shapefile_path: str - # df = gpd.overlay(pop_by_nut, grid_shp, how='intersection') - df = self.spatial_overlays(pop_by_nut, self.grid_shp.reset_index(), how='intersection') + :return: DataFrame with the amount of population distribution by small city. + :rtype: DataFrame + """ + spent_time = timeit.default_timer() - df.crs = self.grid_shp.crs - df.loc[:, 'data'] = df['data'] * (df.geometry.area / df['area_in']) - del pop_by_nut['area_in'] - if write_file: - IoShapefile(self.comm).write_shapefile_serial(df, pop_nut_cell_path) + pop_path = os.path.join(self.auxiliary_dir, 'traffic_area', 'population_small') + if not os.path.exists(pop_path): + # 1st Clip the raster + self.logger.write_log("\t\tCreating clipped population raster", message_level=3) + if self.comm.Get_rank() == 0: + pop_raster_path = IoRaster(self.comm).clip_raster_with_shapefile_poly( + pop_raster_path, self.clip.shapefile, os.path.join(self.auxiliary_dir, 'traffic_area', 'pop.tif')) + + # 2nd Raster to shapefile + self.logger.write_log("\t\tRaster to shapefile", message_level=3) + pop_shp = IoRaster(self.comm).to_shapefile_parallel( + pop_raster_path, gather=False, bcast=False, crs={'init': 'epsg:4326'}) + + # 3rd Add NUT code + self.logger.write_log("\t\tAdding nut codes to the shapefile", message_level=3) + # if self.comm.Get_rank() == 0: + pop_shp.drop(columns='CELL_ID', inplace=True) + pop_shp.rename(columns={'data': 'population'}, inplace=True) + + pop_shp = self.add_nut_code(pop_shp, nut_shapefile_path, nut_value='ORDER08') + pop_shp = pop_shp[pop_shp['nut_code'] != -999] + pop_shp = IoShapefile(self.comm).balance(pop_shp) + + # 4th Calculate percent by destiny cell + self.logger.write_log("\t\tCalculating population percentage on destiny resolution", message_level=3) + pop_shp.to_crs(self.grid.shapefile.crs, inplace=True) + pop_shp['src_inter_fraction'] = pop_shp.geometry.area + pop_shp = self.spatial_overlays(pop_shp.reset_index(), self.grid.shapefile.reset_index()) + pop_shp.drop(columns=['idx1', 'idx2', 'index'], inplace=True) + pop_shp['src_inter_fraction'] = pop_shp.geometry.area / pop_shp['src_inter_fraction'] + pop_shp['population'] = pop_shp['population'] * pop_shp['src_inter_fraction'] + pop_shp.drop(columns=['src_inter_fraction', 'nut_code'], inplace=True) + + pop_shp = IoShapefile(self.comm).gather_shapefile(pop_shp) + if self.comm.Get_rank() == 0: + popu_dist = pop_shp.groupby(['FID']).sum() + popu_dist = GeoDataFrame( + popu_dist, + geometry=self.grid.shapefile.loc[popu_dist.index.get_level_values('FID'), 'geometry'].values, + crs=self.grid.shapefile.crs) + IoShapefile(self.comm).write_shapefile_serial(popu_dist.reset_index(), pop_path) + 
else: + popu_dist = None + popu_dist = IoShapefile(self.comm).split_shapefile(popu_dist) else: - df = IoShapefile(self.comm).read_shapefile_serial(pop_nut_cell_path) - - self.logger.write_time_log('TrafficAreaSector', 'make_population_by_nuts_cell', - timeit.default_timer() - spent_time) - return df + popu_dist = IoShapefile(self.comm).read_shapefile_parallel(pop_path) + popu_dist.set_index(['FID'], inplace=True) - def make_vehicles_by_cell(self, pop_nut_cell, gasoline_path, total_pop_by_nut, veh_by_cell_path, - column_id='ORDER07'): - spent_time = timeit.default_timer() + self.logger.write_time_log('TrafficAreaSector', 'get_population_percent', timeit.default_timer() - spent_time) + return popu_dist - if not os.path.exists(veh_by_cell_path): + def init_evaporative(self, gasoline_path): + """ + Create the gasoline vehicle by destiny cell. - total_pop_by_nut.loc[:, column_id] = total_pop_by_nut[column_id].astype(np.int16) - pop_nut_cell.loc[:, column_id] = pop_nut_cell[column_id].astype(np.int16) + :param gasoline_path: Path to the CSV file that contains the amount of vehicles by NUTS3. + :type gasoline_path: str - df = pop_nut_cell.merge(total_pop_by_nut, left_on=column_id, right_on=column_id, how='left') + :return: Shapefile with the vehicle distribution. + :rtype: GeoDataFrame + """ + spent_time = timeit.default_timer() + veh_cell_path = os.path.join(self.auxiliary_dir, 'traffic_area', 'vehicle_by_cell') + if not os.path.exists(veh_cell_path): + veh_cell = self.make_vehicles_by_cell(gasoline_path) + IoShapefile(self.comm).write_shapefile_parallel(veh_cell.reset_index(), veh_cell_path) + else: + self.logger.write_log('\t\tReading vehicle shapefile by cell.', message_level=3) + veh_cell = IoShapefile(self.comm).read_shapefile_parallel(veh_cell_path) + veh_cell.set_index('FID', inplace=True) - df['pop_percent'] = df['data_x'] / df['data_y'] - del df['data_x'], df['data_y'], df['CELL_ID'] + self.logger.write_time_log('TrafficAreaSector', 'init_evaporative', timeit.default_timer() - spent_time) + return veh_cell - gas_df = pd.read_csv(gasoline_path, index_col='COPERT_V_name').transpose() - vehicle_type_list = list(gas_df.columns.values) - gas_df.loc[:, column_id] = gas_df.index.astype(np.int16) + def init_small_cities(self, global_path, small_cities_shapefile): + spent_time = timeit.default_timer() + pop = self.get_population(global_path, small_cities_shapefile) - df = df.merge(gas_df, left_on=column_id, right_on=column_id, how='left') - for vehicle_type in vehicle_type_list: - df.loc[:, vehicle_type] = df[vehicle_type] * df['pop_percent'] + self.logger.write_time_log('TrafficAreaSector', 'init_small_cities', timeit.default_timer() - spent_time) + return pop - del df['pop_percent'], df[column_id] + def read_vehicles_by_nut(self, path): + spent_time = timeit.default_timer() - aux_df = df.loc[:, ['FID'] + vehicle_type_list].groupby('FID').sum() - aux_df.loc[:, 'FID'] = aux_df.index + vehicles_by_nut = pd.read_csv(path, index_col='COPERT_V_name') + vehicle_list = vehicles_by_nut.index.values + nut_list = vehicles_by_nut.columns.values.astype(np.int32) + vehicles_by_nut = pd.DataFrame(vehicles_by_nut.values.T, index=nut_list, columns=vehicle_list) + vehicles_by_nut.index.name = 'nuts3_id' - geom = self.grid_shp.loc[aux_df.index, 'geometry'] + self.logger.write_time_log('TrafficAreaSector', 'read_vehicles_by_nut', timeit.default_timer() - spent_time) + return vehicles_by_nut - df = gpd.GeoDataFrame(aux_df, geometry=geom, crs=pop_nut_cell.crs) - IoShapefile(self.comm).write_shapefile_serial(df, 
veh_by_cell_path) + def make_vehicles_by_cell(self, gasoline_path): + spent_time = timeit.default_timer() + vehicles_by_nut = self.read_vehicles_by_nut(gasoline_path) + + vehicle_list = vehicles_by_nut.columns.values + vehicle_by_cell = pd.merge(self.population_percent.reset_index(), vehicles_by_nut.reset_index(), + left_on='nut_code', right_on='nuts3_id', how='left') + vehicle_by_cell.drop(columns=['nut_code', 'nuts3_id'], inplace=True) + vehicle_by_cell[vehicle_list] = vehicle_by_cell[vehicle_list].multiply( + vehicle_by_cell['pop_percent'], axis='index') + vehicle_by_cell.drop(columns=['pop_percent'], inplace=True) + vehicle_by_cell = IoShapefile(self.comm).gather_shapefile(vehicle_by_cell, rank=0) + if self.comm.Get_rank() == 0: + vehicle_by_cell = vehicle_by_cell.groupby('FID').sum() else: - df = IoShapefile(self.comm).read_shapefile_serial(veh_by_cell_path) + vehicle_by_cell = None + vehicle_by_cell = IoShapefile(self.comm).split_shapefile(vehicle_by_cell) + + vehicle_by_cell = GeoDataFrame( + vehicle_by_cell, + geometry=self.grid.shapefile.loc[vehicle_by_cell.index.get_level_values('FID'), 'geometry'].values, + crs=self.grid.shapefile.crs) self.logger.write_time_log('TrafficAreaSector', 'make_vehicles_by_cell', timeit.default_timer() - spent_time) - return df + return vehicle_by_cell def get_profiles_from_temperature(self, temperature, default=False): spent_time = timeit.default_timer() @@ -207,11 +306,11 @@ class TrafficAreaSector(Sector): [0.025, 0.025, 0.025, 0.025, 0.025, 0.027083, 0.03125, 0.0375, 0.045833, 0.05625, 0.060417, 0.066667, 0.06875, 0.072917, 0.070833, 0.064583, 0.05625, 0.045833, 0.0375, 0.03125, 0.027083, 0.025, 0.025, 0.025]) - for x in xrange(24): + for x in range(24): temperature['t_{0}'.format(x)] = default_profile[x] else: - temp_list = ['t_{0}'.format(x) for x in xrange(24)] + temp_list = ['t_{0}'.format(x) for x in range(24)] temperature.loc[:, temp_list] = temperature[temp_list] + 273.15 temperature.loc[:, temp_list] = temperature[temp_list].subtract(temperature[temp_list].min(axis=1), axis=0) @@ -232,6 +331,7 @@ class TrafficAreaSector(Sector): def calculate_evaporative_emissions(self): spent_time = timeit.default_timer() + self.evaporative.reset_index(inplace=True) veh_list = list(self.evaporative.columns.values) veh_list.remove('FID') veh_list.remove('geometry') @@ -250,13 +350,13 @@ class TrafficAreaSector(Sector): temperature = IoNetcdf(self.comm).get_hourly_data_from_netcdf( self.evaporative['c_lon'].min(), self.evaporative['c_lon'].max(), self.evaporative['c_lat'].min(), self.evaporative['c_lat'].max(), self.temperature_dir, 'tas', self.date_array) - temperature.rename(columns={x: 't_{0}'.format(x) for x in xrange(len(self.date_array))}, inplace=True) + temperature.rename(columns={x: 't_{0}'.format(x) for x in range(len(self.date_array))}, inplace=True) # From Kelvin to Celsius degrees - temperature.loc[:, ['t_{0}'.format(x) for x in xrange(len(self.date_array))]] = \ - temperature.loc[:, ['t_{0}'.format(x) for x in xrange(len(self.date_array))]] - 273.15 + temperature.loc[:, ['t_{0}'.format(x) for x in range(len(self.date_array))]] = \ + temperature.loc[:, ['t_{0}'.format(x) for x in range(len(self.date_array))]] - 273.15 temperature_mean = gpd.GeoDataFrame(temperature[['t_{0}'.format(x) for x in - xrange(len(self.date_array))]].mean(axis=1), + range(len(self.date_array))]].mean(axis=1), columns=['temp'], geometry=temperature.geometry) temperature_mean['REC'] = temperature['REC'] @@ -265,8 +365,8 @@ class TrafficAreaSector(Sector): 
df1=self.evaporative, df2=temperature_mean, geom1_col='centroid', src_column='REC', axis=1) del self.evaporative['c_lat'], self.evaporative['c_lon'], self.evaporative['centroid'] - IoShapefile(self.comm).write_shapefile_serial( - self.evaporative, os.path.join(self.auxiliary_dir, 'traffic_area', 'vehicle_by_cell.shp')) + IoShapefile(self.comm).write_shapefile_parallel( + self.evaporative, os.path.join(self.auxiliary_dir, 'traffic_area', 'vehicle_by_cell')) else: del self.evaporative['c_lat'], self.evaporative['c_lon'], self.evaporative['centroid'] @@ -324,7 +424,7 @@ class TrafficAreaSector(Sector): spent_time = timeit.default_timer() speciated_df = self.evaporative.drop(columns=['nmvoc']) - out_p_list = [out_p for out_p, in_p_aux in self.speciation_map.iteritems() if in_p_aux == 'nmvoc'] + out_p_list = [out_p for out_p, in_p_aux in self.speciation_map.items() if in_p_aux == 'nmvoc'] for p in out_p_list: # From g/day to mol/day @@ -333,22 +433,18 @@ class TrafficAreaSector(Sector): self.logger.write_time_log('TrafficAreaSector', 'speciate_evaporative', timeit.default_timer() - spent_time) return speciated_df - def small_cities_emissions_by_population(self, df): + def small_cities_emissions_by_population(self, pop_by_cell): spent_time = timeit.default_timer() - df = df.loc[:, ['data', 'FID']].groupby('FID').sum() - # print pop_nut_cell ef_df = pd.read_csv(self.small_cities_ef_file, sep=',') - # print ef_df ef_df.drop(['Code', 'Copert_V_name'], axis=1, inplace=True) for pollutant in ef_df.columns.values: - # print ef_df[pollutant].iloc[0] - df[pollutant] = df['data'] * ef_df[pollutant].iloc[0] - df.drop('data', axis=1, inplace=True) + pop_by_cell[pollutant] = pop_by_cell['population'] * ef_df[pollutant].iloc[0] + pop_by_cell.drop(columns=['population'], inplace=True) self.logger.write_time_log('TrafficAreaSector', 'small_cities_emissions_by_population', timeit.default_timer() - spent_time) - return df + return pop_by_cell def add_timezones(self, grid, default=False): from timezonefinder import TimezoneFinder @@ -364,7 +460,6 @@ class TrafficAreaSector(Sector): inc = 1 while len(grid.loc[grid['timezone'] == '', :]) > 0: - print len(grid.loc[grid['timezone'] == '', :]) grid.loc[grid['timezone'] == '', 'timezone'] = aux_grid.loc[grid['timezone'] == '', :].apply( lambda x: tz.closest_timezone_at(lng=x['lons'], lat=x['lats'], delta_degree=inc), axis=1) inc += 1 @@ -378,21 +473,19 @@ class TrafficAreaSector(Sector): p_names = small_cities.columns.values - aux_grid = self.grid_shp.loc[self.grid_shp['FID'].isin(small_cities.index.values), :] + aux_grid = self.grid.shapefile.loc[small_cities.index.values, :].reset_index().copy() aux_grid = self.add_timezone(aux_grid) aux_grid.set_index('FID', inplace=True) small_cities = small_cities.merge(aux_grid.loc[:, ['timezone']], left_index=True, right_index=True, how='left') - small_cities.loc[:, 'utc'] = self.date_array[0] + small_cities['utc'] = self.date_array[0] small_cities['date'] = small_cities.groupby('timezone')['utc'].apply( lambda x: pd.to_datetime(x).dt.tz_localize(pytz.utc).dt.tz_convert(x.name).dt.tz_localize(None)) - small_cities.drop(['utc', 'timezone'], inplace=True, axis=1) - # print small_cities - + small_cities.drop(columns=['utc', 'timezone'], inplace=True) df_list = [] - for tstep in xrange(len(self.date_array)): + for tstep in range(len(self.date_array)): small_cities['month'] = small_cities['date'].dt.month small_cities['weekday'] = small_cities['date'].dt.dayofweek small_cities['hour'] = small_cities['date'].dt.hour @@ -446,13 +539,13 
@@ class TrafficAreaSector(Sector): spent_time = timeit.default_timer() if self.do_evaporative and self.do_small_cities: - dataset = pd.concat([self.evaporative, self.small_cities]) + dataset = pd.concat([self.evaporative, self.small_cities], sort=False) elif self.do_evaporative: dataset = self.evaporative elif self.do_small_cities: dataset = self.small_cities else: - raise ValueError('No traffic area emission selected. do_evaporative and do_small_cities are False') + error_exit('No traffic area emission selected. do_evaporative and do_small_cities are False') dataset['layer'] = 0 dataset = dataset.groupby(['FID', 'layer', 'tstep']).sum() @@ -462,10 +555,13 @@ class TrafficAreaSector(Sector): def calculate_emissions(self): spent_time = timeit.default_timer() + self.logger.write_log('\tCalculating traffic area.', message_level=2) if self.do_evaporative: + self.logger.write_log('\tCalculating evaporative emissions.', message_level=2) self.calculate_evaporative_emissions() if self.do_small_cities: + self.logger.write_log('\tCalculating small cities emissions.', message_level=2) self.calculate_small_cities_emissions() emissions = self.to_grid() diff --git a/hermesv3_bu/sectors/traffic_sector.py b/hermesv3_bu/sectors/traffic_sector.py index 2df4f77c2a3908fd08bb51c76906d9079f0df87f..6ff9e65f3cffa61b178a09600fa5ba868a0990ba 100755 --- a/hermesv3_bu/sectors/traffic_sector.py +++ b/hermesv3_bu/sectors/traffic_sector.py @@ -4,13 +4,16 @@ import os import timeit import pandas as pd +from pandas import DataFrame import geopandas as gpd +from geopandas import GeoDataFrame import numpy as np from datetime import timedelta import warnings from hermesv3_bu.logger.log import Log from hermesv3_bu.sectors.sector import Sector from hermesv3_bu.io_server.io_netcdf import IoNetcdf +from hermesv3_bu.tools.checker import check_files, error_exit from ctypes import cdll, CDLL cdll.LoadLibrary("libc.so.6") @@ -19,6 +22,7 @@ libc.malloc_trim(0) MIN_RAIN = 0.254 # After USEPA (2011) RECOVERY_RATIO = 0.0872 # After Amato et al. (2012) +FINAL_PROJ = {'init': 'epsg:3035'} # https://epsg.io/3035 ETRS89 / LAEA Europe aerosols = ['oc', 'ec', 'pno3', 'pso4', 'pmfine', 'pmc', 'poa', 'poc', 'pec', 'pcl', 'pnh4', 'pna', 'pmg', 'pk', 'pca', @@ -41,7 +45,7 @@ class TrafficSector(Sector): relative to the timesteps. 
""" - def __init__(self, comm, logger, auxiliary_dir, grid_shp, clip, date_array, source_pollutants, vertical_levels, + def __init__(self, comm, logger, auxiliary_dir, grid, clip, date_array, source_pollutants, vertical_levels, road_link_path, fleet_compo_path, speed_hourly_path, monthly_profiles_path, weekly_profiles_path, hourly_mean_profiles_path, hourly_weekday_profiles_path, hourly_saturday_profiles_path, hourly_sunday_profiles_path, ef_common_path, vehicle_list=None, load=0.5, speciation_map_path=None, @@ -52,8 +56,47 @@ class TrafficSector(Sector): spent_time = timeit.default_timer() logger.write_log('===== TRAFFIC SECTOR =====') + if do_hot: + check_files( + [road_link_path, fleet_compo_path, speed_hourly_path, monthly_profiles_path, weekly_profiles_path, + hourly_mean_profiles_path, hourly_weekday_profiles_path, hourly_saturday_profiles_path, + hourly_sunday_profiles_path, speciation_map_path, molecular_weights_path, hot_cold_speciation] + + [os.path.join(ef_common_path, "hot_{0}.csv".format(pol)) for pol in source_pollutants]) + if do_cold: + check_files( + [road_link_path, fleet_compo_path, speed_hourly_path, monthly_profiles_path, weekly_profiles_path, + hourly_mean_profiles_path, hourly_weekday_profiles_path, hourly_saturday_profiles_path, + hourly_sunday_profiles_path, speciation_map_path, molecular_weights_path, hot_cold_speciation, + temp_common_path] + + [os.path.join(ef_common_path, "cold_{0}.csv".format(pol)) for pol in source_pollutants]) + if do_tyre_wear: + check_files( + [road_link_path, fleet_compo_path, speed_hourly_path, monthly_profiles_path, weekly_profiles_path, + hourly_mean_profiles_path, hourly_weekday_profiles_path, hourly_saturday_profiles_path, + hourly_sunday_profiles_path, speciation_map_path, molecular_weights_path, tyre_speciation] + + [os.path.join(ef_common_path, "tyre_{0}.csv".format(pol)) for pol in ['pm']]) + if do_road_wear: + check_files( + [road_link_path, fleet_compo_path, speed_hourly_path, monthly_profiles_path, weekly_profiles_path, + hourly_mean_profiles_path, hourly_weekday_profiles_path, hourly_saturday_profiles_path, + hourly_sunday_profiles_path, speciation_map_path, molecular_weights_path, road_speciation] + + [os.path.join(ef_common_path, "road_{0}.csv".format(pol)) for pol in ['pm']]) + if do_brake_wear: + check_files( + [road_link_path, fleet_compo_path, speed_hourly_path, monthly_profiles_path, weekly_profiles_path, + hourly_mean_profiles_path, hourly_weekday_profiles_path, hourly_saturday_profiles_path, + hourly_sunday_profiles_path, speciation_map_path, molecular_weights_path, brake_speciation] + + [os.path.join(ef_common_path, "brake_{0}.csv".format(pol)) for pol in ['pm']]) + if do_resuspension: + check_files( + [road_link_path, fleet_compo_path, speed_hourly_path, monthly_profiles_path, weekly_profiles_path, + hourly_mean_profiles_path, hourly_weekday_profiles_path, hourly_saturday_profiles_path, + hourly_sunday_profiles_path, speciation_map_path, molecular_weights_path, resuspension_speciation] + + [os.path.join(ef_common_path, "resuspension_{0}.csv".format(pol)) for pol in ['pm']]) + if resuspension_correction: + check_files(precipitation_path) super(TrafficSector, self).__init__( - comm, logger, auxiliary_dir, grid_shp, clip, date_array, source_pollutants, vertical_levels, + comm, logger, auxiliary_dir, grid, clip, date_array, source_pollutants, vertical_levels, monthly_profiles_path, weekly_profiles_path, None, speciation_map_path, None, molecular_weights_path) self.resuspension_correction = resuspension_correction @@ 
-69,13 +112,11 @@ class TrafficSector(Sector): self.crs = None # crs is the projection of the road links and it is set on the read_road_links function. self.write_rline = write_rline self.road_links = self.read_road_links(road_link_path) + self.load = load self.ef_common_path = ef_common_path self.temp_common_path = temp_common_path - # TODO use only date_array - self.timestep_num = len(self.date_array) - self.timestep_freq = 1 - self.starting_date = self.date_array[0] + self.add_local_date(self.date_array[0]) self.hot_cold_speciation = hot_cold_speciation @@ -87,14 +128,10 @@ class TrafficSector(Sector): self.fleet_compo = self.read_fleet_compo(fleet_compo_path, vehicle_list) self.speed_hourly = self.read_speed_hourly(speed_hourly_path) - self.hourly_profiles = pd.concat([ - pd.read_csv(hourly_mean_profiles_path), - pd.read_csv(hourly_weekday_profiles_path), - pd.read_csv(hourly_saturday_profiles_path), - pd.read_csv(hourly_sunday_profiles_path) - ]).reset_index() - - self.expanded = self.expand_road_links('hourly', len(self.date_array), 1) + self.hourly_profiles = self.read_all_hourly_profiles(hourly_mean_profiles_path, hourly_weekday_profiles_path, + hourly_saturday_profiles_path, hourly_sunday_profiles_path) + self.check_profiles() + self.expanded = self.expand_road_links() del self.fleet_compo, self.speed_hourly, self.monthly_profiles, self.weekly_profiles, self.hourly_profiles @@ -107,6 +144,66 @@ class TrafficSector(Sector): self.logger.write_time_log('TrafficSector', '__init__', timeit.default_timer() - spent_time) + def check_profiles(self): + spent_time = timeit.default_timer() + # Checking speed profiles IDs + links_speed = set(np.unique(np.concatenate([ + np.unique(self.road_links['sp_hour_su'].dropna().values), + np.unique(self.road_links['sp_hour_mo'].dropna().values), + np.unique(self.road_links['sp_hour_tu'].dropna().values), + np.unique(self.road_links['sp_hour_we'].dropna().values), + np.unique(self.road_links['sp_hour_th'].dropna().values), + np.unique(self.road_links['sp_hour_fr'].dropna().values), + np.unique(self.road_links['sp_hour_sa'].dropna().values), + ]))) + # The '0' speed profile means that we don't know the speed profile and it will be replaced by a flat profile + speed = set(np.unique(np.concatenate([self.speed_hourly.index.values, [0]]))) + + speed_res = links_speed - speed + if len(speed_res) > 0: + error_exit("The following speed profile IDs reported in the road links shapefile do not appear " + + "in the hourly speed profiles file. {0}".format(speed_res)) + + # Checking monthly profiles IDs + links_month = set(np.unique(self.road_links['aadt_m_mn'].dropna().values)) + month = set(self.monthly_profiles.index.values) + month_res = links_month - month + if len(month_res) > 0: + error_exit("The following monthly profile IDs reported in the road links shapefile do not appear " + + "in the monthly profiles file. {0}".format(month_res)) + + # Checking weekly profiles IDs + links_week = set(np.unique(self.road_links['aadt_week'].dropna().values)) + week = set(self.weekly_profiles.index.values) + week_res = links_week - week + if len(week_res) > 0: + error_exit("The following weekly profile IDs reported in the road links shapefile do not appear " + + "in the weekly profiles file. 
{0}".format(week_res)) + + # Checking hourly profiles IDs + links_hour = set(np.unique(np.concatenate([ + np.unique(self.road_links['aadt_h_mn'].dropna().values), + np.unique(self.road_links['aadt_h_wd'].dropna().values), + np.unique(self.road_links['aadt_h_sat'].dropna().values), + np.unique(self.road_links['aadt_h_sun'].dropna().values), + ]))) + hour = set(self.hourly_profiles.index.values) + hour_res = links_hour - hour + if len(hour_res) > 0: + error_exit("The following hourly profile IDs reported in the road links shapefile do not appear " + + "in the hourly profiles file. {0}".format(hour_res)) + + self.logger.write_time_log('TrafficSector', 'check_profiles', timeit.default_timer() - spent_time) + + def read_all_hourly_profiles(self, hourly_mean_profiles_path, hourly_weekday_profiles_path, + hourly_saturday_profiles_path, hourly_sunday_profiles_path): + hourly_profiles = pd.concat([self.read_hourly_profiles(hourly_mean_profiles_path), + self.read_hourly_profiles(hourly_weekday_profiles_path), + self.read_hourly_profiles(hourly_saturday_profiles_path), + self.read_hourly_profiles(hourly_sunday_profiles_path)]) + hourly_profiles.index = hourly_profiles.index.astype(str) + return hourly_profiles + def read_speciation_map(self, path): """ Read the speciation map. @@ -171,7 +268,8 @@ class TrafficSector(Sector): self.road_links['start_date'] = self.road_links.groupby('timezone')['utc'].apply( lambda x: pd.to_datetime(x).dt.tz_localize(pytz.utc).dt.tz_convert(x.name).dt.tz_localize(None)) - del self.road_links['utc'], self.road_links['timezone'] + self.road_links.drop(columns=['utc', 'timezone'], inplace=True) + libc.malloc_trim(0) self.logger.write_time_log('TrafficSector', 'add_local_date', timeit.default_timer() - spent_time) return True @@ -197,13 +295,14 @@ class TrafficSector(Sector): :type path: str: :return: ... - :rtype: Pandas.DataFrame + :rtype: DataFrame """ spent_time = timeit.default_timer() df = pd.read_csv(path, sep=',', dtype=np.float32) df['P_speed'] = df['P_speed'].astype(int) - # df.set_index('P_speed', inplace=True) + df.set_index('P_speed', inplace=True) + self.logger.write_time_log('TrafficSector', 'read_speed_hourly', timeit.default_timer() - spent_time) return df @@ -224,11 +323,11 @@ class TrafficSector(Sector): min_num = nprocs - max_num index_list = [] prev = 0 - for i in xrange(max_num): + for i in range(max_num): prev += max_len index_list.append(prev) if min_num > 0: - for i in xrange(min_num - 1): + for i in range(min_num - 1): prev += min_len index_list.append(prev) @@ -245,32 +344,43 @@ class TrafficSector(Sector): if self.comm.Get_rank() == 0: df = gpd.read_file(path) - + try: + df.drop(columns=['Adminis', 'CCAA', 'NETWORK_ID', 'Province', 'Road_name', 'aadt_m_sat', 'aadt_m_sun', + 'aadt_m_wd', 'Source'], inplace=True) + except KeyError as e: + error_exit(str(e).replace('axis', 'the road links shapefile')) + libc.malloc_trim(0) + # df.to_file('~/temp/road_links.shp') df = gpd.sjoin(df, self.clip.shapefile.to_crs(df.crs), how="inner", op='intersects') + # df.to_file('~/temp/road_links_selected.shp') + df.drop(columns=['index_right'], inplace=True) + libc.malloc_trim(0) # Filtering road links to CONSiderate. 
df['CONS'] = df['CONS'].astype(np.int16) df = df[df['CONS'] != 0] df = df[df['aadt'] > 0] - # TODO Manu update shapefile replacing NULL values on 'aadt_m-mn' column + try: + df.drop(columns=['CONS'], inplace=True) + except KeyError as e: + error_exit(str(e).replace('axis', 'the road links shapefile')) + df = df.loc[df['aadt_m_mn'] != 'NULL', :] + libc.malloc_trim(0) # Adding identificator of road link - df['Link_ID'] = xrange(len(df)) - - del df['Adminis'], df['CCAA'], df['CONS'], df['NETWORK_ID'] - del df['Province'], df['Road_name'] - - # Deleting unused columns - del df['aadt_m_sat'], df['aadt_m_sun'], df['aadt_m_wd'], df['Source'] + df['Link_ID'] = range(len(df)) + df.set_index('Link_ID', inplace=True) libc.malloc_trim(0) + chunks = chunk_road_links(df, self.comm.Get_size()) else: chunks = None self.comm.Barrier() df = self.comm.scatter(chunks, root=0) + del chunks libc.malloc_trim(0) df = df.to_crs({'init': 'epsg:4326'}) @@ -290,19 +400,18 @@ class TrafficSector(Sector): df.loc[df['Road_type'] == '2', 'Road_type'] = 'Urban Off Peak' df.loc[df['Road_type'] == '3', 'Road_type'] = 'Urban Peak' - # TODO Read with units types - df['road_grad'] = df['road_grad'].astype(float) + df['road_grad'] = df['road_grad'].astype(np.float32) # Check if percents are ok if len(df[df['PcLight'] < 0]) is not 0: - print 'ERROR: PcLight < 0' - exit(1) + error_exit('PcLight < 0') if self.write_rline: self.write_rline_roadlinks(df) self.logger.write_time_log('TrafficSector', 'read_road_links', timeit.default_timer() - spent_time) libc.malloc_trim(0) + return df def read_ef(self, emission_type, pollutant_name): @@ -327,7 +436,10 @@ class TrafficSector(Sector): # Pollutants different to NH3 if pollutant_name != 'nh3': - del df['Copert_V_name'] + try: + df.drop(columns=['Copert_V_name'], inplace=True) + except KeyError as e: + error_exit(str(e).replace('axis', 'the {0} file'.format(ef_path))) # For hot emission factors if emission_type == 'hot': @@ -335,8 +447,10 @@ class TrafficSector(Sector): df.loc[df['Technology'].isnull(), 'Technology'] = '' df = df[df['Technology'] != 'EGR'] - - del df['Technology'], df['Load'] + try: + df.drop(columns=['Technology', 'Load'], inplace=True) + except KeyError as e: + error_exit(str(e).replace('axis', 'the {0} file'.format(ef_path))) # Split the EF file into small DataFrames divided by column Road.Slope and Mode restrictions. df_code_slope_road = df[df['Road.Slope'].notnull() & df['Mode'].notnull()] @@ -347,7 +461,7 @@ class TrafficSector(Sector): # Checks that the splited DataFrames contain the full DataFrame if (len(df_code_slope_road) + len(df_code_slope) + len(df_code_road) + len(df_code)) != len(df): # TODO check that error - print 'ERROR in blablavbla' + error_exit('ERROR in blablavbla') return df_code_slope_road, df_code_slope, df_code_road, df_code elif emission_type == 'cold' or emission_type == 'tyre' or emission_type == 'road' or \ @@ -355,7 +469,10 @@ class TrafficSector(Sector): return df # NH3 pollutant else: - del df['Copert_V_name'] + try: + df.drop(columns=['Copert_V_name'], inplace=True) + except KeyError as e: + error_exit(str(e).replace('axis', 'the {0} file'.format(ef_path))) # Specific case for cold NH3 emission factors that needs the hot emission factors and the cold ones. 
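read_ef() splits the hot emission factor table into four complementary subsets, depending on whether Road.Slope and Mode are filled in, and then checks that the four pieces add up to the original table (the error branch that the diff now routes through error_exit). A small pandas sketch of that partitioning with made-up rows:

```python
import numpy as np
import pandas as pd

# Toy hot-EF table; only the partitioning columns and one value column are shown.
ef = pd.DataFrame({'Code': ['A', 'A', 'B', 'C'],
                   'Road.Slope': [0.02, 0.06, np.nan, np.nan],
                   'Mode': ['Urban Peak', np.nan, 'Highway', np.nan],
                   'EF': [0.11, 0.09, 0.25, 0.30]})

by_slope_and_mode = ef[ef['Road.Slope'].notnull() & ef['Mode'].notnull()]
by_slope = ef[ef['Road.Slope'].notnull() & ef['Mode'].isnull()]
by_mode = ef[ef['Road.Slope'].isnull() & ef['Mode'].notnull()]
by_code = ef[ef['Road.Slope'].isnull() & ef['Mode'].isnull()]

# The four subsets must cover the whole table, which is what the sanity check in the diff enforces.
assert len(by_slope_and_mode) + len(by_slope) + len(by_mode) + len(by_code) == len(ef)
```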
if emission_type == 'cold': df_hot = self.read_ef('hot', pollutant_name) @@ -363,8 +480,10 @@ class TrafficSector(Sector): df = df.merge(df_hot, left_on=['Code', 'Mode'], right_on=['Code_hot', 'Mode_hot'], how='left') - - del df['Cmileage_hot'], df['Mode_hot'], df['Code_hot'] + try: + df.drop(columns=['Cmileage_hot', 'Mode_hot', 'Code_hot'], inplace=True) + except KeyError as e: + error_exit(str(e).replace('axis', 'the {0} file'.format(ef_path))) return df @@ -404,7 +523,7 @@ class TrafficSector(Sector): prlr = prlr <= MIN_RAIN dst = np.empty(prlr.shape) last = np.zeros((prlr.shape[-1])) - for time in xrange(prlr.shape[0]): + for time in range(prlr.shape[0]): dst[time, :] = (last + prlr[time, :]) * prlr[time, :] last = dst[time, :] @@ -413,7 +532,7 @@ class TrafficSector(Sector): # It is assumed that after 48 h without rain the potential emission is equal to one dst[dst >= (1 - np.exp(- RECOVERY_RATIO * 48))] = 1. # Creates the GeoDataFrame - df = gpd.GeoDataFrame(dst.T, geometry=precipitation.geometry) + df = gpd.GeoDataFrame(dst.T, geometry=precipitation['geometry'].values) df.columns = ['PR_{0}'.format(x) for x in df.columns.values[:-1]] + ['geometry'] df.loc[:, 'REC'] = df.index @@ -425,14 +544,20 @@ class TrafficSector(Sector): def update_fleet_value(self, df): spent_time = timeit.default_timer() - # Calculating fleet value by fleet class - df.loc[:, 'Fleet_value'] = df['Fleet_value'] * df['aadt'] - - df.loc[df['Fleet_Class'] == 'light_veh', 'Fleet_value'] = df['PcLight'] * df['Fleet_value'] - df.loc[df['Fleet_Class'] == 'heavy_veh', 'Fleet_value'] = df['PcHeavy'] * df['Fleet_value'] - df.loc[df['Fleet_Class'] == 'motos', 'Fleet_value'] = df['PcMoto'] * df['Fleet_value'] - df.loc[df['Fleet_Class'] == 'mopeds', 'Fleet_value'] = df['PcMoped'] * df['Fleet_value'] + def update_by_class(x): + if x.name == 'light_veh': + x['value'] = x['PcLight'].mul(x['Fleet_value'] * x['aadt'], axis='index') + elif x.name == 'heavy_veh': + x['value'] = x['PcHeavy'].mul(x['Fleet_value'] * x['aadt'], axis='index') + elif x.name == 'motos': + x['value'] = x['PcMoto'].mul(x['Fleet_value'] * x['aadt'], axis='index') + elif x.name == 'mopeds': + x['value'] = x['PcMoped'].mul(x['Fleet_value'] * x['aadt'], axis='index') + else: + x['value'] = np.nan + return x[['value']] + df['Fleet_value'] = df.groupby('Fleet_Class').apply(update_by_class) for link_id, aux_df in df.groupby('Link_ID'): aadt = round(aux_df['aadt'].min(), 1) fleet_value = round(aux_df['Fleet_value'].sum(), 1) @@ -444,143 +569,139 @@ class TrafficSector(Sector): df = df[df['Fleet_value'] > 0] # Deleting unused columns - del df['aadt'], df['PcLight'], df['PcHeavy'], df['PcMoto'], df['PcMoped'], df['Fleet_Class'] + try: + df.drop(columns=['aadt', 'PcLight', 'PcHeavy', 'PcMoto', 'PcMoped', 'Fleet_Class'], inplace=True) + except KeyError as e: + error_exit(str(e).replace('axis', 'the road links shapefile')) + libc.malloc_trim(0) + self.logger.write_time_log('TrafficSector', 'update_fleet_value', timeit.default_timer() - spent_time) return df - def calculate_timedelta(self, timestep_type, num_tstep, timestep_freq): - from datetime import timedelta + def calculate_time_dependent_values(self, df): spent_time = timeit.default_timer() - delta = timedelta(hours=timestep_freq * num_tstep) + def get_weekday_speed_profile(x): + # Spead mean + if x.name <= 4: + x['speed_mean'] = df['sp_wd'] + else: + x['speed_mean'] = df['sp_we'] + + # Profile_ID + if x.name == 0: + x['P_speed'] = x['sp_hour_mo'] + elif x.name == 1: + x['P_speed'] = x['sp_hour_tu'] + elif x.name 
== 2: + x['P_speed'] = x['sp_hour_we'] + elif x.name == 3: + x['P_speed'] = x['sp_hour_th'] + elif x.name == 4: + x['P_speed'] = x['sp_hour_fr'] + elif x.name == 5: + x['P_speed'] = x['sp_hour_sa'] + elif x.name == 6: + x['P_speed'] = x['sp_hour_su'] + else: + x['P_speed'] = 1 # Flat profile - self.logger.write_time_log('TrafficSector', 'calculate_timedelta', timeit.default_timer() - spent_time) - return pd.Timedelta(delta) + # Flat profile + x['P_speed'].replace([0, np.nan], 1, inplace=True) + x['P_speed'] = x['P_speed'].astype(int) - def calculate_hourly_speed(self, df): - spent_time = timeit.default_timer() + return x[['speed_mean', 'P_speed']] - df = df.merge(self.speed_hourly, left_on='profile_id', right_on='P_speed', how='left') - df['speed'] = df.groupby('hour').apply(lambda x: x[[str(x.name)]]) + def get_velocity(x): + speed = self.speed_hourly.loc[np.unique(x['P_speed'].values), :] - self.logger.write_time_log('TrafficSector', 'calculate_hourly_speed', timeit.default_timer() - spent_time) - return df['speed'] * df['speed_mean'] + x = pd.merge(x, speed, left_on='P_speed', right_index=True, how='left') + x['speed'] = x.groupby('hour').apply(lambda y: x[[str(y.name)]]) - def calculate_temporal_factor(self, df): - spent_time = timeit.default_timer() + return x['speed'] * x['speed_mean'] - def get_hourly_id_from_weekday(weekday): - if weekday <= 4: - return 'aadt_h_wd' - elif weekday == 5: - return 'aadt_h_sat' - elif weekday == 6: - return 'aadt_h_sun' - else: - print 'ERROR: Weekday not found' - exit() + def get_temporal_factor(x): + def get_hourly_id_from_weekday(weekday): + if weekday <= 4: + return 'aadt_h_wd' + elif weekday == 5: + return 'aadt_h_sat' + elif weekday == 6: + return 'aadt_h_sun' + else: + error_exit('Weekday not found') - # Monthly factor - df = df.merge(self.monthly_profiles.reset_index(), left_on='aadt_m_mn', right_on='P_month', how='left') - df['MF'] = df.groupby('month').apply(lambda x: x[[x.name]]) - df.drop(columns=range(1, 12 + 1), inplace=True) + # Monthly factor + x = pd.merge(x, self.monthly_profiles, left_on='aadt_m_mn', right_index=True, how='left') + x['MF'] = x.groupby('month').apply(lambda y: x[[y.name]]) + x.drop(columns=range(1, 12 + 1), inplace=True) - # Daily factor - df = df.merge(self.weekly_profiles.reset_index(), left_on='aadt_week', right_on='P_week', how='left') + # Daily factor + x = pd.merge(x, self.weekly_profiles, left_on='aadt_week', right_index=True, how='left') + x['WF'] = x.groupby('weekday').apply(lambda y: x[[y.name]]) + x.drop(columns=range(0, 7), inplace=True) - df['WF'] = df.groupby('week_day').apply(lambda x: x[[x.name]]) - df.drop(columns=range(0, 7), inplace=True) + # Hourly factor + x.fillna(value=pd.np.nan, inplace=True) + x['hourly_profile'] = x.groupby('weekday').apply(lambda y: x[[get_hourly_id_from_weekday(y.name)]]) + x['hourly_profile'].fillna(x['aadt_h_mn'], inplace=True) - # Hourly factor - df['hourly_profile'] = df.groupby('week_day').apply(lambda x: x[[get_hourly_id_from_weekday(x.name)]]) - df.loc[df['hourly_profile'] == '', 'hourly_profile'] = df['aadt_h_mn'] + x = pd.merge(x, self.hourly_profiles, left_on='hourly_profile', right_index=True, how='left') + x['HF'] = x.groupby('hour').apply(lambda y: x[[y.name]]) + x.drop(columns=range(0, 24), inplace=True) + x['factor'] = x['MF'] * x['WF'] * x['HF'] - df['hourly_profile'] = df['hourly_profile'].astype(str) - self.hourly_profiles['P_hour'] = self.hourly_profiles['P_hour'].astype(str) + return x[['factor']] - df = df.merge(self.hourly_profiles, 
left_on='hourly_profile', right_on='P_hour', how='left') - df['HF'] = df.groupby('hour').apply(lambda x: x[[str(x.name)]]) + for i_t, tstep in enumerate(self.date_array): + df['aux_date'] = df['start_date'] + (tstep - self.date_array[0]) + df['month'] = df['aux_date'].dt.month + df['weekday'] = df['aux_date'].dt.weekday + df['hour'] = df['aux_date'].dt.hour - self.logger.write_time_log('TrafficSector', 'calculate_temporal_factor', timeit.default_timer() - spent_time) - return df['MF'] * df['WF'] * df['HF'] + df[['speed_mean', 'P_speed']] = df.groupby('weekday').apply(get_weekday_speed_profile) - def calculate_time_dependent_values(self, df, timestep_type, timestep_num, timestep_freq): - spent_time = timeit.default_timer() + df['v_{0}'.format(i_t)] = get_velocity(df[['hour', 'speed_mean', 'P_speed']]) + df['f_{0}'.format(i_t)] = get_temporal_factor( + df[['month', 'weekday', 'hour', 'aadt_m_mn', 'aadt_week', 'aadt_h_mn', 'aadt_h_wd', 'aadt_h_sat', + 'aadt_h_sun']]) - df.reset_index(inplace=True) - for tstep in xrange(timestep_num): - # Finding weekday - # 0 -> Monday; 6 -> Sunday - df.loc[:, 'month'] = (df['start_date'] + self.calculate_timedelta( - timestep_type, tstep, timestep_freq)).dt.month - df.loc[:, 'week_day'] = (df['start_date'] + self.calculate_timedelta( - timestep_type, tstep, timestep_freq)).dt.weekday - df.loc[:, 'hour'] = (df['start_date'] + self.calculate_timedelta( - timestep_type, tstep, timestep_freq)).dt.hour - - # Selecting speed_mean - df.loc[df['week_day'] <= 4, 'speed_mean'] = df['sp_wd'] - df.loc[df['week_day'] > 4, 'speed_mean'] = df['sp_we'] - - # Selecting speed profile_id - df.loc[df['week_day'] == 0, 'profile_id'] = df['sp_hour_mo'] - df.loc[df['week_day'] == 1, 'profile_id'] = df['sp_hour_tu'] - df.loc[df['week_day'] == 2, 'profile_id'] = df['sp_hour_we'] - df.loc[df['week_day'] == 3, 'profile_id'] = df['sp_hour_th'] - df.loc[df['week_day'] == 4, 'profile_id'] = df['sp_hour_fr'] - df.loc[df['week_day'] == 5, 'profile_id'] = df['sp_hour_sa'] - df.loc[df['week_day'] == 6, 'profile_id'] = df['sp_hour_su'] - - df['profile_id'] = df['profile_id'].astype(int) - - # Selecting flat profile for 0 and nan's - df.loc[df['profile_id'] == 0, 'profile_id'] = 1 - df.loc[df['profile_id'] == np.nan, 'profile_id'] = 1 - - # Calculating speed by tstep - speed_column_name = 'v_{0}'.format(tstep) - df[speed_column_name] = self.calculate_hourly_speed(df.loc[:, ['hour', 'speed_mean', 'profile_id']]) - - factor_column_name = 'f_{0}'.format(tstep) - - df.loc[:, factor_column_name] = self.calculate_temporal_factor( - df.loc[:, ['month', 'week_day', 'hour', 'aadt_m_mn', 'aadt_week', 'aadt_h_mn', 'aadt_h_wd', - 'aadt_h_sat', 'aadt_h_sun']]) - - # Deleting time variables - - del df['month'], df['week_day'], df['hour'], df['profile_id'], df['speed_mean'] - del df['sp_wd'], df['sp_we'], df['index'] - del df['sp_hour_mo'], df['sp_hour_tu'], df['sp_hour_we'], df['sp_hour_th'], df['sp_hour_fr'] - del df['sp_hour_sa'], df['sp_hour_su'] - del df['aadt_m_mn'], df['aadt_h_mn'], df['aadt_h_wd'], df['aadt_h_sat'], df['aadt_h_sun'], df['aadt_week'] - del df['start_date'] + try: + df.drop(columns=['month', 'weekday', 'hour', 'P_speed', 'speed_mean', 'sp_wd', 'sp_we', 'sp_hour_mo', + 'sp_hour_tu', 'sp_hour_we', 'sp_hour_th', 'sp_hour_fr', 'sp_hour_sa', 'sp_hour_su', + 'aux_date', 'aadt_m_mn', 'aadt_h_mn', 'aadt_h_wd', 'aadt_h_sat', 'aadt_h_sun', 'aadt_week', + 'start_date'], inplace=True) + except KeyError as e: + error_exit(str(e).replace('axis', 'the road links shapefile')) + libc.malloc_trim(0) 
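In the rewritten calculate_time_dependent_values(), the activity factor for each time step is the product of a monthly, a weekly and an hourly profile value (MF * WF * HF) looked up for the link's local date; that product fills the f_<tstep> columns that later multiply the fleet value and the emission factors. A compact sketch of the composition with made-up, roughly flat profiles:

```python
import datetime

# Illustrative profiles: 12 monthly, 7 weekly (Monday = 0) and 24 hourly factors.
monthly = {m: 1.0 for m in range(1, 13)}
weekly = {d: 1.05 if d < 5 else 0.9 for d in range(7)}
hourly = {h: 0.5 if h < 6 else 1.2 for h in range(24)}


def temporal_factor(when):
    """Combine the three profile levels for one local timestamp."""
    mf = monthly[when.month]      # month factor
    wf = weekly[when.weekday()]   # weekday factor
    hf = hourly[when.hour]        # hour factor
    return mf * wf * hf


when = datetime.datetime(2016, 11, 29, 8)   # a Tuesday, 08:00 local time
print(temporal_factor(when))                # 1.0 * 1.05 * 1.2 = 1.26
```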
self.logger.write_time_log('TrafficSector', 'calculate_time_dependent_values', timeit.default_timer() - spent_time) - return df - def expand_road_links(self, timestep_type, timestep_num, timestep_freq): + def expand_road_links(self): spent_time = timeit.default_timer() # Expands each road link by any vehicle type that the selected road link has. df_list = [] - road_link_aux = self.road_links.copy() + road_link_aux = self.road_links.copy().reset_index() + + road_link_aux.drop(columns='geometry', inplace=True) + libc.malloc_trim(0) - del road_link_aux['geometry'] for zone, compo_df in road_link_aux.groupby('fleet_comp'): fleet = self.find_fleet(zone) df_aux = pd.merge(compo_df, fleet, how='left', on='fleet_comp') + df_aux.drop(columns='fleet_comp', inplace=True) df_list.append(df_aux) df = pd.concat(df_list, ignore_index=True) - libc.malloc_trim(0) - del df['fleet_comp'] + df.set_index(['Link_ID', 'Fleet_Code'], inplace=True) + libc.malloc_trim(0) df = self.update_fleet_value(df) - df = self.calculate_time_dependent_values(df, timestep_type, timestep_num, timestep_freq) + df = self.calculate_time_dependent_values(df) self.logger.write_time_log('TrafficSector', 'expand_road_links', timeit.default_timer() - spent_time) @@ -592,7 +713,7 @@ class TrafficSector(Sector): try: fleet = self.fleet_compo[['Code', 'Class', zone]] except KeyError as e: - raise KeyError(e.message + ' of the fleet_compo file') + error_exit(e.message + ' of the fleet_compo file') fleet.columns = ['Fleet_Code', 'Fleet_Class', 'Fleet_value'] fleet = fleet[fleet['Fleet_value'] > 0] @@ -612,6 +733,7 @@ class TrafficSector(Sector): if pollutant != 'nh3': ef_code_slope_road, ef_code_slope, ef_code_road, ef_code = self.read_ef('hot', pollutant) + df_code_slope_road = expanded_aux.merge( ef_code_slope_road, left_on=['Fleet_Code', 'road_grad', 'Road_type'], right_on=['Code', 'Road.Slope', 'Mode'], how='inner') @@ -631,11 +753,11 @@ class TrafficSector(Sector): expanded_aux = expanded_aux.merge(ef_code_road, left_on=['Fleet_Code', 'Road_type'], right_on=['Code', 'Mode'], how='inner') - del expanded_aux['Code'], expanded_aux['Mode'] + expanded_aux.drop(columns=['Code', 'Mode'], inplace=True) # Warnings and Errors - original_ef_profile = self.expanded['Fleet_Code'].unique() - calculated_ef_profiles = expanded_aux['Fleet_Code'].unique() + original_ef_profile = np.unique(self.expanded.index.get_level_values('Fleet_Code')) + calculated_ef_profiles = np.unique(expanded_aux['Fleet_Code']) resta_1 = [item for item in original_ef_profile if item not in calculated_ef_profiles] # Warining resta_2 = [item for item in calculated_ef_profiles if item not in original_ef_profile] # Error @@ -644,14 +766,14 @@ class TrafficSector(Sector): resta_1)) warnings.warn('Exists some fleet codes that not appear on the EF file: {0}'.format(resta_1), Warning) if len(resta_2) > 0: - raise ImportError('Exists some fleet codes duplicateds on the EF file: {0}'.format(resta_2)) + error_exit('Exists some fleet codes duplicated on the EF file: {0}'.format(resta_2)) m_corr = self.read_mcorr_file(pollutant) if m_corr is not None: expanded_aux = expanded_aux.merge(m_corr, left_on='Fleet_Code', right_on='Code', how='left') - del expanded_aux['Code'] + expanded_aux.drop(columns=['Code'], inplace=True) - for tstep in xrange(self.timestep_num): + for tstep in range(len(self.date_array)): ef_name = 'ef_{0}_{1}'.format(pollutant, tstep) p_column = '{0}_{1}'.format(pollutant, tstep) if pollutant != 'nh3': @@ -662,14 +784,17 @@ class TrafficSector(Sector): expanded_aux['v_aux'] 
> expanded_aux['Max.Speed'], 'Max.Speed'] # EF - expanded_aux.loc[:, ef_name] = \ + expanded_aux[ef_name] = \ ((expanded_aux.Alpha * expanded_aux.v_aux**2 + expanded_aux.Beta * expanded_aux.v_aux + expanded_aux.Gamma + (expanded_aux.Delta / expanded_aux.v_aux)) / (expanded_aux.Epsilon * expanded_aux.v_aux**2 + expanded_aux.Zita * expanded_aux.v_aux + expanded_aux.Hta)) * (1 - expanded_aux.RF) * \ (expanded_aux.PF * expanded_aux['T'] / expanded_aux.Q) + + # COPERT V equation can give nan for CH4 + expanded_aux[ef_name].fillna(0, inplace=True) else: - expanded_aux.loc[:, ef_name] = \ + expanded_aux[ef_name] = \ ((expanded_aux['a'] * expanded_aux['Cmileage'] + expanded_aux['b']) * (expanded_aux['EFbase'] * expanded_aux['TF'])) / 1000 @@ -692,62 +817,56 @@ class TrafficSector(Sector): expanded_aux.loc[:, p_column] = \ expanded_aux['Fleet_value'] * expanded_aux[ef_name] * expanded_aux['Mcorr'] * \ expanded_aux['f_{0}'.format(tstep)] - del expanded_aux[ef_name], expanded_aux['Mcorr'] + expanded_aux.drop(columns=[ef_name, 'Mcorr'], inplace=True) if pollutant != 'nh3': - del expanded_aux['v_aux'] - del expanded_aux['Min.Speed'], expanded_aux['Max.Speed'], expanded_aux['Alpha'], expanded_aux['Beta'] - del expanded_aux['Gamma'], expanded_aux['Delta'], expanded_aux['Epsilon'], expanded_aux['Zita'] - del expanded_aux['Hta'], expanded_aux['RF'], expanded_aux['Q'], expanded_aux['PF'], expanded_aux['T'] + expanded_aux.drop(columns=['v_aux', 'Min.Speed', 'Max.Speed', 'Alpha', 'Beta', 'Gamma', 'Delta', + 'Epsilon', 'Zita', 'Hta', 'RF', 'Q', 'PF', 'T'], inplace=True) else: - del expanded_aux['a'], expanded_aux['Cmileage'], expanded_aux['b'], expanded_aux['EFbase'] - del expanded_aux['TF'] + expanded_aux.drop(columns=['a', 'Cmileage', 'b', 'EFbase', 'TF'], inplace=True) if m_corr is not None: - del expanded_aux['A_urban'], expanded_aux['B_urban'], expanded_aux['A_road'], expanded_aux['B_road'] - del expanded_aux['M'] - - del expanded_aux['road_grad'] + expanded_aux.drop(columns=['A_urban', 'B_urban', 'A_road', 'B_road', 'M'], inplace=True) + expanded_aux.drop(columns=['road_grad'], inplace=True) + expanded_aux.drop(columns=['f_{0}'.format(x) for x in range(len(self.date_array))], inplace=True) - for tstep in xrange(self.timestep_num): - del expanded_aux['f_{0}'.format(tstep)] + libc.malloc_trim(0) self.logger.write_time_log('TrafficSector', 'calculate_hot', timeit.default_timer() - spent_time) + return expanded_aux def calculate_cold(self, hot_expanded): spent_time = timeit.default_timer() - cold_links = self.road_links.copy() - - del cold_links['aadt'], cold_links['PcHeavy'], cold_links['PcMoto'], cold_links['PcMoped'], cold_links['sp_wd'] - del cold_links['sp_we'], cold_links['sp_hour_su'], cold_links['sp_hour_mo'], cold_links['sp_hour_tu'] - del cold_links['sp_hour_we'], cold_links['sp_hour_th'], cold_links['sp_hour_fr'], cold_links['sp_hour_sa'] - del cold_links['Road_type'], cold_links['aadt_m_mn'], cold_links['aadt_h_mn'], cold_links['aadt_h_wd'] - del cold_links['aadt_h_sat'], cold_links['aadt_h_sun'], cold_links['aadt_week'], cold_links['fleet_comp'] - del cold_links['road_grad'], cold_links['PcLight'], cold_links['start_date'] + cold_links = self.road_links.copy().reset_index() + cold_links.drop(columns=['aadt', 'PcHeavy', 'PcMoto', 'PcMoped', 'sp_wd', 'sp_we', 'sp_hour_su', 'sp_hour_mo', + 'sp_hour_tu', 'sp_hour_we', 'sp_hour_th', 'sp_hour_fr', 'sp_hour_sa', 'Road_type', + 'aadt_m_mn', 'aadt_h_mn', 'aadt_h_wd', 'aadt_h_sat', 'aadt_h_sun', 'aadt_week', + 'fleet_comp', 'road_grad', 'PcLight', 
'start_date'], inplace=True) libc.malloc_trim(0) - cold_links.loc[:, 'centroid'] = cold_links['geometry'].centroid + cold_links['centroid'] = cold_links['geometry'].centroid link_lons = cold_links['geometry'].centroid.x link_lats = cold_links['geometry'].centroid.y temperature = IoNetcdf(self.comm).get_hourly_data_from_netcdf( link_lons.min(), link_lons.max(), link_lats.min(), link_lats.max(), self.temp_common_path, 'tas', self.date_array) - temperature.rename(columns={x: 't_{0}'.format(x) for x in xrange(len(self.date_array))}, inplace=True) + temperature.rename(columns={x: 't_{0}'.format(x) for x in range(len(self.date_array))}, inplace=True) # From Kelvin to Celsius degrees - temperature.loc[:, ['t_{0}'.format(x) for x in xrange(len(self.date_array))]] = \ - temperature.loc[:, ['t_{0}'.format(x) for x in xrange(len(self.date_array))]] - 273.15 + temperature[['t_{0}'.format(x) for x in range(len(self.date_array))]] = \ + temperature[['t_{0}'.format(x) for x in range(len(self.date_array))]] - 273.15 unary_union = temperature.unary_union cold_links['REC'] = cold_links.apply(self.nearest, geom_union=unary_union, df1=cold_links, df2=temperature, geom1_col='centroid', src_column='REC', axis=1) - del cold_links['geometry'], cold_links['centroid'], temperature['geometry'] + + cold_links.drop(columns=['geometry', 'centroid', 'geometry'], inplace=True) libc.malloc_trim(0) cold_links = cold_links.merge(temperature, left_on='REC', right_on='REC', how='left') - del cold_links['REC'] + cold_links.drop(columns=['REC'], inplace=True) libc.malloc_trim(0) c_expanded = hot_expanded.merge(cold_links, left_on='Link_ID', right_on='Link_ID', how='left') @@ -767,11 +886,10 @@ class TrafficSector(Sector): right_on=['Code', 'Mode'], how='inner') cold_exp_p_aux = c_expanded_p.copy() - del cold_exp_p_aux['index_right_x'], cold_exp_p_aux['Road_type'], cold_exp_p_aux['Fleet_value'] - del cold_exp_p_aux['Code'] + cold_exp_p_aux.drop(columns=['Road_type', 'Fleet_value', 'Code'], inplace=True) libc.malloc_trim(0) - for tstep in xrange(self.timestep_num): + for tstep in range(len(self.date_array)): v_column = 'v_{0}'.format(tstep) p_column = '{0}_{1}'.format(pollutant, tstep) t_column = 't_{0}'.format(tstep) @@ -782,16 +900,16 @@ class TrafficSector(Sector): cold_exp_p_aux = cold_exp_p_aux.loc[cold_exp_p_aux[v_column] < cold_exp_p_aux['Max.Speed'], :] # Beta - cold_exp_p_aux.loc[:, 'Beta'] = \ + cold_exp_p_aux['Beta'] = \ (0.6474 - (0.02545 * cold_exp_p_aux['ltrip']) - (0.00974 - (0.000385 * cold_exp_p_aux['ltrip'])) * cold_exp_p_aux[t_column]) * cold_exp_p_aux['bc'] if pollutant != 'nh3': - cold_exp_p_aux.loc[:, 'cold_hot'] = \ + cold_exp_p_aux['cold_hot'] = \ cold_exp_p_aux['A'] * cold_exp_p_aux[v_column] + cold_exp_p_aux['B'] * \ cold_exp_p_aux[t_column] + cold_exp_p_aux['C'] else: - cold_exp_p_aux.loc[:, 'cold_hot'] = \ + cold_exp_p_aux['cold_hot'] = \ ((cold_exp_p_aux['a'] * cold_exp_p_aux['Cmileage'] + cold_exp_p_aux['b']) * cold_exp_p_aux['EFbase'] * cold_exp_p_aux['TF']) / \ ((cold_exp_p_aux['a_hot'] * cold_exp_p_aux['Cmileage'] + cold_exp_p_aux['b_hot']) * @@ -799,9 +917,9 @@ class TrafficSector(Sector): cold_exp_p_aux.loc[cold_exp_p_aux['cold_hot'] < 1, 'cold_hot'] = 1 # Formula Cold emissions - cold_exp_p_aux.loc[:, p_column] = \ + cold_exp_p_aux[p_column] = \ cold_exp_p_aux[p_column] * cold_exp_p_aux['Beta'] * (cold_exp_p_aux['cold_hot'] - 1) - df_list.append((cold_exp_p_aux.loc[:, ['Link_ID', 'Fleet_Code', p_column]]).set_index( + df_list.append((cold_exp_p_aux[['Link_ID', 'Fleet_Code', 
p_column]]).set_index( ['Link_ID', 'Fleet_Code'])) try: @@ -817,18 +935,18 @@ class TrafficSector(Sector): uni.remove(o) except Exception: error_fleet_code.append(o) - raise IndexError('There are duplicated values for {0} codes in the cold EF files.'.format(error_fleet_code)) + error_exit('There are duplicated values for {0} codes in the cold EF files.'.format(error_fleet_code)) - for tstep in xrange(self.timestep_num): + for tstep in range(len(self.date_array)): if 'pm' in self.source_pollutants: - cold_df.loc[:, 'pm10_{0}'.format(tstep)] = cold_df['pm_{0}'.format(tstep)] - cold_df.loc[:, 'pm25_{0}'.format(tstep)] = cold_df['pm_{0}'.format(tstep)] - del cold_df['pm_{0}'.format(tstep)] + cold_df['pm10_{0}'.format(tstep)] = cold_df['pm_{0}'.format(tstep)] + cold_df['pm25_{0}'.format(tstep)] = cold_df['pm_{0}'.format(tstep)] + cold_df.drop(columns=['pm_{0}'.format(tstep)], inplace=True) libc.malloc_trim(0) if 'voc' in self.source_pollutants and 'ch4' in self.source_pollutants: - cold_df.loc[:, 'nmvoc_{0}'.format(tstep)] = \ + cold_df['nmvoc_{0}'.format(tstep)] = \ cold_df['voc_{0}'.format(tstep)] - cold_df['ch4_{0}'.format(tstep)] - del cold_df['voc_{0}'.format(tstep)] + cold_df.drop(columns=['voc_{0}'.format(tstep)], inplace=True) libc.malloc_trim(0) else: self.logger.write_log("WARNING! nmvoc emissions cannot be estimated because voc or ch4 are not " + @@ -844,20 +962,21 @@ class TrafficSector(Sector): def compact_hot_expanded(self, expanded): spent_time = timeit.default_timer() - columns_to_delete = ['Road_type', 'Fleet_value'] + ['v_{0}'.format(x) for x in xrange(self.timestep_num)] - for column_name in columns_to_delete: - del expanded[column_name] + columns_to_delete = ['Road_type', 'Fleet_value'] + ['v_{0}'.format(x) for x in range(len(self.date_array))] + expanded.drop(columns=columns_to_delete, inplace=True) - for tstep in xrange(self.timestep_num): + for tstep in range(len(self.date_array)): if 'pm' in self.source_pollutants: - expanded.loc[:, 'pm10_{0}'.format(tstep)] = expanded['pm_{0}'.format(tstep)] - expanded.loc[:, 'pm25_{0}'.format(tstep)] = expanded['pm_{0}'.format(tstep)] - del expanded['pm_{0}'.format(tstep)] + expanded['pm10_{0}'.format(tstep)] = expanded['pm_{0}'.format(tstep)] + expanded['pm25_{0}'.format(tstep)] = expanded['pm_{0}'.format(tstep)] + expanded.drop(columns=['pm_{0}'.format(tstep)], inplace=True) if 'voc' in self.source_pollutants and 'ch4' in self.source_pollutants: - expanded.loc[:, 'nmvoc_{0}'.format(tstep)] = expanded['voc_{0}'.format(tstep)] - \ - expanded['ch4_{0}'.format(tstep)] - del expanded['voc_{0}'.format(tstep)] + expanded['nmvoc_{0}'.format(tstep)] = expanded['voc_{0}'.format(tstep)] - \ + expanded['ch4_{0}'.format(tstep)] + # For certain vehicles (mostly diesel) and speeds, in urban road CH4 > than VOC according to COPERT V + expanded.loc[expanded['nmvoc_{0}'.format(tstep)] < 0, 'nmvoc_{0}'.format(tstep)] = 0 + expanded.drop(columns=['voc_{0}'.format(tstep)], inplace=True) else: self.logger.write_log("nmvoc emissions cannot be estimated because voc or ch4 are not selected in " + "the pollutant list.") @@ -875,9 +994,10 @@ class TrafficSector(Sector): pollutants = ['pm'] for pollutant in pollutants: ef_tyre = self.read_ef('tyre', pollutant) - df = self.expanded.merge(ef_tyre, left_on='Fleet_Code', right_on='Code', how='inner') - del df['road_grad'], df['Road_type'], df['Code'] - for tstep in xrange(self.timestep_num): + df = pd.merge(self.expanded.reset_index(), ef_tyre, left_on='Fleet_Code', right_on='Code', how='inner') + 
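The hot exhaust factors computed in calculate_hot() a few hunks above follow the COPERT speed dependence: a rational function of the circulation speed (clipped upstream to the Min.Speed/Max.Speed validity range), reduced by RF and scaled by PF * T / Q. A standalone sketch of that curve; the argument names mirror the EF file columns, but the coefficient values are placeholders, not real COPERT coefficients:

```python
def hot_emission_factor(v, alpha, beta, gamma, delta, epsilon, zita, hta,
                        rf=0.0, pf=1.0, t=1.0, q=1.0):
    """COPERT-style hot emission factor (g/km) as a function of speed v in km/h."""
    numerator = alpha * v ** 2 + beta * v + gamma + delta / v
    denominator = epsilon * v ** 2 + zita * v + hta
    return (numerator / denominator) * (1.0 - rf) * (pf * t / q)


# Placeholder coefficients, only to show the shape of the call and of the curve.
for speed in (20.0, 50.0, 90.0, 120.0):
    print(speed, hot_emission_factor(speed, alpha=1e-4, beta=0.02, gamma=1.5, delta=30.0,
                                     epsilon=1e-3, zita=0.05, hta=2.0))
```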
df.drop(columns=['road_grad', 'Road_type', 'Code'], inplace=True) + + for tstep in range(len(self.date_array)): p_column = '{0}_{1}'.format(pollutant, tstep) f_column = 'f_{0}'.format(tstep) v_column = 'v_{0}'.format(tstep) @@ -888,16 +1008,15 @@ class TrafficSector(Sector): # from PM to PM10 & PM2.5 if pollutant == 'pm': - df.loc[:, 'pm10_{0}'.format(tstep)] = df[p_column] * 0.6 - df.loc[:, 'pm25_{0}'.format(tstep)] = df[p_column] * 0.42 - del df[p_column] + df['pm10_{0}'.format(tstep)] = df[p_column] * 0.6 + df['pm25_{0}'.format(tstep)] = df[p_column] * 0.42 + df.drop(columns=[p_column], inplace=True) # Cleaning df - columns_to_delete = ['f_{0}'.format(x) for x in xrange(self.timestep_num)] + ['v_{0}'.format(x) for x in xrange( - self.timestep_num)] - columns_to_delete += ['Fleet_value', 'EFbase'] - for column in columns_to_delete: - del df[column] + columns_to_delete = ['f_{0}'.format(x) for x in range(len(self.date_array))] + \ + ['v_{0}'.format(x) for x in range(len(self.date_array))] + \ + ['Fleet_value', 'EFbase'] + df.drop(columns=columns_to_delete, inplace=True) df = self.speciate_traffic(df, self.tyre_speciation) self.logger.write_time_log('TrafficSector', 'calculate_tyre_wear', timeit.default_timer() - spent_time) @@ -909,9 +1028,9 @@ class TrafficSector(Sector): pollutants = ['pm'] for pollutant in pollutants: ef_tyre = self.read_ef('brake', pollutant) - df = self.expanded.merge(ef_tyre, left_on='Fleet_Code', right_on='Code', how='inner') - del df['road_grad'], df['Road_type'], df['Code'] - for tstep in xrange(self.timestep_num): + df = pd.merge(self.expanded.reset_index(), ef_tyre, left_on='Fleet_Code', right_on='Code', how='inner') + df.drop(columns=['road_grad', 'Road_type', 'Code'], inplace=True) + for tstep in range(len(self.date_array)): p_column = '{0}_{1}'.format(pollutant, tstep) f_column = 'f_{0}'.format(tstep) v_column = 'v_{0}'.format(tstep) @@ -924,14 +1043,14 @@ class TrafficSector(Sector): if pollutant == 'pm': df.loc[:, 'pm10_{0}'.format(tstep)] = df[p_column] * 0.98 df.loc[:, 'pm25_{0}'.format(tstep)] = df[p_column] * 0.39 - del df[p_column] + df.drop(columns=[p_column], inplace=True) # Cleaning df - columns_to_delete = ['f_{0}'.format(x) for x in xrange(self.timestep_num)] + ['v_{0}'.format(x) for x in xrange( - self.timestep_num)] - columns_to_delete += ['Fleet_value', 'EFbase'] - for column in columns_to_delete: - del df[column] + columns_to_delete = ['f_{0}'.format(x) for x in range(len(self.date_array))] + \ + ['v_{0}'.format(x) for x in range(len(self.date_array))] + \ + ['Fleet_value', 'EFbase'] + df.drop(columns=columns_to_delete, inplace=True) + libc.malloc_trim(0) df = self.speciate_traffic(df, self.brake_speciation) @@ -944,9 +1063,9 @@ class TrafficSector(Sector): pollutants = ['pm'] for pollutant in pollutants: ef_tyre = self.read_ef('road', pollutant) - df = self.expanded.merge(ef_tyre, left_on='Fleet_Code', right_on='Code', how='inner') - del df['road_grad'], df['Road_type'], df['Code'] - for tstep in xrange(self.timestep_num): + df = pd.merge(self.expanded.reset_index(), ef_tyre, left_on='Fleet_Code', right_on='Code', how='inner') + df.drop(columns=['road_grad', 'Road_type', 'Code'], inplace=True) + for tstep in range(len(self.date_array)): p_column = '{0}_{1}'.format(pollutant, tstep) f_column = 'f_{0}'.format(tstep) df.loc[:, p_column] = df['Fleet_value'] * df['EFbase'] * df[f_column] @@ -955,14 +1074,13 @@ class TrafficSector(Sector): if pollutant == 'pm': df.loc[:, 'pm10_{0}'.format(tstep)] = df[p_column] * 0.5 df.loc[:, 
'pm25_{0}'.format(tstep)] = df[p_column] * 0.27 - del df[p_column] + df.drop(columns=[p_column], inplace=True) # Cleaning df - columns_to_delete = ['f_{0}'.format(x) for x in xrange(self.timestep_num)] + ['v_{0}'.format(x) for x in xrange( - self.timestep_num)] - columns_to_delete += ['Fleet_value', 'EFbase'] - for column in columns_to_delete: - del df[column] + columns_to_delete = ['f_{0}'.format(x) for x in range(len(self.date_array))] + \ + ['v_{0}'.format(x) for x in range(len(self.date_array))] + \ + ['Fleet_value', 'EFbase'] + df.drop(columns=columns_to_delete, inplace=True) df = self.speciate_traffic(df, self.road_speciation) @@ -973,54 +1091,54 @@ class TrafficSector(Sector): spent_time = timeit.default_timer() if self.resuspension_correction: - road_link_aux = self.road_links.loc[:, ['Link_ID', 'geometry']].copy() + road_link_aux = self.road_links[['geometry']].copy().reset_index() - road_link_aux.loc[:, 'centroid'] = road_link_aux['geometry'].centroid + road_link_aux['centroid'] = road_link_aux['geometry'].centroid link_lons = road_link_aux['geometry'].centroid.x link_lats = road_link_aux['geometry'].centroid.y p_factor = self.calculate_precipitation_factor(link_lons.min(), link_lons.max(), link_lats.min(), link_lats.max(), self.precipitation_path) - unary_union = p_factor.unary_union + road_link_aux['REC'] = road_link_aux.apply(self.nearest, geom_union=unary_union, df1=road_link_aux, df2=p_factor, geom1_col='centroid', src_column='REC', axis=1) - del road_link_aux['centroid'], p_factor['geometry'] + road_link_aux.drop(columns=['centroid'], inplace=True) + p_factor.drop(columns=['geometry'], inplace=True) road_link_aux = road_link_aux.merge(p_factor, left_on='REC', right_on='REC', how='left') - del road_link_aux['REC'] + road_link_aux.drop(columns=['REC'], inplace=True) pollutants = ['pm'] for pollutant in pollutants: ef_tyre = self.read_ef('resuspension', pollutant) - df = self.expanded.merge(ef_tyre, left_on='Fleet_Code', right_on='Code', how='inner') + df = pd.merge(self.expanded.reset_index(), ef_tyre, left_on='Fleet_Code', right_on='Code', how='inner') if self.resuspension_correction: df = df.merge(road_link_aux, left_on='Link_ID', right_on='Link_ID', how='left') - del df['road_grad'], df['Road_type'], df['Code'] - for tstep in xrange(self.timestep_num): + df.drop(columns=['road_grad', 'Road_type', 'Code'], inplace=True) + for tstep in range(len(self.date_array)): p_column = '{0}_{1}'.format(pollutant, tstep) f_column = 'f_{0}'.format(tstep) if self.resuspension_correction: pr_column = 'PR_{0}'.format(tstep) - df.loc[:, p_column] = df['Fleet_value'] * df['EFbase'] * df[pr_column] * df[f_column] + df[p_column] = df['Fleet_value'] * df['EFbase'] * df[pr_column] * df[f_column] else: - df.loc[:, p_column] = df['Fleet_value'] * df['EFbase'] * df[f_column] + df[p_column] = df['Fleet_value'] * df['EFbase'] * df[f_column] # from PM to PM10 & PM2.5 if pollutant == 'pm': - df.loc[:, 'pm10_{0}'.format(tstep)] = df[p_column] + df['pm10_{0}'.format(tstep)] = df[p_column] # TODO Check fraction of pm2.5 - df.loc[:, 'pm25_{0}'.format(tstep)] = df[p_column] * 0.5 - del df[p_column] + df['pm25_{0}'.format(tstep)] = df[p_column] * 0.5 + df.drop(columns=[p_column], inplace=True) # Cleaning df - columns_to_delete = ['f_{0}'.format(x) for x in xrange(self.timestep_num)] + ['v_{0}'.format(x) for x in - xrange(self.timestep_num)] - columns_to_delete += ['Fleet_value', 'EFbase'] - for column in columns_to_delete: - del df[column] + columns_to_delete = ['f_{0}'.format(x) for x in 
range(len(self.date_array))] + \ + ['v_{0}'.format(x) for x in range(len(self.date_array))] + \ + ['Fleet_value', 'EFbase'] + df.drop(columns=columns_to_delete, inplace=True) df = self.speciate_traffic(df, self.resuspension_speciation) @@ -1031,7 +1149,7 @@ class TrafficSector(Sector): spent_time = timeit.default_timer() df_list = [] - for tstep in xrange(self.timestep_num): + for tstep in range(len(self.date_array)): pollutants_to_rename = [p for p in list(df.columns.values) if p.endswith('_{0}'.format(tstep))] pollutants_renamed = [] for p_name in pollutants_to_rename: @@ -1039,7 +1157,7 @@ class TrafficSector(Sector): df.rename(columns={p_name: p_name_new}, inplace=True) pollutants_renamed.append(p_name_new) - df_aux = pd.DataFrame(df.loc[:, ['Link_ID', 'Fleet_Code'] + pollutants_renamed]) + df_aux = df[['Link_ID', 'Fleet_Code'] + pollutants_renamed].copy() df_aux['tstep'] = tstep df_list.append(df_aux) @@ -1055,8 +1173,7 @@ class TrafficSector(Sector): # Reads speciation profile speciation = self.read_profiles(speciation) - del speciation['Copert_V_name'] - + speciation.drop(columns=['Copert_V_name'], inplace=True) # Transform dataset into timestep rows instead of timestep columns df = self.transform_df(df) @@ -1071,28 +1188,28 @@ class TrafficSector(Sector): # PMC if not set(speciation.columns.values).isdisjoint(pmc_list): out_p = set(speciation.columns.values).intersection(pmc_list).pop() - speciation_by_in_p = speciation.loc[:, [out_p] + ['Code']] + speciation_by_in_p = speciation[[out_p] + ['Code']].copy() speciation_by_in_p.rename(columns={out_p: 'f_{0}'.format(out_p)}, inplace=True) - df_aux = df.loc[:, ['pm10', 'pm25', 'Fleet_Code', 'tstep', 'Link_ID']] + df_aux = df[['pm10', 'pm25', 'Fleet_Code', 'tstep', 'Link_ID']] df_aux = df_aux.merge(speciation_by_in_p, left_on='Fleet_Code', right_on='Code', how='left') df_aux.drop(columns=['Code'], inplace=True) - df_aux.loc[:, out_p] = df_aux['pm10'] - df_aux['pm25'] + df_aux[out_p] = df_aux['pm10'] - df_aux['pm25'] + + df_out_list.append(df_aux[[out_p] + ['tstep', 'Link_ID']].groupby(['tstep', 'Link_ID']).sum()) - df_out_list.append(df_aux.loc[:, [out_p] + ['tstep', 'Link_ID']].groupby(['tstep', 'Link_ID']).sum()) - del df_aux[out_p] for in_p in in_list: - involved_out_pollutants = [key for key, value in self.speciation_map.iteritems() if value == in_p] + involved_out_pollutants = [key for key, value in self.speciation_map.items() if value == in_p] # Selecting only necessary speciation profiles - speciation_by_in_p = speciation.loc[:, involved_out_pollutants + ['Code']] + speciation_by_in_p = speciation[involved_out_pollutants + ['Code']].copy() # Adding "f_" in the formula column names for p in involved_out_pollutants: speciation_by_in_p.rename(columns={p: 'f_{0}'.format(p)}, inplace=True) # Getting a slice of the full dataset to be merged - df_aux = df.loc[:, [in_p] + ['Fleet_Code', 'tstep', 'Link_ID']] + df_aux = df[[in_p] + ['Fleet_Code', 'tstep', 'Link_ID']] df_aux = df_aux.merge(speciation_by_in_p, left_on='Fleet_Code', right_on='Code', how='left') df_aux.drop(columns=['Code'], inplace=True) @@ -1101,24 +1218,23 @@ class TrafficSector(Sector): for p in involved_out_pollutants: if in_p is not np.nan: if in_p != 0: - df_aux.loc[:, p] = df_aux['old_{0}'.format(in_p)].multiply(df_aux['f_{0}'.format(p)]) + df_aux[p] = df_aux['old_{0}'.format(in_p)].multiply(df_aux['f_{0}'.format(p)]) try: if in_p == 'nmvoc': mol_w = 1.0 else: mol_w = self.molecular_weights[in_p] except KeyError: - raise AttributeError('{0} not found in the molecular 
weights file.'.format(in_p)) + error_exit('{0} not found in the molecular weights file.'.format(in_p)) # from g/km.h to mol/km.h or g/km.h (aerosols) - df_aux.loc[:, p] = df_aux.loc[:, p] / mol_w + df_aux[p] = df_aux.loc[:, p] / mol_w else: df_aux.loc[:, p] = 0 - df_out_list.append(df_aux.loc[:, [p] + ['tstep', 'Link_ID']].groupby(['tstep', 'Link_ID']).sum()) - del df_aux[p] + df_out_list.append(df_aux[[p] + ['tstep', 'Link_ID']].groupby(['tstep', 'Link_ID']).sum()) del df_aux - del df[in_p] + df.drop(columns=[in_p], inplace=True) df_out = pd.concat(df_out_list, axis=1) @@ -1161,22 +1277,24 @@ class TrafficSector(Sector): if self.do_tyre_wear: self.logger.write_log('\t\tCalculating Tyre wear emissions.', message_level=2) - df_accum = pd.concat([df_accum, self.calculate_tyre_wear()]).groupby(['tstep', 'Link_ID']).sum() + df_accum = pd.concat([df_accum, self.calculate_tyre_wear()], sort=False).groupby(['tstep', 'Link_ID']).sum() libc.malloc_trim(0) if self.do_brake_wear: self.logger.write_log('\t\tCalculating Brake wear emissions.', message_level=2) - df_accum = pd.concat([df_accum, self.calculate_brake_wear()]).groupby(['tstep', 'Link_ID']).sum() + df_accum = pd.concat([df_accum, self.calculate_brake_wear()], sort=False).groupby( + ['tstep', 'Link_ID']).sum() libc.malloc_trim(0) if self.do_road_wear: self.logger.write_log('\t\tCalculating Road wear emissions.', message_level=2) - df_accum = pd.concat([df_accum, self.calculate_road_wear()]).groupby(['tstep', 'Link_ID']).sum() + df_accum = pd.concat([df_accum, self.calculate_road_wear()], sort=False).groupby(['tstep', 'Link_ID']).sum() libc.malloc_trim(0) if self.do_resuspension: self.logger.write_log('\t\tCalculating Resuspension emissions.', message_level=2) - df_accum = pd.concat([df_accum, self.calculate_resuspension()]).groupby(['tstep', 'Link_ID']).sum() + df_accum = pd.concat([df_accum, self.calculate_resuspension()], sort=False).groupby( + ['tstep', 'Link_ID']).sum() libc.malloc_trim(0) - df_accum = df_accum.reset_index().merge(self.road_links.loc[:, ['Link_ID', 'geometry']], left_on='Link_ID', - right_on='Link_ID', how='left') + df_accum = df_accum.reset_index().merge(self.road_links.reset_index().loc[:, ['Link_ID', 'geometry']], + on='Link_ID', how='left') df_accum = gpd.GeoDataFrame(df_accum, crs=self.crs) libc.malloc_trim(0) df_accum.set_index(['Link_ID', 'tstep'], inplace=True) @@ -1198,14 +1316,20 @@ class TrafficSector(Sector): if not os.path.exists(self.link_to_grid_csv): link_emissions_aux = link_emissions.loc[link_emissions['tstep'] == 0, :] - link_emissions_aux = link_emissions_aux.to_crs(self.grid_shp.crs) + if self.grid.grid_type in ['Lambert Conformal Conic', 'Mercator']: + grid_aux = self.grid.shapefile + else: + # For REGULAR and ROTATED grids, shapefile projection is transformed to a metric projected coordinate + # system to derive the length in km. 
+ grid_aux = self.grid.shapefile.to_crs(FINAL_PROJ) + + link_emissions_aux = link_emissions_aux.to_crs(grid_aux.crs) - link_emissions_aux = gpd.sjoin(link_emissions_aux, self.grid_shp.reset_index(), - how="inner", op='intersects') + link_emissions_aux = gpd.sjoin(link_emissions_aux, grid_aux.reset_index(), how="inner", op='intersects') link_emissions_aux = link_emissions_aux.loc[:, ['Link_ID', 'geometry', 'FID']] - link_emissions_aux = link_emissions_aux.merge(self.grid_shp.reset_index().loc[:, ['FID', 'geometry']], + link_emissions_aux = link_emissions_aux.merge(grid_aux.reset_index().loc[:, ['FID', 'geometry']], on='FID', how='left') length_list = [] @@ -1237,7 +1361,7 @@ class TrafficSector(Sector): link_grid = pd.read_csv(self.link_to_grid_csv) link_grid = link_grid[link_grid['Link_ID'].isin(link_emissions['Link_ID'].values)] - del link_emissions['geometry'] + link_emissions.drop(columns=['geometry'], inplace=True) link_grid = link_grid.merge(link_emissions, left_on='Link_ID', right_on='Link_ID') if 'Unnamed: 0' in link_grid.columns.values: link_grid.drop(columns=['Unnamed: 0'], inplace=True) @@ -1248,8 +1372,7 @@ class TrafficSector(Sector): cols_to_update.remove('FID') for col in cols_to_update: link_grid.loc[:, col] = link_grid[col] * link_grid['length'] - del link_grid['length'] - link_grid.drop(columns=['Link_ID'], inplace=True) + link_grid.drop(columns=['length', 'Link_ID'], inplace=True) link_grid['layer'] = 0 link_grid = link_grid.groupby(['FID', 'layer', 'tstep']).sum() @@ -1311,7 +1434,6 @@ class TrafficSector(Sector): df_in = df_in.to_crs({u'units': u'm', u'no_defs': True, u'ellps': u'intl', u'proj': u'utm', u'zone': 31}) if rline_shp: - gpd.GeoDataFrame().to_file df_in.to_file(os.path.join(self.output_dir, 'roads.shp')) count = 0 diff --git a/hermesv3_bu/tools/__init__.py b/hermesv3_bu/tools/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/hermesv3_bu/tools/checker.py b/hermesv3_bu/tools/checker.py new file mode 100644 index 0000000000000000000000000000000000000000..69d124a7f5af62044977cb7cc8393bb1e9b654e1 --- /dev/null +++ b/hermesv3_bu/tools/checker.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python + +import os +import sys +import time +from mpi4py import MPI +from warnings import warn + + +def check_files(file_path_list, warning=False): + if isinstance(file_path_list, str): + file_path_list = [file_path_list] + + files_not_found = [] + for file_path in file_path_list: + if not os.path.exists(file_path): + files_not_found.append(file_path) + if len(files_not_found) > 0: + error_message = "*ERROR* (Rank {0}) File/s not found:".format(MPI.COMM_WORLD.Get_rank()) + for file_path in files_not_found: + error_message += "\n\t{0}".format(file_path) + if warning: + print(error_message.replace('ERROR', 'WARNING')) + warn(error_message.replace('ERROR', 'WARNING')) + return False + else: + error_exit(error_message) + return True + + +def error_exit(error_message): + if not error_message[:7] == "*ERROR*": + error_message = "*ERROR* (Rank {0}) ".format(MPI.COMM_WORLD.Get_rank()) + error_message + print(error_message) + print(error_message, file=sys.stderr) + sys.stderr.flush() + time.sleep(5) + MPI.COMM_WORLD.Abort(1) diff --git a/hermesv3_bu/tools/download_benchmark.py b/hermesv3_bu/tools/download_benchmark.py new file mode 100755 index 0000000000000000000000000000000000000000..2c1c25cdf8bf62b4d7375cf3df942ac8f4768945 --- /dev/null +++ b/hermesv3_bu/tools/download_benchmark.py @@ -0,0 +1,84 @@ 
+#!/usr/bin/env python + +import sys +import os + + +def query_yes_no(question, default="yes"): + valid = {"yes": True, "Yes": True, "YES": True, "Y": True, "y": True, "1": True, 1: True, + "no": False, "No": False, "NO": False, "N": False, "n": False, "0": False, 0: False} + if default is None: + prompt = " [y/n] " + elif default == "yes": + prompt = " [Y/n] " + elif default == "no": + prompt = " [y/N] " + else: + raise ValueError("invalid default answer: '%s'" % default) + + while True: + sys.stdout.write(question + prompt) + choice = input().lower() + if default is not None and choice == '': + return valid[default] + elif choice in valid: + return valid[choice] + else: + sys.stdout.write("Please respond with 'yes' or 'no' (or 'y' or 'n').\n") + + +def check_args(args, exe_str): + if len(args) == 0: + print("Missing destination path after '{0}'. e.g.:".format(exe_str) + + "\n\t{0} /home/user/HERMESv3_BU".format(exe_str)) + sys.exit(1) + elif len(args) > 1: + print("Too much arguments through '{0}'. Only destination path is needed e.g.:".format(exe_str) + + "\n\t{0} /home/user/HERMESv3_BU".format(exe_str)) + sys.exit(1) + else: + dir_path = args[0] + + if not os.path.exists(dir_path): + if query_yes_no("'{0}' does not exist. Do you want to create it? ".format(dir_path)): + os.makedirs(dir_path) + else: + sys.exit(0) + + return dir_path + + +def download_files(parent_path): + from ftplib import FTP + + ftp = FTP('bscesftp.bsc.es') + ftp.login() + dst_file = os.path.join(parent_path, 'HERMESv3_BU_Benchmark.zip') + + ftp.retrbinary('RETR HERMESv3_BU_Benchmark.zip', open(dst_file, 'wb').write) + + ftp.quit() + + return dst_file + + +def unzip_files(zippath, parent_path): + import zipfile + + zip_file = zipfile.ZipFile(zippath, 'r') + zip_file.extractall(parent_path) + zip_file.close() + os.remove(zippath) + + +def download_benchmark(): + argv = sys.argv[1:] + + parent_dir = check_args(argv, 'hermesv3_bu_download_benchmark') + + zippath = download_files(parent_dir) + unzip_files(zippath, parent_dir) + + +if __name__ == '__main__': + download_benchmark() diff --git a/hermesv3_bu/writer/cmaq_writer.py b/hermesv3_bu/writer/cmaq_writer.py index b74c9d25bb80351a1718a9d8562defc0ac2166c2..53c5bec0fc38a13b14b80df1e1c1bd551629e6d7 100755 --- a/hermesv3_bu/writer/cmaq_writer.py +++ b/hermesv3_bu/writer/cmaq_writer.py @@ -9,6 +9,7 @@ from hermesv3_bu.writer.writer import Writer from mpi4py import MPI import timeit from hermesv3_bu.logger.log import Log +from hermesv3_bu.tools.checker import error_exit class CmaqWriter(Writer): @@ -67,8 +68,8 @@ class CmaqWriter(Writer): super(CmaqWriter, self).__init__(comm_world, comm_write, logger, netcdf_path, grid, date_array, pollutant_info, rank_distribution, emission_summary) if self.grid.grid_type not in ['Lambert Conformal Conic']: - raise TypeError("ERROR: Only Lambert Conformal Conic grid is implemented for CMAQ. " + - "The current grid type is '{0}'".format(self.grid.grid_type)) + error_exit("Only Lambert Conformal Conic grid is implemented for CMAQ. " + + "The current grid type is '{0}'".format(self.grid.grid_type)) self.global_attributes_order = [ 'IOAPI_VERSION', 'EXEC_ID', 'FTYPE', 'CDATE', 'CTIME', 'WDATE', 'WTIME', 'SDATE', 'STIME', 'TSTEP', 'NTHIK', @@ -111,8 +112,8 @@ class CmaqWriter(Writer): for i, (pollutant, variable) in enumerate(self.pollutant_info.iterrows()): if variable.get('units') not in ['mol.s-1', 'g.s-1', 'mole/s', 'g/s']: - raise ValueError("'{0}' unit is not supported for CMAQ emission ".format(variable.get('units')) + - "input file. 
Set mol.s-1 or g.s-1 in the speciation_map file.") + error_exit("'{0}' unit is not supported for CMAQ emission ".format(variable.get('units')) + + "input file. Set mol.s-1 or g.s-1 in the speciation_map file.") new_pollutant_info.loc[i, 'pollutant'] = pollutant if variable.get('units') in ['mol.s-1', 'mole/s']: new_pollutant_info.loc[i, 'units'] = "{:<16}".format('mole/s') @@ -134,15 +135,17 @@ class CmaqWriter(Writer): """ spent_time = timeit.default_timer() - a = np.array([[[]]]) + t_flag = np.empty((len(self.date_array), len(self.pollutant_info), 2)) - for date in self.date_array: - b = np.array([[int(date.strftime('%Y%j'))], [int(date.strftime('%H%M%S'))]] * len(self.pollutant_info)) - a = np.append(a, b) + for i_d, date in enumerate(self.date_array): + y_d = int(date.strftime('%Y%j')) + hms = int(date.strftime('%H%M%S')) + for i_p in range(len(self.pollutant_info)): + t_flag[i_d, i_p, 0] = y_d + t_flag[i_d, i_p, 1] = hms - a.shape = (len(self.date_array), 2, len(self.pollutant_info)) self.logger.write_time_log('CmaqWriter', 'create_tflag', timeit.default_timer() - spent_time) - return a + return t_flag def str_var_list(self): """ @@ -180,7 +183,7 @@ class CmaqWriter(Writer): df = pd.read_csv(global_attributes_path) - for att in atts_dict.iterkeys(): + for att in atts_dict.keys(): try: if att in int_atts: atts_dict[att] = np.int32(df.loc[df['attribute'] == att, 'value'].item()) @@ -266,7 +269,11 @@ class CmaqWriter(Writer): """ spent_time = timeit.default_timer() - netcdf = Dataset(self.netcdf_path, mode='w', parallel=True, comm=self.comm_write, info=MPI.Info()) + if self.comm_write.Get_size() > 1: + netcdf = Dataset(self.netcdf_path, format="NETCDF4", mode='w', parallel=True, comm=self.comm_write, + info=MPI.Info()) + else: + netcdf = Dataset(self.netcdf_path, format="NETCDF4", mode='w') # ===== DIMENSIONS ===== self.logger.write_log('\tCreating NetCDF dimensions', message_level=2) @@ -282,14 +289,21 @@ class CmaqWriter(Writer): tflag = netcdf.createVariable('TFLAG', 'i', ('TSTEP', 'VAR', 'DATE-TIME',)) tflag.setncatts({'units': "{:<16}".format(''), 'long_name': "{:<16}".format('TFLAG'), 'var_desc': "{:<80}".format('Timestep-valid flags: (1) YYYYDDD or (2) HHMMSS')}) + tflag[:] = self.create_tflag() # ========== POLLUTANTS ========== for var_name in emissions.columns.values: self.logger.write_log('\t\tCreating {0} variable'.format(var_name), message_level=3) + if self.comm_write.Get_size() > 1: + var = netcdf.createVariable(var_name, np.float64, ('TSTEP', 'LAY', 'ROW', 'COL',)) + var.set_collective(True) + else: + var = netcdf.createVariable(var_name, np.float64, ('TSTEP', 'LAY', 'ROW', 'COL',), zlib=True) + var_data = self.dataframe_to_array(emissions.loc[:, [var_name]]) - var = netcdf.createVariable(var_name, np.float64, ('TSTEP', 'LAY', 'ROW', 'COL',)) + var[:, :, self.rank_distribution[self.comm_write.Get_rank()]['y_min']: self.rank_distribution[self.comm_write.Get_rank()]['y_max'], diff --git a/hermesv3_bu/writer/default_writer.py b/hermesv3_bu/writer/default_writer.py index 65c787ab990d112da1a46c5b84ed7fc2ef92c0d9..b25d1aead2eaec2dacf5c10ab51dd9a0e6d8f60b 100755 --- a/hermesv3_bu/writer/default_writer.py +++ b/hermesv3_bu/writer/default_writer.py @@ -198,14 +198,16 @@ class DefaultWriter(Writer): # emissions.drop(columns=['Unnamed: 0'], inplace=True) for var_name in emissions.columns.values: self.logger.write_log('\t\tCreating {0} variable'.format(var_name), message_level=3) - if CHUNK: - var = netcdf.createVariable(var_name, np.float64, ('time', 'lev',) + var_dim, - 
chunksizes=self.rank_distribution[0]['shape']) - else: - var = netcdf.createVariable(var_name, np.float64, ('time', 'lev',) + var_dim) - if self.comm_write.Get_size() > 1: + if CHUNK: + var = netcdf.createVariable(var_name, np.float64, ('time', 'lev',) + var_dim, + chunksizes=self.rank_distribution[0]['shape']) + else: + var = netcdf.createVariable(var_name, np.float64, ('time', 'lev',) + var_dim) + var.set_collective(True) + else: + var = netcdf.createVariable(var_name, np.float64, ('time', 'lev',) + var_dim, zlib=True) var_data = self.dataframe_to_array(emissions.loc[:, [var_name]]) var[:, :, diff --git a/hermesv3_bu/writer/monarch_writer.py b/hermesv3_bu/writer/monarch_writer.py index 7a00324e5c14c92f35110ca794fa642f6674f35e..3e43f0d59dc077d6046a9934b7c7091bba58321a 100755 --- a/hermesv3_bu/writer/monarch_writer.py +++ b/hermesv3_bu/writer/monarch_writer.py @@ -6,6 +6,7 @@ from hermesv3_bu.writer.writer import Writer from mpi4py import MPI import timeit from hermesv3_bu.logger.log import Log +from hermesv3_bu.tools.checker import error_exit class MonarchWriter(Writer): @@ -62,13 +63,13 @@ class MonarchWriter(Writer): pollutant_info, rank_distribution, emission_summary) if self.grid.grid_type not in ['Rotated']: - raise TypeError("ERROR: Only Rotated grid is implemented for MONARCH. " + - "The current grid type is '{0}'".format(self.grid.grid_type)) + error_exit("Only Rotated grid is implemented for MONARCH. " + + "The current grid type is '{0}'".format(self.grid.grid_type)) for i, (pollutant, variable) in enumerate(self.pollutant_info.iterrows()): if variable.get('units') not in ['mol.s-1.m-2', 'kg.s-1.m-2']: - raise ValueError("'{0}' unit is not supported for CMAQ emission ".format(variable.get('units')) + - "input file. Set mol.s-1.m-2 or kg.s-1.m-2 in the speciation_map file.") + error_exit("'{0}' unit is not supported for MONARCH emission ".format(variable.get('units')) + + "input file. 
Set mol.s-1.m-2 or kg.s-1.m-2 in the speciation_map file.") self.logger.write_time_log('MonarchWriter', '__init__', timeit.default_timer() - spent_time) @@ -86,8 +87,8 @@ class MonarchWriter(Writer): if self.comm_write.Get_rank() == 0: self.grid.add_cell_area() - cell_area = self.grid.shapefile[['FID', 'cell_area']] - cell_area.set_index('FID', inplace=True) + cell_area = self.grid.shapefile[['cell_area']] + # cell_area.set_index('FID', inplace=True) else: cell_area = None cell_area = self.comm_write.bcast(cell_area, root=0) @@ -113,7 +114,11 @@ class MonarchWriter(Writer): """ from cf_units import Unit spent_time = timeit.default_timer() - netcdf = Dataset(self.netcdf_path, mode='w', parallel=True, comm=self.comm_write, info=MPI.Info()) + if self.comm_write.Get_size() > 1: + netcdf = Dataset(self.netcdf_path, format="NETCDF4", mode='w', parallel=True, comm=self.comm_write, + info=MPI.Info()) + else: + netcdf = Dataset(self.netcdf_path, format="NETCDF4", mode='w') # ========== DIMENSIONS ========== self.logger.write_log('\tCreating NetCDF dimensions', message_level=2) @@ -186,10 +191,16 @@ class MonarchWriter(Writer): for var_name in emissions.columns.values: self.logger.write_log('\t\tCreating {0} variable'.format(var_name), message_level=3) - var_data = self.dataframe_to_array(emissions.loc[:, [var_name]]) # var = netcdf.createVariable(var_name, np.float64, ('time', 'lev',) + var_dim, # chunksizes=self.rank_distribution[0]['shape']) - var = netcdf.createVariable(var_name, np.float64, ('time', 'lev',) + var_dim) + + if self.comm_write.Get_size() > 1: + var = netcdf.createVariable(var_name, np.float64, ('time', 'lev',) + var_dim) + var.set_collective(True) + else: + var = netcdf.createVariable(var_name, np.float64, ('time', 'lev',) + var_dim, zlib=True) + + var_data = self.dataframe_to_array(emissions.loc[:, [var_name]]) var[:, :, self.rank_distribution[self.comm_write.Get_rank()]['y_min']: diff --git a/hermesv3_bu/writer/wrfchem_writer.py b/hermesv3_bu/writer/wrfchem_writer.py index 693727c136d67ef889a45a804398b27bf693cfa9..a1fd289d2ec553b6a6dccaa81b1d5f77e1033fb4 100755 --- a/hermesv3_bu/writer/wrfchem_writer.py +++ b/hermesv3_bu/writer/wrfchem_writer.py @@ -9,6 +9,7 @@ from hermesv3_bu.writer.writer import Writer from mpi4py import MPI import timeit from hermesv3_bu.logger.log import Log +from hermesv3_bu.tools.checker import error_exit class WrfChemWriter(Writer): @@ -67,8 +68,8 @@ class WrfChemWriter(Writer): super(WrfChemWriter, self).__init__(comm_world, comm_write, logger, netcdf_path, grid, date_array, pollutant_info, rank_distribution, emission_summary) if self.grid.grid_type not in ['Lambert Conformal Conic', 'Mercator']: - raise TypeError("ERROR: Only Lambert Conformal Conic or Mercator grid is implemented for WRF-Chem. " + - "The current grid type is '{0}'".format(self.grid.grid_type)) + error_exit("ERROR: Only Lambert Conformal Conic or Mercator grid is implemented for WRF-Chem. " + + "The current grid type is '{0}'".format(self.grid.grid_type)) self.global_attributes_order = [ 'TITLE', 'START_DATE', 'WEST-EAST_GRID_DIMENSION', 'SOUTH-NORTH_GRID_DIMENSION', @@ -138,9 +139,9 @@ class WrfChemWriter(Writer): for i, (pollutant, variable) in enumerate(self.pollutant_info.iterrows()): if variable.get('units') not in ['mol.h-1.km-2', "mol km^-2 hr^-1", 'ug.s-1.m-2', "ug/m3 m/s"]: - raise ValueError("'{0}' unit is not supported for WRF-Chem emission ".format(variable.get('units')) + - "input file. 
Set '{0}' in the speciation_map file.".format( - ['mol.h-1.km-2', "mol km^-2 hr^-1", 'ug.s-1.m-2', "ug/m3 m/s"])) + error_exit("'{0}' unit is not supported for WRF-Chem emission ".format(variable.get('units')) + + "input file. Set '{0}' in the speciation_map file.".format( + ['mol.h-1.km-2', "mol km^-2 hr^-1", 'ug.s-1.m-2', "ug/m3 m/s"])) new_pollutant_info.loc[i, 'pollutant'] = pollutant if variable.get('units') in ['mol.h-1.km-2', "mol km^-2 hr^-1"]: @@ -177,8 +178,8 @@ class WrfChemWriter(Writer): elif self.grid.grid_type == 'Mercator': lat_ts = np.float32(self.grid.attributes['lat_ts']) else: - raise TypeError("ERROR: Only Lambert Conformal Conic or Mercator grid is implemented for WRF-Chem. " + - "The current grid type is '{0}'".format(self.grid.grid_type)) + error_exit("Only Lambert Conformal Conic or Mercator grid is implemented for WRF-Chem. " + + "The current grid type is '{0}'".format(self.grid.grid_type)) atts_dict = { 'BOTTOM-TOP_GRID_DIMENSION': np.int32(45), @@ -228,7 +229,7 @@ class WrfChemWriter(Writer): df = pd.read_csv(global_attributes_path) - for att in atts_dict.iterkeys(): + for att in atts_dict.keys(): try: if att in int_atts: atts_dict[att] = np.int32(df.loc[df['attribute'] == att, 'value'].item()) @@ -307,15 +308,12 @@ class WrfChemWriter(Writer): :return: """ - import netCDF4 + aux_times = np.chararray((len(self.date_array), 19), itemsize=1) - aux_times_list = [] + for i, date in enumerate(self.date_array): + aux_times[i] = list(date.strftime("%Y-%m-%d_%H:%M:%S")) - for date in self.date_array: - aux_times_list.append(date.strftime("%Y-%m-%d_%H:%M:%S")) - - str_out = netCDF4.stringtochar(np.array(aux_times_list)) - return str_out + return aux_times def write_netcdf(self, emissions): """ @@ -327,7 +325,11 @@ class WrfChemWriter(Writer): """ spent_time = timeit.default_timer() - netcdf = Dataset(self.netcdf_path, mode='w', parallel=True, comm=self.comm_write, info=MPI.Info()) + if self.comm_write.Get_size() > 1: + netcdf = Dataset(self.netcdf_path, format="NETCDF4", mode='w', parallel=True, comm=self.comm_write, + info=MPI.Info()) + else: + netcdf = Dataset(self.netcdf_path, format="NETCDF4", mode='w') # ===== DIMENSIONS ===== self.logger.write_log('\tCreating NetCDF dimensions', message_level=2) @@ -347,8 +349,16 @@ class WrfChemWriter(Writer): for var_name in emissions.columns.values: self.logger.write_log('\t\tCreating {0} variable'.format(var_name), message_level=3) + if self.comm_write.Get_size() > 1: + var = netcdf.createVariable(var_name, np.float64, + ('Time', 'emissions_zdim', 'south_north', 'west_east',)) + var.set_collective(True) + else: + var = netcdf.createVariable(var_name, np.float64, + ('Time', 'emissions_zdim', 'south_north', 'west_east',), zlib=True) + var_data = self.dataframe_to_array(emissions.loc[:, [var_name]]) - var = netcdf.createVariable(var_name, np.float64, ('Time', 'emissions_zdim', 'south_north', 'west_east',)) + var[:, :, self.rank_distribution[self.comm_write.Get_rank()]['y_min']: self.rank_distribution[self.comm_write.Get_rank()]['y_max'], diff --git a/hermesv3_bu/writer/writer.py b/hermesv3_bu/writer/writer.py index 0da57438bf3bea439c8bf5006abe51acfb07fd3a..3d5d284d78a20491e3b0fefe56fa3cd01fe03c59 100755 --- a/hermesv3_bu/writer/writer.py +++ b/hermesv3_bu/writer/writer.py @@ -8,6 +8,7 @@ from mpi4py import MPI from warnings import warn import timeit from hermesv3_bu.logger.log import Log +from hermesv3_bu.tools.checker import error_exit CHUNKING = True BALANCED = False @@ -80,8 +81,8 @@ def select_writer(logger, arguments, grid, 
date_array): writer = WrfChemWriter(comm_world, comm_write, logger, arguments.output_name, grid, date_array, pollutant_info, rank_distribution, arguments.output_attributes, arguments.emission_summary) else: - raise TypeError("Unknown output model '{0}'. ".format(arguments.output_model) + - "Only MONARCH, CMAQ, WRF_CHEM or DEFAULT writers are available") + error_exit("Unknown output model '{0}'. ".format(arguments.output_model) + + "Only MONARCH, CMAQ, WRF_CHEM or DEFAULT writers are available") logger.write_time_log('Writer', 'select_writer', timeit.default_timer() - spent_time) return writer @@ -129,7 +130,7 @@ def get_distribution(logger, processors, shape): aux_rows -= 1 rows_sum = 0 - for proc in xrange(processors): + for proc in range(processors): total_rows -= aux_rows if total_rows < 0 or proc == processors - 1: rows = total_rows + aux_rows @@ -193,7 +194,7 @@ def get_balanced_distribution(logger, processors, shape): procs_rows_extended = total_rows-(procs_rows*processors) rows_sum = 0 - for proc in xrange(processors): + for proc in range(processors): if proc < procs_rows_extended: aux_rows = procs_rows + 1 else: @@ -308,7 +309,7 @@ class Writer(object): # Sending self.logger.write_log('Sending emissions to the writing processors.', message_level=2) requests = [] - for w_rank, info in self.rank_distribution.iteritems(): + for w_rank, info in self.rank_distribution.items(): partial_emis = emissions.loc[(emissions.index.get_level_values(0) >= info['fid_min']) & (emissions.index.get_level_values(0) < info['fid_max'])] @@ -320,40 +321,29 @@ class Writer(object): # Receiving self.logger.write_log('Receiving emissions in the writing processors.', message_level=2) - if self.comm_world.Get_rank() in self.rank_distribution.iterkeys(): + if self.comm_world.Get_rank() in self.rank_distribution.keys(): self.logger.write_log("I'm a writing processor.", message_level=3) data_list = [] self.logger.write_log("Prepared to receive", message_level=3) - for i_rank in xrange(self.comm_world.Get_size()): - # print self.rank_distribution[i_rank] - # print reduce(lambda x, y: x * y, self.rank_distribution[i_rank]['shape']) - # req = self.comm_world.irecv(source=i_rank, tag=i_rank + MPI_TAG_CONSTANT) - # data_size = req.wait() - + for i_rank in range(self.comm_world.Get_size()): self.logger.write_log( '\tFrom {0} to {1}'.format(i_rank, self.comm_world.Get_rank()), message_level=3) req = self.comm_world.irecv(2**27, source=i_rank, tag=i_rank) dataframe = req.wait() data_list.append(dataframe.reset_index()) - # print "I'm Rank {0} DataList: \n {1}".format(self.comm_world.Get_rank(), data_list) - # new_emissions = pd.concat(data_list).reset_index().groupby(['FID', 'layer', 'tstep']).sum() + new_emissions = pd.concat(data_list) new_emissions[['FID', 'layer', 'tstep']] = new_emissions[['FID', 'layer', 'tstep']].astype(np.int32) - # new_emissions.reset_index(inplace=True) new_emissions = new_emissions.groupby(['FID', 'layer', 'tstep']).sum() - # try: - # new_emissions = new_emissions.groupby(['FID', 'layer', 'tstep']).sum() - # except KeyError as e: - # print "I'm Rank {0} ERROR on: \n {1}".format(self.comm_world.Get_rank(), new_emissions) - # raise e + else: new_emissions = None self.comm_world.Barrier() self.logger.write_log('All emissions received.', message_level=2) - if self.emission_summary and self.comm_world.Get_rank() in self.rank_distribution.iterkeys(): + if self.emission_summary and self.comm_world.Get_rank() in self.rank_distribution.keys(): self.make_summary(new_emissions) 
self.logger.write_time_log('Writer', 'gather_emissions', timeit.default_timer() - spent_time) @@ -396,7 +386,7 @@ class Writer(object): spent_time = timeit.default_timer() emissions = self.unit_change(emissions) emissions = self.gather_emissions(emissions) - if self.comm_world.Get_rank() in self.rank_distribution.iterkeys(): + if self.comm_world.Get_rank() in self.rank_distribution.keys(): self.write_netcdf(emissions) self.comm_world.Barrier() @@ -447,5 +437,5 @@ class Writer(object): summary.drop(columns=['layer'], inplace=True) summary.groupby('tstep').sum().to_csv(self.emission_summary_paths['hourly_summary_path']) summary.drop(columns=['tstep'], inplace=True) - summary.sum().to_csv(self.emission_summary_paths['total_summary_path']) + pd.DataFrame(summary.sum()).to_csv(self.emission_summary_paths['total_summary_path']) self.logger.write_time_log('Writer', 'make_summary', timeit.default_timer() - spent_time) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..279fe08ed2cc1458976f65436bb64e860705486a --- /dev/null +++ b/requirements.txt @@ -0,0 +1,12 @@ +cdo +cf_units +configargparse +geopandas +mpi4py +netCDF4 +numpy +pandas +pyproj +rasterio +shapely +timezonefinder diff --git a/setup.py b/setup.py index 1472ab30ddee0cbb7fa4837b6301d22e58aefb58..9524d79e0a8f3a41734d7078d44ace02ea86aa4b 100755 --- a/setup.py +++ b/setup.py @@ -35,7 +35,6 @@ setup( 'pyproj', 'configargparse', 'cf_units>=1.1.3', - 'holidays', 'pytz', 'timezonefinder', 'mpi4py', @@ -45,7 +44,7 @@ setup( ], packages=find_packages(), classifiers=[ - "Programming Language :: Python :: 2.7", + "Programming Language :: Python :: 3.7", "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", "Operating System :: OS Independent", "Topic :: Scientific/Engineering :: Atmospheric Science" @@ -60,6 +59,7 @@ setup( entry_points={ 'console_scripts': [ 'hermesv3_bu = hermesv3_bu.hermes:run', + 'hermesv3_bu_download_benchmark = hermesv3_bu.tools.download_benchmark:download_benchmark', ], }, ) \ No newline at end of file
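
All four writers touched above (default, CMAQ, MONARCH, WRF-Chem) now share the same output pattern: when more than one writing process is active the NetCDF file is opened with parallel I/O and each variable is marked collective, while a single writer falls back to a serial, compressed file. The sketch below shows that pattern in isolation; the file name, variable name, dimension sizes and the row split are placeholders rather than values from this patch, and it assumes netCDF4-python built against an MPI-enabled HDF5.

    # Minimal sketch of the collective-write pattern used by the writers (illustrative names only).
    import numpy as np
    from mpi4py import MPI
    from netCDF4 import Dataset

    comm = MPI.COMM_WORLD  # the writers use a dedicated comm_write sub-communicator
    n_time, n_lev, n_lat, n_lon = 24, 1, 100, 100

    if comm.Get_size() > 1:
        # Every writing rank opens the same file; requires a parallel HDF5/NetCDF4 build.
        nc = Dataset('example_emissions.nc', mode='w', format='NETCDF4', parallel=True,
                     comm=comm, info=MPI.Info())
    else:
        nc = Dataset('example_emissions.nc', mode='w', format='NETCDF4')

    nc.createDimension('time', n_time)
    nc.createDimension('lev', n_lev)
    nc.createDimension('lat', n_lat)
    nc.createDimension('lon', n_lon)

    if comm.Get_size() > 1:
        var = nc.createVariable('nox_no2', np.float64, ('time', 'lev', 'lat', 'lon'))
        var.set_collective(True)  # all ranks must take part in every write to this variable
    else:
        # Compression is used only in the serial branch, mirroring the patch.
        var = nc.createVariable('nox_no2', np.float64, ('time', 'lev', 'lat', 'lon'), zlib=True)

    # Each rank writes its own band of rows, as the writers do with rank_distribution['y_min'/'y_max'].
    rows = n_lat // comm.Get_size()
    y_min = comm.Get_rank() * rows
    y_max = n_lat if comm.Get_rank() == comm.Get_size() - 1 else y_min + rows
    var[:, :, y_min:y_max, :] = np.zeros((n_time, n_lev, y_max - y_min, n_lon))

    nc.close()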