From 4be4dbb5f6b3e96553c0ea67d3f3268918965938 Mon Sep 17 00:00:00 2001
From: ctena
Date: Wed, 2 Mar 2022 16:42:36 +0100
Subject: [PATCH 1/3] nei2017gb17j_preproc.py ready to test

---
 preproc/epa/nei2017gb17j_pollutant_info.csv |  28 ++
 preproc/epa/nei2017gb17j_preproc.py         | 310 ++++++++++++++++++++
 2 files changed, 338 insertions(+)
 create mode 100644 preproc/epa/nei2017gb17j_pollutant_info.csv
 create mode 100755 preproc/epa/nei2017gb17j_preproc.py

diff --git a/preproc/epa/nei2017gb17j_pollutant_info.csv b/preproc/epa/nei2017gb17j_pollutant_info.csv
new file mode 100644
index 0000000..e8dc640
--- /dev/null
+++ b/preproc/epa/nei2017gb17j_pollutant_info.csv
@@ -0,0 +1,28 @@
+Model Specie,Definition,Molecular Weight,Explicit or Lumped
+AACD,acetic acid,60.0,E
+ACET,acetone,58.1,E
+ALD2,acetaldehyde,44.0,E
+ALD2_PRIMARY,acetaldehyde from emissions only,44,E
+ALDX,aldehydes with 3 or more carbons,58.1,L
+APIN,alpha pinene,136.2,E
+BENZ,benzene,78.1,E
+NH3,ammonia,17.0,E
+NH3_FERT,ammonia fertilizers,17.0,E
+MEOH,methanol,32,E
+ETH,ethene,28,E
+ETHA,ethane,30.1,E
+ETHY,ethyne (acetylene),26,E
+ETOH,ethanol,46.1,E
+FACD,formic acid,46,E
+FORM,formaldehyde,30,E
+IOLE,internal alkene bond,56.1,L
+ISOP,isoprene,68.1,E
+KET,carbon-ketone bond,72.1,L
+OLE,terminal alkene bond,42.1,L
+PAR,carbon-carbon single bond,72.1,L
+SOAALK,tracer for alkanes that can form secondary organic aerosol,112,L
+TERP,monoterpenes,136.2,L
+TOL,toluene and other monoalkyl aromatics,92.1,L
+FORM_PRIMARY,formaldehyde from emissions only,30,E
+NAPH,naphthalene,128.2,E
+XYLMN,xylene and other polyalkyl aromatics except naphthalene,106.2,L
\ No newline at end of file
diff --git a/preproc/epa/nei2017gb17j_preproc.py b/preproc/epa/nei2017gb17j_preproc.py
new file mode 100755
index 0000000..f4354f8
--- /dev/null
+++ b/preproc/epa/nei2017gb17j_preproc.py
@@ -0,0 +1,310 @@
+#!/usr/bin/env python
+
+# Copyright 2018 Earth Sciences Department, BSC-CNS
+#
+# This file is part of HERMESv3_GR.
+#
+# HERMESv3_GR is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# HERMESv3_GR is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with HERMESv3_GR. If not, see <http://www.gnu.org/licenses/>.
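+
+# Preprocessor for the EPA NEI 2017gb (17j platform) CMAQ-ready 'emis_mole'
+# files on the 12US1 grid: for each day and species, the first 24 hourly time
+# steps are averaged, multiplied by the molecular weight listed in
+# nei2017gb17j_pollutant_info.csv and divided by the cell area read from
+# GRID_NETCDF_PATH, and the resulting daily mean field is written out with
+# units of kg.s-1.m-2.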
+
+import os
+import sys
+import numpy as np
+import pandas as pd
+import timeit
+from datetime import datetime, timedelta
+import xarray as xr
+from netCDF4 import Dataset
+
+# ============== README ======================
+"""
+downloading website:
+reference:
+Besides citing HERMESv3_GR, users must also acknowledge the use of the corresponding emission inventories in their works
+"""
+
+# ============== CONFIGURATION PARAMETERS ======================
+# The original emissions are downloaded from the following link:
+# https://dataverse.unc.edu/dataset.xhtml?persistentId=doi:10.15139/S3/TCR6BB
+INPUT_PATH = '/esarchive/recon/epa/nei2017gb17j/original_files//emis_mole___12US1_cmaq_cb6ae7_2017gb_17j.ncf'
+OUTPUT_PATH = '/esarchive/recon/epa/nei2017gb17j/daily_mean/_/_.nc'
+SECTOR_LIST = ['ag']
+POLLUTANT_INFO_PATH = "/Users/ctena/PycharmProjects/hermesv3_gr/preproc/epa/nei2017gb17j_pollutant_info.csv"
+# GRID_NETCDF created with HERMESv3_GR with this configuration:
+# if domain_type == lcc:
+#     lat_1 = 33.0
+#     lat_2 = 45.0
+#     lon_0 = -97.0
+#     lat_0 = 40.0
+#     nx = 459
+#     ny = 299
+#     inc_x = 12000
+#     inc_y = 12000
+#     x_0 = -2556000.0
+#     y_0 = -1728000.0

+GRID_NETCDF_PATH = "/scratch/Earth/ctena/HERMESv3_GR_aux/coverage/USA_lcc_12000.0_12000.0/temporal_coords.nc"
+
+START_DAY = datetime(year=2017, month=1, day=1)
+# END_DAY = datetime(year=2017, month=12, day=31)
+END_DAY = datetime(year=2017, month=1, day=1)
+
+XARRAY = True
+# ==============================================================
+
+
+# Apparently carried over from other HERMESv3_GR preprocessors (the global
+# attributes below still refer to ECLIPSEv5a); it is not called by
+# main_xarray() and it relies on a create_bounds helper that is not defined
+# or imported in this file.
+def write_netcdf(output_name_path, data_list, center_lats, center_lons, grid_cell_area, date):
+    print(output_name_path)
+    # Creating NetCDF & Dimensions
+    nc_output = Dataset(output_name_path, mode='w', format="NETCDF4")
+    nc_output.createDimension('nv', 2)
+    nc_output.createDimension('lon', center_lons.shape[0])
+    nc_output.createDimension('lat', center_lats.shape[0])
+    nc_output.createDimension('time', None)
+
+    # TIME
+    time = nc_output.createVariable('time', 'd', ('time',), zlib=True)
+    # time.units = "{0} since {1}".format(tstep_units, global_atts['Start_DateTime'].strftime('%Y-%m-%d %H:%M:%S'))
+    time.units = "hours since {0}".format(date.strftime('%Y-%m-%d %H:%M:%S'))
+    time.standard_name = "time"
+    time.calendar = "gregorian"
+    time.long_name = "time"
+    time[:] = [0]
+
+    # LATITUDE
+    lat = nc_output.createVariable('lat', 'f', ('lat',), zlib=True)
+    lat.bounds = "lat_bnds"
+    lat.units = "degrees_north"
+    lat.axis = "Y"
+    lat.long_name = "latitude"
+    lat.standard_name = "latitude"
+    lat[:] = center_lats
+
+    lat_bnds = nc_output.createVariable('lat_bnds', 'f', ('lat', 'nv',), zlib=True)
+    lat_bnds[:] = create_bounds(center_lats)
+
+    # LONGITUDE
+    lon = nc_output.createVariable('lon', 'f', ('lon',), zlib=True)
+    lon.bounds = "lon_bnds"
+    lon.units = "degrees_east"
+    lon.axis = "X"
+    lon.long_name = "longitude"
+    lon.standard_name = "longitude"
+    lon[:] = center_lons
+
+    lon_bnds = nc_output.createVariable('lon_bnds', 'f', ('lon', 'nv',), zlib=True)
+    lon_bnds[:] = create_bounds(center_lons)
+
+    for var in data_list:
+        # VARIABLE
+        nc_var = nc_output.createVariable(var['name'], 'f', ('time', 'lat', 'lon',), zlib=True)
+        nc_var.units = var['units'].symbol
+        nc_var.long_name = var['long_name']
+        nc_var.coordinates = 'lat lon'
+        nc_var.grid_mapping = 'crs'
+        nc_var.cell_measures = 'area: cell_area'
+        nc_var[:] = var['data']
+
+    # CELL AREA
+    cell_area = nc_output.createVariable('cell_area', 'f', ('lat', 'lon',))
+    cell_area.long_name = "area of the grid cell"
+    cell_area.standard_name = "area"
+    cell_area.units = "m2"
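+    # The cell areas (m2) are attached to every emission variable above
+    # through the 'cell_measures' attribute, so per-cell values can be
+    # related to the area of each grid cell downstream.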
+    cell_area[:] = grid_cell_area
+
+    # CRS
+    crs = nc_output.createVariable('crs', 'i')
+    crs.grid_mapping_name = "latitude_longitude"
+    crs.semi_major_axis = 6371000.0
+    crs.inverse_flattening = 0
+
+    nc_output.setncattr('title', 'ECLIPSEv5a inventory')
+    nc_output.setncattr('Conventions', 'CF-1.6', )
+    nc_output.setncattr('institution', 'IIASA', )
+    nc_output.setncattr('source', 'IIASA', )
+    nc_output.setncattr('history', 'Re-writing of the ECLIPSEv5a input to follow the CF-1.6 conventions;\n' +
+                        '2017-11-28: Creating;\n')
+    nc_output.setncattr('web', 'http://www.iiasa.ac.at/web/home/research/researchPrograms/air/ECLIPSEv5a.html')
+    nc_output.setncattr('comment', 'Re-writing done by Carles Tena (carles.tena@bsc.es) from the BSC-CNS ' +
+                        '(Barcelona Supercomputing Center)', )
+
+    nc_output.close()
+
+
+# Also carried over from other preprocessors: it references LIST_POLLUTANTS,
+# LIST_SECTORS and several helpers that are not defined in this file, and it
+# is not called by this script.
+def do_transformation(year):
+    """
+    Does the transformation for the selected year
+
+    :param year: Year to calculate
+    :type year: int
+    """
+    from datetime import datetime
+    from hermesv3_gr.tools.netcdf_tools import extract_vars, get_grid_area, write_netcdf
+    for pollutant in LIST_POLLUTANTS:
+        file_name = get_input_name(pollutant)
+        if os.path.exists(file_name):
+            # c_lats, c_lons, b_lats, b_lons = extract_vars(file_name, ['lat', 'lon', 'lat_bnds', 'lon_bnds'])
+            c_lats, c_lons = extract_vars(file_name, ['lat', 'lon'])
+
+            b_lats = create_bounds(c_lats['data'])
+            b_lons = create_bounds(c_lons['data'])
+
+            cell_area = get_grid_area(file_name)
+
+            global_attributes = get_global_attributes(file_name)
+            for sector in LIST_SECTORS:
+                data = get_full_year_data(file_name, sector, year)
+
+                if pollutant == 'nox':
+                    pollutant_name = 'nox_no'
+                elif pollutant == 'voc1':
+                    pollutant_name = 'voc01'
+                elif pollutant == 'voc2':
+                    pollutant_name = 'voc02'
+                elif pollutant == 'voc3':
+                    pollutant_name = 'voc03'
+                elif pollutant == 'voc4':
+                    pollutant_name = 'voc04'
+                elif pollutant == 'voc5':
+                    pollutant_name = 'voc05'
+                elif pollutant == 'voc6':
+                    pollutant_name = 'voc06'
+                elif pollutant == 'voc7':
+                    pollutant_name = 'voc07'
+                elif pollutant == 'voc8':
+                    pollutant_name = 'voc08'
+                elif pollutant == 'voc9':
+                    pollutant_name = 'voc09'
+                else:
+                    pollutant_name = pollutant
+
+                file_path = os.path.join(OUTPUT_PATH, 'monthly_mean', '{0}_{1}'.format(pollutant_name, sector))
+                if not os.path.exists(file_path):
+                    os.makedirs(file_path)
+
+                for month in range(1, 12 + 1, 1):
+                    emission = {
+                        'name': pollutant_name,
+                        'units': 'kg.m-2.s-1',
+                        'data': data[month - 1, :, :].reshape((1,) + cell_area.shape)
+                    }
+                    write_netcdf(
+                        os.path.join(file_path, '{0}_{1}{2}.nc'.format(pollutant_name, year, str(month).zfill(2))),
+                        c_lats['data'], c_lons['data'], [emission], date=datetime(year=year, month=month, day=1),
+                        boundary_latitudes=b_lats, boundary_longitudes=b_lons, cell_area=cell_area,
+                        global_attributes=global_attributes)
+        else:
+            raise IOError('File not found {0}'.format(file_name))
+    return True
+
+
+def get_pollutant_info(csv_path):
+    """
+    Obtain the pollutant info
+
+    :param csv_path: Path to the CSV that contains all the pollutant info
+    :type csv_path: str
+
+    :return: Pollutant info table
+    :rtype: pd.DataFrame
+    """
+    pollutant_info = pd.read_csv(csv_path)
+    return pollutant_info
+
+
+def parse_path(file_path, date_aux=None, sector=None, pollutant=None):
+    """
+    :param file_path: Path to parse
+    :type file_path: str
+
+    :param date_aux: Date to parse
+    :type date_aux: datetime, None
+
+    :param sector: Sector name
+    :type sector: str, None
+
+    :param pollutant: Pollutant name
+    :type pollutant: str, None
+
+    :return: Complete path
+    :rtype: str
+    """
+    if date_aux is not None:
+        file_path = file_path.replace('', date_aux.strftime("%Y%m%d"))
+    if pollutant is not None:
+        file_path = file_path.replace('', pollutant)
+    if sector is not None:
+        file_path = file_path.replace('', sector)
+    return file_path
+
+
+def make_dir(file_path):
+    parent_dir = os.path.dirname(file_path)
+    if not os.path.exists(parent_dir):
+        os.makedirs(parent_dir)
+    return True
+
+
+def main_xarray():
+    pol_info_df = get_pollutant_info(POLLUTANT_INFO_PATH)
+
+    grid_nc = xr.open_dataset(GRID_NETCDF_PATH, decode_cf=True, decode_times=False, decode_coords='all')
+    del grid_nc['var_aux']
+
+    day_aux = START_DAY
+    for sector in SECTOR_LIST:
+        while day_aux <= END_DAY:
+            in_nc = xr.open_dataset(parse_path(INPUT_PATH, date_aux=day_aux, sector=sector))
+            for i, poll_info in pol_info_df.iterrows():
+                out_nc = grid_nc.copy()
+                poll_name = poll_info['Model Specie'].lower()
+                out_nc[poll_name] = (('time', 'lat', 'lon'),
+                                     (in_nc[poll_info['Model Specie']][:24].mean(dim='TSTEP').data * poll_info[
+                                         'Molecular Weight']) / grid_nc['cell_area'].data)
+                out_nc[poll_name].attrs['units'] = 'kg.s-1.m-2'
+                out_nc[poll_name].attrs['long_name'] = poll_info['Definition']
+                out_nc['time'] = [day_aux]
+
+                out_nc.attrs['comment'] = "Re-writing done by Carles Tena (carles.tena@bsc.es) from the BSC-CNS " + \
+                                          "(Barcelona Supercomputing Center)"
+
+                out_path = parse_path(OUTPUT_PATH, date_aux=day_aux, sector=sector, pollutant=poll_name)
+                make_dir(out_path)
+                out_nc.to_netcdf(out_path)
+
+            day_aux += timedelta(days=1)
+
+    return True
+
+
+def main():
+    day_aux = START_DAY
+
+    while day_aux <= END_DAY:
+        in_path = parse_path(INPUT_PATH, day_aux)
+
+        day_aux += timedelta(days=1)
+
+    return True
+
+
+if __name__ == '__main__':
+    initime = timeit.default_timer()
+
+    if XARRAY:
+        main_xarray()
+    else:
+        main()
+
+    print("\t{0:.3f}s".format(timeit.default_timer() - initime))
+    sys.exit(0)
--
GitLab


From 21ee39b031df1845f16d287e50a1ab4a54158b1c Mon Sep 17 00:00:00 2001
From: Carles Tena
Date: Tue, 8 Mar 2022 12:09:45 +0100
Subject: [PATCH 2/3] Bug fix on the EPA NEI2017 preproc unit change

---
 preproc/epa/nei2017gb17j_preproc.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/preproc/epa/nei2017gb17j_preproc.py b/preproc/epa/nei2017gb17j_preproc.py
index f4354f8..c8e8b06 100755
--- a/preproc/epa/nei2017gb17j_preproc.py
+++ b/preproc/epa/nei2017gb17j_preproc.py
@@ -39,7 +39,7 @@ Besides citing HERMESv3_GR, users must also acknowledge the use of the correspon
 INPUT_PATH = '/esarchive/recon/epa/nei2017gb17j/original_files//emis_mole___12US1_cmaq_cb6ae7_2017gb_17j.ncf'
 OUTPUT_PATH = '/esarchive/recon/epa/nei2017gb17j/daily_mean/_/_.nc'
 SECTOR_LIST = ['ag']
-POLLUTANT_INFO_PATH = "/Users/ctena/PycharmProjects/hermesv3_gr/preproc/epa/nei2017gb17j_pollutant_info.csv"
+POLLUTANT_INFO_PATH = "/home/Earth/ctena/Models/hermesv3_gr/preproc/epa/nei2017gb17j_pollutant_info.csv"
 # GRID_NETCDF created with HERMESv3_GR with this configuration:
 # if domain_type == lcc:
 #     lat_1 = 33.0
@@ -56,8 +56,8 @@ POLLUTANT_INFO_PATH = "/Users/ctena/PycharmProjects/hermesv3_gr/preproc/epa/nei2
 GRID_NETCDF_PATH = "/scratch/Earth/ctena/HERMESv3_GR_aux/coverage/USA_lcc_12000.0_12000.0/temporal_coords.nc"
 
 START_DAY = datetime(year=2017, month=1, day=1)
-# END_DAY = datetime(year=2017, month=12, day=31)
-END_DAY = datetime(year=2017, month=1, day=1)
+END_DAY = datetime(year=2017, month=12, day=31)
+# END_DAY = datetime(year=2017, month=1, day=1)
 
 XARRAY = True
 # ==============================================================
@@ -264,13 +264,14 @@ def main_xarray():
     day_aux = START_DAY
     for sector in SECTOR_LIST:
         while day_aux <= END_DAY:
+            print(day_aux)
             in_nc = xr.open_dataset(parse_path(INPUT_PATH, date_aux=day_aux, sector=sector))
             for i, poll_info in pol_info_df.iterrows():
                 out_nc = grid_nc.copy()
                 poll_name = poll_info['Model Specie'].lower()
                 out_nc[poll_name] = (('time', 'lat', 'lon'),
                                      (in_nc[poll_info['Model Specie']][:24].mean(dim='TSTEP').data * poll_info[
-                                         'Molecular Weight']) / grid_nc['cell_area'].data)
+                                         'Molecular Weight']) / (grid_nc['cell_area'].data * 1000))
                 out_nc[poll_name].attrs['units'] = 'kg.s-1.m-2'
                 out_nc[poll_name].attrs['long_name'] = poll_info['Definition']
                 out_nc['time'] = [day_aux]
--
GitLab


From ae5ca39fbbb206add31ef4e26f2dc443a3cc618a Mon Sep 17 00:00:00 2001
From: ctena
Date: Tue, 8 Mar 2022 16:38:04 +0100
Subject: [PATCH 3/3] fix to run NEI on HERMES

---
 .../modules/emision_inventories/emission_inventory.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/hermesv3_gr/modules/emision_inventories/emission_inventory.py b/hermesv3_gr/modules/emision_inventories/emission_inventory.py
index 987ef7a..59c4451 100755
--- a/hermesv3_gr/modules/emision_inventories/emission_inventory.py
+++ b/hermesv3_gr/modules/emision_inventories/emission_inventory.py
@@ -266,7 +266,12 @@ class EmissionInventory(object):
         if self.coverage == 'global':
             regridded_emissions = self.regrid.start_regridding()
         else:
-            regridded_emissions = self.regrid.start_esmpy_regridding()
+            try:
+                # Used when the original regional EI doesn't cover the whole destination domain
+                regridded_emissions = self.regrid.start_esmpy_regridding()
+            except:
+                # Used when the destination domain is smaller than the original regional domain (over longitudes)
+                regridded_emissions = self.regrid.start_regridding()
         for emission in regridded_emissions:
             dict_aux = {'name': emission['name'], 'data': emission['data'], 'units': 'm'}
             self.emissions.append(dict_aux)
--
GitLab
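
The unit change fixed in PATCH 2/3 is a grams-to-kilograms conversion. The sketch below is an illustration of the calculation performed in main_xarray() after the fix, not code taken from the patches; the function and variable names are invented for the example. It assumes hourly per-cell emission rates in moles per second (the convention of the CMAQ-ready emis_mole files), a molecular weight in g/mol taken from nei2017gb17j_pollutant_info.csv and the grid cell area in m2, and it returns the daily mean flux in kg.m-2.s-1. Without the division by 1000 the result would be in g.m-2.s-1, which is what the first version of the preprocessor produced.

import numpy as np


def daily_mean_flux(hourly_moles_per_s, molecular_weight, cell_area):
    """Daily mean emission flux in kg.m-2.s-1.

    hourly_moles_per_s: array (24, ny, nx) with the emission of one species
        in moles per second for every grid cell and hour of the day.
    molecular_weight: molecular weight of the species in g/mol.
    cell_area: array (ny, nx) with the area of every grid cell in m2.
    """
    daily_mean = hourly_moles_per_s[:24].mean(axis=0)    # moles.s-1 per cell
    grams_per_s = daily_mean * molecular_weight          # g.s-1 per cell
    return grams_per_s / (cell_area * 1000.0)            # kg.s-1.m-2


# 10 moles.s-1 of NH3 (17 g/mol) in every 12 km x 12 km cell (1.44e8 m2)
# gives 10 * 17 / 1000 / 1.44e8 ~= 1.18e-9 kg.m-2.s-1.
hourly = np.full((24, 2, 2), 10.0)
area = np.full((2, 2), 12000.0 * 12000.0)
print(daily_mean_flux(hourly, 17.0, area))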