From 084dba5255d9e31f38833d011e12968db7037138 Mon Sep 17 00:00:00 2001
From: ctena <carles.tena@bsc.es>
Date: Thu, 2 Sep 2021 09:47:09 +0200
Subject: [PATCH 01/16] WIP uchile preproc

---
 preproc/uchile_inema_preproc.py | 113 ++++++++++++++++++++++++++++++++
 1 file changed, 113 insertions(+)
 create mode 100755 preproc/uchile_inema_preproc.py
diff --git a/preproc/uchile_inema_preproc.py b/preproc/uchile_inema_preproc.py
new file mode 100755
index 0000000..e64760c
--- /dev/null
+++ b/preproc/uchile_inema_preproc.py
@@ -0,0 +1,113 @@
+#!/usr/bin/env python
+
+# Copyright 2018 Earth Sciences Department, BSC-CNS
+#
+# This file is part of HERMESv3_GR.
+#
+# HERMESv3_GR is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# HERMESv3_GR is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with HERMESv3_GR. If not, see <http://www.gnu.org/licenses/>.
+
+
+import os
+import timeit
+from netCDF4 import Dataset
+
+# ============== README ======================
+"""
+downloading website: ¿¿¿???
+reference: ¿¿¿???
+Besides citing HERMESv3_GR, users must also acknowledge the use of the corresponding emission inventories in their works
+"""
+
+# ============== CONFIGURATION PARAMETERS ======================
+INPUT_PATH = '/esarchive/recon/uchile/inema_v1/original_files/<year>/<sector>/inema_<pollutant>_<subsector>_0.01x0.01_<year>.nc'
+OUTPUT_PATH = '/esarchive/recon/uchile/inema_v1/'
+SECTOR_LIST = {'Transport': None,
+               'Energy': None,
+               'Industry': None,
+               'Mining': None,
+               'Residential': ['Residential', 'Rural', 'Urban']}
+POLLUTANT_INFO = {'bc': 'BC', 'c6h6': 'C6H6', 'ch4': 'CH4', 'co2': 'CO2', 'co': 'CO', 'hg': 'Hg', 'nh3': 'NH3',
+                  'nmvoc': 'NMVOC', 'no': 'NO', 'no2': 'NO2', 'nox': 'NOx', 'pb': 'Pb', 'pcddf': 'PCDDF',
+                  'pm10': 'PM10', 'pm25': 'PM25', 'pm': 'PM', 'so2': 'SO2', 'tol': 'Toluene', 'voc': 'VOC'}
+
+YEAR_LIST = [2017]
+# ==============================================================
+
+
+def do_transformation():
+    from hermesv3_gr.tools.netcdf_tools import write_netcdf, get_grid_area
+    from hermesv3_gr.tools.coordinates_tools import create_bounds
+    import calendar
+    import numpy as np
+    from datetime import datetime
+
+    lats = lons = blats = blons = cell_area = None
+
+    for sector in SECTOR_LIST.keys():
+        if SECTOR_LIST[sector] is None:
+            subsector_list = [sector]
+        else:
+            subsector_list = SECTOR_LIST[sector]
+        for subsector in subsector_list:
+            for year in YEAR_LIST:
+                for pollutant in POLLUTANT_INFO.keys():
+                    out_file_name = os.path.join(OUTPUT_PATH, 'yearly_mean',
+                                                 '{0}_{1}'.format(pollutant, subsector),
+                                                 "{0}_{1}.nc".format(pollutant, year))
+                    file_name = INPUT_PATH.replace('<year>', str(year)).replace('<sector>', sector).replace(
+                        '<pollutant>', POLLUTANT_INFO[pollutant]).replace('<subsector>', subsector)
+
+                    netcdf = Dataset(file_name, mode='r')
+                    if lats is None:
+                        lat_nc = netcdf.variables['lat'][:]
+                        lon_nc = netcdf.variables['lon'][:]
+                        lat_inc = round(lat_nc[1] - lat_nc[0], 5)
+                        lon_inc = round(lon_nc[1] - lon_nc[0], 5)
+
+                        lats = np.linspace(lat_nc[0], lat_nc[0] + (lat_inc * (len(lat_nc) - 1)), len(lat_nc),
+                                           dtype=np.float)
+                        lons = np.linspace(lon_nc[0], lon_nc[0] + (lon_inc * (len(lon_nc) - 1)), len(lon_nc),
+                                           dtype=np.float)
+                        del lat_nc, lon_nc
+                        blats = create_bounds(lats)
+                        blons = create_bounds(lons)
+                        cell_area = get_grid_area(file_name)
+
+                    var = netcdf.variables[POLLUTANT_INFO[pollutant]][:]
+                    print(var)
+
+                    # if calendar.isleap(year):
+                    #     days = 366
+                    # else:
+                    #     days = 365
+                    #
+                    # # from ton/yr to kg/m2.s
+                    # var = (var / cell_area) * (1000 / (days * 24 * 60 * 60))
+                    #
+                    # data = [{'name': pollutant, 'units': 'kg.m-2.s-1', 'data': var.reshape((1,) + var.shape)}]
+                    #
+                    # if not os.path.exists(os.path.dirname(out_file_name)):
+                    #     os.makedirs(os.path.dirname(out_file_name))
+                    # print(out_file_name)
+                    # write_netcdf(out_file_name, lats, lons, data, date=datetime(year=year, month=1, day=1),
+                    #              boundary_latitudes=blats, boundary_longitudes=blons, cell_area=cell_area,
+                    #              regular_latlon=True)
+
+
+if __name__ == '__main__':
+    starting_time = timeit.default_timer()
+
+    do_transformation()
+
+    print('Time(s):', timeit.default_timer() - starting_time)
-- 
GitLab


From f2d65586acbb512859e18d6cad8c36ba9ebed1fb Mon Sep 17 00:00:00 2001
From: Carles Tena <carles.tena@bsc.es>
Date: Thu, 2 Sep 2021 11:13:43 +0200
Subject: [PATCH 02/16] Testing uChule emission inventory

---
 preproc/uchile_inema_preproc.py | 76 +++++++++++++++++----------------
 1 file changed, 39 insertions(+), 37 deletions(-)

diff --git a/preproc/uchile_inema_preproc.py b/preproc/uchile_inema_preproc.py
index e64760c..a6b8bb1 100755
--- a/preproc/uchile_inema_preproc.py
+++ b/preproc/uchile_inema_preproc.py
@@ -31,7 +31,7 @@ Besides citing HERMESv3_GR, users must also acknowledge the use of the correspon
 
 # ============== CONFIGURATION PARAMETERS ======================
 INPUT_PATH = '/esarchive/recon/uchile/inema_v1/original_files/<year>/<sector>/inema_<pollutant>_<subsector>_0.01x0.01_<year>.nc'
-OUTPUT_PATH = '/esarchive/recon/uchile/inema_v1/'
+OUTPUT_PATH = '/esarchive/scratch/ctena/uchile/inema_v1/'
 SECTOR_LIST = {'Transport': None,
                'Energy': None,
                'Industry': None,
@@ -67,42 +67,44 @@ def do_transformation():
                                                  "{0}_{1}.nc".format(pollutant, year))
                     file_name = INPUT_PATH.replace('<year>', str(year)).replace('<sector>', sector).replace(
                         '<pollutant>', POLLUTANT_INFO[pollutant]).replace('<subsector>', subsector)
-
-                    netcdf = Dataset(file_name, mode='r')
-                    if lats is None:
-                        lat_nc = netcdf.variables['lat'][:]
-                        lon_nc = netcdf.variables['lon'][:]
-                        lat_inc = round(lat_nc[1] - lat_nc[0], 5)
-                        lon_inc = round(lon_nc[1] - lon_nc[0], 5)
-
-                        lats = np.linspace(lat_nc[0], lat_nc[0] + (lat_inc * (len(lat_nc) - 1)), len(lat_nc),
-                                           dtype=np.float)
-                        lons = np.linspace(lon_nc[0], lon_nc[0] + (lon_inc * (len(lon_nc) - 1)), len(lon_nc),
-                                           dtype=np.float)
-                        del lat_nc, lon_nc
-                        blats = create_bounds(lats)
-                        blons = create_bounds(lons)
-                        cell_area = get_grid_area(file_name)
-
-                    var = netcdf.variables[POLLUTANT_INFO[pollutant]][:]
-                    print(var)
-
-                    # if calendar.isleap(year):
-                    #     days = 366
-                    # else:
-                    #     days = 365
-                    #
-                    # # from ton/yr to kg/m2.s
-                    # var = (var / cell_area) * (1000 / (days * 24 * 60 * 60))
-                    #
-                    # data = [{'name': pollutant, 'units': 'kg.m-2.s-1', 'data': var.reshape((1,) + var.shape)}]
-                    #
-                    # if not os.path.exists(os.path.dirname(out_file_name)):
-                    #     os.makedirs(os.path.dirname(out_file_name))
-                    # print(out_file_name)
-                    # write_netcdf(out_file_name, lats, lons, data, date=datetime(year=year, month=1, day=1),
-                    #              boundary_latitudes=blats, boundary_longitudes=blons, cell_area=cell_area,
-                    #              regular_latlon=True)
+                    try:
+                        netcdf = Dataset(file_name, mode='r')
+                        if lats is None:
+                            lat_nc = netcdf.variables['lat'][:]
+                            lon_nc = netcdf.variables['lon'][:]
+                            lat_inc = round(lat_nc[1] - lat_nc[0], 5)
+                            lon_inc = round(lon_nc[1] - lon_nc[0], 5)
+
+                            lats = np.linspace(lat_nc[0], lat_nc[0] + (lat_inc * (len(lat_nc) - 1)), len(lat_nc),
+                                               dtype=np.float)
+                            lons = np.linspace(lon_nc[0], lon_nc[0] + (lon_inc * (len(lon_nc) - 1)), len(lon_nc),
+                                               dtype=np.float)
+                            del lat_nc, lon_nc
+                            blats = create_bounds(lats)
+                            blons = create_bounds(lons)
+
+                        var = netcdf.variables[POLLUTANT_INFO[pollutant]][:]
+                        var = var.data
+                        var[var == netcdf.variables[POLLUTANT_INFO[pollutant]].getncattr('_FillValue')] = 0
+
+                        if calendar.isleap(year):
+                            days = 366
+                        else:
+                            days = 365
+
+                        # from ktonne/km2 year to kg/m2.s
+                        var = var * (1000000 / (days * 24 * 60 * 60 * 1000000))
+
+                        data = [{'name': pollutant, 'units': 'kg.m-2.s-1', 'data': var.reshape((1,) + var.shape)}]
+
+                        if not os.path.exists(os.path.dirname(out_file_name)):
+                            os.makedirs(os.path.dirname(out_file_name))
+                        print(out_file_name)
+                        write_netcdf(out_file_name, lats, lons, data, date=datetime(year=year, month=1, day=1),
+                                     boundary_latitudes=blats, boundary_longitudes=blons, cell_area=cell_area,
+                                     regular_latlon=True)
+                    except FileNotFoundError:
+                        print("Variable {0} not found in {1}: {2}".format(pollutant, subsector, file_name))
 
 
 if __name__ == '__main__':
-- 
GitLab


From b153ad4f7a2f198ea1c66f81f0c6109794668105 Mon Sep 17 00:00:00 2001
From: Carles Tena <carles.tena@bsc.es>
Date: Fri, 3 Sep 2021 10:14:58 +0200
Subject: [PATCH 03/16] done uChule emission inventory

---
 preproc/uchile_inema_preproc.py | 43 ++++++++++++++++++---------------
 1 file changed, 24 insertions(+), 19 deletions(-)

diff --git a/preproc/uchile_inema_preproc.py b/preproc/uchile_inema_preproc.py
index a6b8bb1..a57068b 100755
--- a/preproc/uchile_inema_preproc.py
+++ b/preproc/uchile_inema_preproc.py
@@ -31,29 +31,27 @@ Besides citing HERMESv3_GR, users must also acknowledge the use of the correspon
 
 # ============== CONFIGURATION PARAMETERS ======================
 INPUT_PATH = '/esarchive/recon/uchile/inema_v1/original_files/<year>/<sector>/inema_<pollutant>_<subsector>_0.01x0.01_<year>.nc'
-OUTPUT_PATH = '/esarchive/scratch/ctena/uchile/inema_v1/'
+OUTPUT_PATH = '/esarchive/recon/uchile/inema_v1/'
 SECTOR_LIST = {'Transport': None,
                'Energy': None,
                'Industry': None,
                'Mining': None,
                'Residential': ['Residential', 'Rural', 'Urban']}
 POLLUTANT_INFO = {'bc': 'BC', 'c6h6': 'C6H6', 'ch4': 'CH4', 'co2': 'CO2', 'co': 'CO', 'hg': 'Hg', 'nh3': 'NH3',
-                  'nmvoc': 'NMVOC', 'no': 'NO', 'no2': 'NO2', 'nox': 'NOx', 'pb': 'Pb', 'pcddf': 'PCDDF',
+                  'nmvoc': 'NMVOC', 'no': 'NO', 'no2': 'NO2', 'nox_no2': 'NOx', 'pb': 'Pb', 'pcddf': 'PCDDF',
                   'pm10': 'PM10', 'pm25': 'PM25', 'pm': 'PM', 'so2': 'SO2', 'tol': 'Toluene', 'voc': 'VOC'}
 
-YEAR_LIST = [2017]
+YEAR_LIST = [2016, 2017]
 # ==============================================================
 
 
 def do_transformation():
-    from hermesv3_gr.tools.netcdf_tools import write_netcdf, get_grid_area
+    from hermesv3_gr.tools.netcdf_tools import write_netcdf
     from hermesv3_gr.tools.coordinates_tools import create_bounds
     import calendar
     import numpy as np
     from datetime import datetime
 
-    lats = lons = blats = blons = cell_area = None
-
     for sector in SECTOR_LIST.keys():
         if SECTOR_LIST[sector] is None:
             subsector_list = [sector]
@@ -69,23 +67,30 @@ def do_transformation():
                         '<pollutant>', POLLUTANT_INFO[pollutant]).replace('<subsector>', subsector)
                     try:
                         netcdf = Dataset(file_name, mode='r')
-                        if lats is None:
-                            lat_nc = netcdf.variables['lat'][:]
-                            lon_nc = netcdf.variables['lon'][:]
-                            lat_inc = round(lat_nc[1] - lat_nc[0], 5)
-                            lon_inc = round(lon_nc[1] - lon_nc[0], 5)
-
-                            lats = np.linspace(lat_nc[0], lat_nc[0] + (lat_inc * (len(lat_nc) - 1)), len(lat_nc),
-                                               dtype=np.float)
-                            lons = np.linspace(lon_nc[0], lon_nc[0] + (lon_inc * (len(lon_nc) - 1)), len(lon_nc),
-                                               dtype=np.float)
-                            del lat_nc, lon_nc
-                            blats = create_bounds(lats)
-                            blons = create_bounds(lons)
+                        lat_nc = netcdf.variables['lat'][:]
+                        lon_nc = netcdf.variables['lon'][:]
+                        lat_inc = round(lat_nc[1] - lat_nc[0], 5)
+                        lon_inc = round(lon_nc[1] - lon_nc[0], 5)
+
+                        lats = np.linspace(lat_nc[0], lat_nc[0] + (lat_inc * (len(lat_nc) - 1)), len(lat_nc),
+                                           dtype=np.float)
+                        lons = np.linspace(lon_nc[0], lon_nc[0] + (lon_inc * (len(lon_nc) - 1)), len(lon_nc),
+                                           dtype=np.float)
+                        if lons[-1] < lons[0]:
+                            flip = True
+                            lons = np.flip(lons)
+                        else:
+                            flip = False
+
+                        del lat_nc, lon_nc
+                        blats = create_bounds(lats)
+                        blons = create_bounds(lons)
 
                         var = netcdf.variables[POLLUTANT_INFO[pollutant]][:]
                         var = var.data
                         var[var == netcdf.variables[POLLUTANT_INFO[pollutant]].getncattr('_FillValue')] = 0
+                        if flip:
+                            var = np.flip(var, axis=1)
 
                         if calendar.isleap(year):
                             days = 366
-- 
GitLab


From da5634c0cf2588aa4fd4aa5416115cbbdac8bb64 Mon Sep 17 00:00:00 2001
From: Carles Tena <carles.tena@bsc.es>
Date: Fri, 17 Dec 2021 09:06:09 +0100
Subject: [PATCH 04/16] CAMS GLOB SHIP v3.1 preproc don

---
 preproc/cams_glob_ship_v31_preproc.py | 225 ++++++++++++++++++++++++++
 1 file changed, 225 insertions(+)
 create mode 100755 preproc/cams_glob_ship_v31_preproc.py

diff --git a/preproc/cams_glob_ship_v31_preproc.py b/preproc/cams_glob_ship_v31_preproc.py
new file mode 100755
index 0000000..b475aa4
--- /dev/null
+++ b/preproc/cams_glob_ship_v31_preproc.py
@@ -0,0 +1,225 @@
+#!/usr/bin/env python
+
+# Copyright 2018 Earth Sciences Department, BSC-CNS
+#
+# This file is part of HERMESv3_GR.
+#
+# HERMESv3_GR is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# HERMESv3_GR is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with HERMESv3_GR. If not, see <http://www.gnu.org/licenses/>.
+
+
+import os
+import sys
+import numpy as np
+
+# ============== README ======================
+"""
+downloading website: 
+reference: 
+Besides citing HERMESv3_GR, users must also acknowledge the use of the corresponding emission inventories in their works
+"""
+
+# ============== CONFIGURATION PARAMETERS ======================
+INPUT_PATH = '/esarchive/recon/ecmwf/cams_glob_shipv31/original_files/'
+OUTPUT_PATH = '/esarchive/recon/ecmwf/cams_glob_shipv31'
+POLLUTANT_INFO = {'Ash': {'input_var_name': ['glseas', 'inland'], 'output_var_name': 'ash', 'unit_factor': None},
+                  'CH4': {'input_var_name': ['glseas', 'inland'], 'output_var_name': 'ch4', 'unit_factor': None},
+                  'CO2': {'input_var_name': ['glseas', 'inland'], 'output_var_name': 'co2', 'unit_factor': None}, 
+                  'CO': {'input_var_name': ['glseas', 'inland'], 'output_var_name': 'co', 'unit_factor': None},
+                  'EC': {'input_var_name': ['glseas', 'inland'], 'output_var_name': 'ec', 'unit_factor': None}, 
+                  'NOx': {'input_var_name': ['glseas', 'inland'], 'output_var_name': 'nox_no', 'unit_factor': None},
+                  'OC': {'input_var_name': ['glseas', 'inland'], 'output_var_name': 'oc', 'unit_factor': None},
+                  'SO4': {'input_var_name': ['glseas', 'inland'], 'output_var_name': 'so4', 'unit_factor': None},
+                  'SOx': {'input_var_name': ['glseas', 'inland'], 'output_var_name': 'so2', 'unit_factor': None},
+                  'VOC_SUM': {'input_var_name': ['glseas', 'inland'], 'output_var_name': 'nmvoc', 'unit_factor': None}}
+# LIST_YEARS = 2019
+LIST_YEARS = [2019]
+INPUT_NAME = 'CAMS-GLOB-SHIP_Glb_0.1x0.1_anthro_<pollutant>_v3.1_monthly_<year>.nc'
+# ==============================================================
+
+
+def get_input_name(filename, pollutant, year):
+    """
+    Gets the path for the input file name
+
+    :param pollutant: Name of the pollutant
+    :type pollutant: str
+
+    :return: Path to the input file
+    :rtype: str
+    """
+
+    filename = filename.replace('<pollutant>', pollutant)
+    filename = filename.replace('<year>', str(year))
+
+    return filename
+
+
+def get_full_year_data(file_name, year, var_name):
+    """
+    Gets the needed date in the input format.
+
+    :param file_name: path to the input file.
+    :type file_name: str
+
+    :param pollutant: Name of the pollutant.
+    :type pollutant: str
+
+    :param sector: Name of the sector.
+    :type sector: str
+
+    :param year: Year to calculate.
+    :type year: int
+
+    :return: Data of the selected emission.
+    :rtype: numpy.array
+    """
+    from netCDF4 import Dataset
+    from datetime import datetime
+    import cf_units
+    import numpy as np
+
+    nc = Dataset(file_name, mode='r')
+
+    time = nc.variables['time']
+
+    time_array = cf_units.num2date(time[:], time.units, time.calendar)
+    time_array = np.array([datetime(year=x.year, month=x.month, day=1) for x in time_array])
+
+    i_time = np.where(time_array == datetime(year=year, month=1, day=1))[0][0]
+
+    # if sector in LIST_SECTORS:
+    if isinstance(var_name, list):
+        for i, var_name_aux in enumerate(var_name):
+            if i == 0:
+                data = nc.variables[var_name_aux][i_time:i_time + 12, :, :]
+            else:
+                data += nc.variables[var_name_aux][i_time:i_time + 12, :, :]
+    else:
+        data = nc.variables[var_name][i_time:i_time+12, :, :]
+    # else:
+    #     data = None
+    nc.close()
+
+    return data
+
+
+def get_global_attributes(file_name):
+    """
+    Gets the global attributes of the input file.
+
+    :param file_name: Path to the NetCDF file
+    :type file_name: str
+
+    :return: Global attributes
+    :rtype: dict
+    """
+    from netCDF4 import Dataset
+
+    nc = Dataset(file_name, mode='r')
+
+    atts_dict = {}
+    for name in nc.ncattrs():
+        atts_dict[name] = nc.getncattr(name)
+
+    nc.close()
+    return atts_dict
+
+
+def create_bounds(coordinates, number_vertices=2):
+    """
+    Calculate the vertices coordinates.
+
+    :param coordinates: Coordinates in degrees (latitude or longitude)
+    :type coordinates: numpy.array
+
+    :param number_vertices: Non mandatory parameter that informs the number of vertices that must have the boundaries.
+            (by default 2)
+    :type number_vertices: int
+
+    :return: Array with as many elements as vertices for each value of coords.
+    :rtype: numpy.array
+    """
+    interval = coordinates[1] - coordinates[0]
+
+    coords_left = coordinates - interval / 2
+    coords_right = coordinates + interval / 2
+    if number_vertices == 2:
+        bound_coords = np.dstack((coords_left, coords_right))
+    elif number_vertices == 4:
+        bound_coords = np.dstack((coords_left, coords_right, coords_right, coords_left))
+    else:
+        raise ValueError('The number of vertices of the boudaries must be 2 or 4')
+
+    return bound_coords
+
+
+def do_transformation(year):
+    """
+    Does the transformation for the selected year
+
+    :param year: Year to calculate
+    :type year: int
+    """
+    from datetime import datetime
+    from hermesv3_gr.tools.netcdf_tools import extract_vars, get_grid_area, write_netcdf
+    for pollutant, pollutant_info in POLLUTANT_INFO.items():
+        file_name = get_input_name(os.path.join(INPUT_PATH, INPUT_NAME), pollutant, year)
+        if os.path.exists(file_name):
+            c_lats, c_lons = extract_vars(file_name, ['lat', 'lon'])
+            lat_inc = -0.1  # c_lats['data'][1] - c_lats['data'][0]
+            c_lats['data'] += lat_inc / 2
+            lon_inc = 0.1  # c_lons['data'][1] - c_lons['data'][0]
+            c_lons['data'] += lon_inc / 2
+
+            b_lats = create_bounds(c_lats['data'])
+            b_lons = create_bounds(c_lons['data'])
+
+            # cell_area = get_grid_area(file_name)
+
+            global_attributes = get_global_attributes(file_name)
+
+            data = get_full_year_data(file_name, year, pollutant_info['input_var_name'])
+
+            pollutant_name = pollutant_info['output_var_name']
+
+            file_path = os.path.join(OUTPUT_PATH, 'monthly_mean', '{0}'.format(pollutant_name))
+            if not os.path.exists(file_path):
+                os.makedirs(file_path)
+
+            for month in range(1, 12 + 1, 1):
+                emission = {
+                    'name': pollutant_name,
+                    'units': 'kg.m-2.s-1',
+                    # 'data': data[month - 1, :, :].reshape((1,) + cell_area.shape)
+                    'data': data[month - 1, :, :].reshape((1, data.shape[-2], data.shape[-1]))
+
+                }
+                # write_netcdf(
+                #     os.path.join(file_path, '{0}_{1}{2}.nc'.format(pollutant_name, year, str(month).zfill(2))),
+                #     c_lats['data'], c_lons['data'], [emission], date=datetime(year=year, month=month, day=1),
+                #     boundary_latitudes=b_lats, boundary_longitudes=b_lons, cell_area=cell_area,
+                #     global_attributes=global_attributes)
+
+                write_netcdf(
+                    os.path.join(file_path, '{0}_{1}{2}.nc'.format(pollutant_name, year, str(month).zfill(2))),
+                    c_lats['data'], c_lons['data'], [emission], date=datetime(year=year, month=month, day=1),
+                    boundary_latitudes=b_lats, boundary_longitudes=b_lons, global_attributes=global_attributes)
+        else:
+            raise IOError('File not found {0}'.format(file_name))
+    return True
+
+
+if __name__ == '__main__':
+    for y in LIST_YEARS:
+        do_transformation(y)
\ No newline at end of file
-- 
GitLab


From 8c3412a7c5204d8ab2144f501618748c98becae9 Mon Sep 17 00:00:00 2001
From: ctena <carles.tena@bsc.es>
Date: Fri, 17 Dec 2021 09:11:05 +0100
Subject: [PATCH 05/16] Updating master

---
 CHANGELOG | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG
index e48d830..1417252 100755
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,6 +1,7 @@
 2.1.2
-    2021/12/01
-    - Corrected error while processing GFAS emissions
+    2021/12/17
+    - Corrected error while processing GFAS emissions (#43)
+    - Added new CAMS GLOB SHIP v3.1 (#42)
 
 2.1.1
     2021/07/29
-- 
GitLab


From 2c16050b6687acd930b03f348159af3bfbfe6125 Mon Sep 17 00:00:00 2001
From: ctena <carles.tena@bsc.es>
Date: Tue, 21 Dec 2021 09:51:17 +0100
Subject: [PATCH 06/16] CAMS REG AP v5.1 preproc done

---
 CHANGELOG                          |   3 +-
 preproc/cams_reg_ap_v51_preproc.py | 182 +++++++++++++++++++++++++++++
 2 files changed, 184 insertions(+), 1 deletion(-)
 create mode 100755 preproc/cams_reg_ap_v51_preproc.py

diff --git a/CHANGELOG b/CHANGELOG
index 1417252..91f977c 100755
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,7 +1,8 @@
 2.1.2
     2021/12/17
     - Corrected error while processing GFAS emissions (#43)
-    - Added new CAMS GLOB SHIP v3.1 (#42)
+    - Added new CAMS-GLOB-SHIP_v3.1 preproc emission inventory (#42)
+    - Added new CAMS-REG-AP_v5.1_Refv22 preproc emission inventory (#45)
 
 2.1.1
     2021/07/29
diff --git a/preproc/cams_reg_ap_v51_preproc.py b/preproc/cams_reg_ap_v51_preproc.py
new file mode 100755
index 0000000..0754f3b
--- /dev/null
+++ b/preproc/cams_reg_ap_v51_preproc.py
@@ -0,0 +1,182 @@
+#!/usr/bin/env python
+
+# Copyright 2018 Earth Sciences Department, BSC-CNS
+#
+# This file is part of HERMESv3_GR.
+#
+# HERMESv3_GR is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# HERMESv3_GR is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with HERMESv3_GR. If not, see <http://www.gnu.org/licenses/>.
+
+
+import os
+import sys
+
+# ============== README ======================
+"""
+downloading website: contact to hugo.deniervandergon@tno.nl or jeroen.kuenen@tno.nl
+reference: https://www.atmos-chem-phys.net/14/10963/2014/
+Besides citing HERMESv3_GR, users must also acknowledge the use of the corresponding emission inventories in their works
+"""
+
+# ============== CONFIGURATION PARAMETERS ======================
+INPUT_PATH = '/esarchive/recon/ecmwf/cams_reg_apv51_ref22/original_files'
+OUTPUT_PATH = '/esarchive/recon/ecmwf/cams_reg_apv51_ref22/yearly_mean'
+INPUT_NAME = 'Ref2_v2_0_PM_components_<year>_DT.csv'
+FACTOR = 1. / (365. * 24. * 3600.)  # To pass from kg/year to Kg/s
+POLLUTANT_INFO = {'ec_dt': {'input_name': 'EC_fine', 'unit_factor': FACTOR},
+                  'oc_dt': {'input_name': 'OC_fine', 'unit_factor': FACTOR},
+                  'pm25_dt': {'input_name': ['EC_fine', 'OC_fine', 'SO4_fine', 'Na_fine', 'OthMin_fine'],
+                              'unit_factor': FACTOR},
+                  'so4_dt': {'input_name': 'SO4_fine', 'unit_factor': FACTOR},
+                  'pm10_dt': {'input_name': ['EC_coarse', 'OC_coarse', 'SO4_coarse', 'Na_coarse', 'OthMin_coarse'],
+                              'unit_factor': FACTOR}}
+# list_years = [2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015]
+LIST_YEARS = [2018]
+# ==============================================================
+
+
+def calculate_grid_definition(in_path):
+    """
+    Calculate the latitude and longitude coordinates of the cell.
+
+    :param in_path: Path to the file that contains all the information.
+    :type in_path: str
+
+    :return: Latitudes array, Longitudes array, Latitude interval, Longitude interval.
+    :rtype: numpy.array, numpy.array, float, float
+    """
+    import pandas as pd
+    import numpy as np
+
+    dataframe = pd.read_csv(in_path, sep=',')
+    # Not needed to discriminate cause point sources are reported at the center grid cell
+    # dataframe = dataframe[dataframe.SourceType != 'P']
+
+    # Longitudes
+    lons = np.sort(np.unique(dataframe['Lon']))
+    lons_interval = lons[1:] - lons[:-1]
+    print('Lon min: {0}; Lon max: {1}; Lon inc: {2}; Lon num: {3}'.format(
+        dataframe['Lon'].min(), dataframe['Lon'].max(), lons_interval.min(), len(lons)))
+
+    # Latitudes
+    lats = np.sort(np.unique(dataframe['Lat']))
+    lats_interval = lats[1:] - lats[:-1]
+    print('Lat min: {0}; Lat max: {1}; Lat inc: {2}; Lat num: {3}'.format(
+        dataframe['Lat'].min(), dataframe['Lat'].max(), lats_interval.min(), len(lats)))
+
+    lats = np.arange(-90 + lats_interval.min() / 2, 90, lats_interval.min(), dtype=np.float64)
+    lons = np.arange(-180 + lons_interval.min() / 2, 180, lons_interval.min(), dtype=np.float64)
+
+    return lats, lons, lats_interval.min(), lons_interval.min()
+
+
+def create_pollutant_empty_info(gnfr_id, len_c_lats, len_c_lons):
+    """
+    Crate an empty pollutant list.
+
+    :param gnfr_id: ID of the
+    :type gnfr_id: str
+
+    :param len_c_lats: Number of elements on the latitude array
+    :type len_c_lats: int
+
+    :param len_c_lons: Number of elements on the longitude array
+    :type len_c_lons: int
+
+    :return: Pollutant info
+    :rtype: dict
+    """
+    import numpy as np
+
+    pollutant_info = {}
+    for pollutant_orig_name, pollutant_oirg_info in POLLUTANT_INFO.items():
+        pollutant_name = pollutant_orig_name.replace('<GNFR_Sector>', gnfr_id)
+        pollutant_info[pollutant_name] = pollutant_oirg_info.copy()
+        pollutant_info[pollutant_name]['name'] = pollutant_name
+        pollutant_info[pollutant_name]['units'] = 'kg.m-2.s-1'
+        # aux_dict['units'] = 'Mg.km-2.year-1'
+        pollutant_info[pollutant_name]['data'] = np.zeros((len_c_lats, len_c_lons))
+
+    return pollutant_info
+
+
+def do_transformation(year):
+    """
+    Make al the process to transform the emissions of the current year.
+
+    :param year: year to process.
+    :type year: int
+
+    :return: True when everything finish well.
+    :rtype: Bool
+    """
+    from hermesv3_gr.tools.netcdf_tools import write_netcdf, get_grid_area
+    from hermesv3_gr.tools.coordinates_tools import create_bounds
+    from datetime import datetime
+    import pandas as pd
+    import numpy as np
+
+    in_file = os.path.join(INPUT_PATH, INPUT_NAME.replace('<year>', str(year)))
+
+    c_lats, c_lons, lat_interval, lon_interval = calculate_grid_definition(in_file)
+
+    b_lats = create_bounds(c_lats, number_vertices=2)
+    b_lons = create_bounds(c_lons, number_vertices=2)
+
+    dataframe = pd.read_csv(in_file, sep=',')
+
+    dataframe.loc[:, 'row_lat'] = np.array((dataframe['Lat'] - (-90 + lat_interval / 2)) / lat_interval, dtype=np.int32)
+    dataframe.loc[:, 'col_lon'] = np.array((dataframe['Lon'] - (-180 + lon_interval / 2)) / lon_interval, dtype=np.int32)
+
+    for gnfr_id, gnfr_data in dataframe.groupby('GNFR_Sector'):
+        print('gnfr', gnfr_id)
+        pollutant_info = create_pollutant_empty_info(gnfr_id, len(c_lats), len(c_lons))
+        # Other mobile sources ignoring sea cells (shipping emissions)
+        #if gnfr_id == 8:
+        #    for sea in ['ATL', 'BAS', 'BLS', 'MED', 'NOS']:
+        #        gnfr_data = gnfr_data[gnfr_data.ISO3 != sea]
+
+        gnfr_data = gnfr_data.groupby(['row_lat', 'col_lon']).sum().reset_index()
+
+        for poll_name, poll_info in pollutant_info.items():
+            if isinstance(poll_info['input_name'], list):
+                for poll_in_name in poll_info['input_name']:
+                    poll_info['data'][gnfr_data.row_lat, gnfr_data.col_lon] += gnfr_data[poll_in_name]
+            else:
+                poll_info['data'][gnfr_data.row_lat, gnfr_data.col_lon] += gnfr_data[poll_info['input_name']]
+            poll_info['data'] = poll_info['data'].reshape((1,) + poll_info['data'].shape)
+
+            aux_output_path = os.path.join(OUTPUT_PATH, '{0}_gnfr_{1}'.format(poll_name, gnfr_id))
+            if not os.path.exists(aux_output_path):
+                os.makedirs(aux_output_path)
+            aux_output_path = os.path.join(aux_output_path, '{0}_{1}.nc'.format(poll_name, year))
+            write_netcdf(aux_output_path, c_lats, c_lons, [poll_info], date=datetime(year, month=1, day=1),
+                         boundary_latitudes=b_lats, boundary_longitudes=b_lons)
+            cell_area = get_grid_area(aux_output_path)
+
+            poll_info['data'] = poll_info['data'] * poll_info['unit_factor']/cell_area
+
+            write_netcdf(aux_output_path, c_lats, c_lons, [poll_info], date=datetime(year, month=1, day=1),
+                         boundary_latitudes=b_lats, boundary_longitudes=b_lons, cell_area=cell_area,
+                         global_attributes={
+                             'comment': 'Re-writing done by Carles Tena (carles.tena@bsc.es) from the BSC-CNS ' +
+                                        '(Barcelona Supercomputing Center)'})
+    return True
+
+
+if __name__ == '__main__':
+    for y in LIST_YEARS:
+        do_transformation(y)
+
+
+
-- 
GitLab


From b67be5f73fb89a80e5308626d06e313884976f18 Mon Sep 17 00:00:00 2001
From: ctena <carles.tena@bsc.es>
Date: Thu, 23 Dec 2021 09:10:22 +0100
Subject: [PATCH 07/16] GFAS Hourly working in the ftp folder

---
 .../emision_inventories/emission_inventory.py |  4 +--
 .../gfas_emission_inventory.py                |  2 +-
 .../point_gfas_hourly_emission_inventory.py   | 28 +++++++++++++------
 3 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/hermesv3_gr/modules/emision_inventories/emission_inventory.py b/hermesv3_gr/modules/emision_inventories/emission_inventory.py
index 0eb2d62..987ef7a 100755
--- a/hermesv3_gr/modules/emision_inventories/emission_inventory.py
+++ b/hermesv3_gr/modules/emision_inventories/emission_inventory.py
@@ -389,7 +389,7 @@ class EmissionInventory(object):
                                 reference_year=emission_inventory.ref_year, factors=emission_inventory.factor_mask,
                                 regrid_mask=emission_inventory.regrid_mask, p_vertical=emission_inventory.p_vertical,
                                 p_month=p_month, p_week=p_week, p_day=p_day, p_hour=p_hour,
-                                p_speciation=emission_inventory.p_speciation))
+                                p_speciation=emission_inventory.p_speciation, countries_shapefile=countries_shapefile))
                     elif emission_inventory.frequency == 'hourly':
                         emission_inventory_list.append(
                             PointGfasHourlyEmissionInventory(
@@ -398,7 +398,7 @@ class EmissionInventory(object):
                                 emission_inventory.frequency, vertical_levels,
                                 reference_year=emission_inventory.ref_year, factors=emission_inventory.factor_mask,
                                 regrid_mask=emission_inventory.regrid_mask, p_vertical=emission_inventory.p_vertical,
-                                p_speciation=emission_inventory.p_speciation))
+                                p_speciation=emission_inventory.p_speciation, countries_shapefile=countries_shapefile))
                     else:
                         raise NotImplementedError("ERROR: {0} frequency are not implemented, use 'daily' or 'hourly.")
                 else:
diff --git a/hermesv3_gr/modules/emision_inventories/gfas_emission_inventory.py b/hermesv3_gr/modules/emision_inventories/gfas_emission_inventory.py
index fe2f35d..01a4e55 100755
--- a/hermesv3_gr/modules/emision_inventories/gfas_emission_inventory.py
+++ b/hermesv3_gr/modules/emision_inventories/gfas_emission_inventory.py
@@ -71,7 +71,7 @@ class GfasEmissionInventory(EmissionInventory):
     def __init__(self, comm, options, grid, current_date, inventory_name, source_type, sector, pollutants, inputs_path,
                  frequency, vertical_output_profile,
                  reference_year=2010, coverage='global', factors=None, regrid_mask=None, p_vertical=None, p_month=None,
-                 p_week=None, p_day=None, p_hour=None, p_speciation=None):
+                 p_week=None, p_day=None, p_hour=None, p_speciation=None, countries_shapefile=None):
         from hermesv3_gr.modules.vertical.vertical_gfas import GfasVerticalDistribution
 
         st_time = timeit.default_timer()
diff --git a/hermesv3_gr/modules/emision_inventories/point_gfas_hourly_emission_inventory.py b/hermesv3_gr/modules/emision_inventories/point_gfas_hourly_emission_inventory.py
index 124d1c7..639f09a 100755
--- a/hermesv3_gr/modules/emision_inventories/point_gfas_hourly_emission_inventory.py
+++ b/hermesv3_gr/modules/emision_inventories/point_gfas_hourly_emission_inventory.py
@@ -70,16 +70,16 @@ class PointGfasHourlyEmissionInventory(EmissionInventory):
     """
 
     def __init__(self, comm, options, grid, current_date, inventory_name, source_type, sector, pollutants, inputs_path,
-                 frequency, vertical_output_profile,
-                 reference_year=2010, factors=None, regrid_mask=None, p_vertical=None, p_speciation=None):
+                 frequency, vertical_output_profile, reference_year=2010, factors=None, regrid_mask=None,
+                 p_vertical=None, p_speciation=None, countries_shapefile=None):
 
         st_time = timeit.default_timer()
         settings.write_log('\t\tCreating GFAS emission inventory as point source.', level=3)
         super(PointGfasHourlyEmissionInventory, self).__init__(
             comm, options, grid, current_date, inventory_name, source_type, sector, pollutants, inputs_path, frequency,
             vertical_output_profile,
-            reference_year=reference_year, factors=factors, regrid_mask=regrid_mask, p_vertical=None,
-            p_month=None, p_week=None, p_day=None, p_hour=None, p_speciation=p_speciation)
+            reference_year=reference_year, factors=factors, regrid_mask=regrid_mask, p_vertical=None, p_month=None,
+            p_week=None, p_day=None, p_hour=None, p_speciation=p_speciation, countries_shapefile=countries_shapefile)
 
         self.method = self.get_method(p_vertical)
         self.altitude_name = self.get_altitude_name()
@@ -151,9 +151,22 @@ class PointGfasHourlyEmissionInventory(EmissionInventory):
                     settings.write_log('\t\tCreating GFAS coordinates shapefile. It may take a few minutes.', level=3)
                     src_area = get_grid_area(self.get_input_path(date=self.date_array[0]))
                     netcdf = Dataset(self.get_input_path(date=self.date_array[0]), mode='r')
-
-                    lats = netcdf.variables['latitude'][:]
-                    lons = netcdf.variables['longitude'][:]
+                    try:
+                        lats = netcdf.variables['latitude'][:]
+                    except KeyError:
+                        try:
+                            lats = netcdf.variables['lat'][:]
+                        except KeyError:
+                            raise KeyError("Neither 'lat' or 'latitude' are present on the {0} file".format(
+                                self.get_input_path(date=self.date_array[0])))
+                    try:
+                        lons = netcdf.variables['longitude'][:]
+                    except KeyError:
+                        try:
+                            lons = netcdf.variables['lon'][:]
+                        except KeyError:
+                            raise KeyError("Neither 'lon' or 'longitude' are present on the {0} file".format(
+                                self.get_input_path(date=self.date_array[0])))
                     netcdf.close()
 
                     lons[lons > 180] = lons[lons > 180] - 360.0
@@ -178,7 +191,6 @@ class PointGfasHourlyEmissionInventory(EmissionInventory):
                     #coordinates.to_file(coordinates_shapefile)
                     settings.write_log('\t\t\tDone!', level=3)
 
-
                     grid = self.grid.to_shapefile(full_grid=True).to_crs(coordinates.crs)
 
                     temp_coords = Dataset(os.path.join(self.grid.temporal_path, 'temporal_coords.nc'), mode='r')
-- 
GitLab


From 60bc33c91641fe9812475e8e2768750413765d2d Mon Sep 17 00:00:00 2001
From: ctena <carles.tena@bsc.es>
Date: Thu, 23 Dec 2021 09:18:17 +0100
Subject: [PATCH 08/16] CAMS Reg AP v51 Ref22 preproc corrected unit factor and
 pm10 sum

---
 preproc/cams_reg_ap_v51_preproc.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/preproc/cams_reg_ap_v51_preproc.py b/preproc/cams_reg_ap_v51_preproc.py
index 0754f3b..73364ba 100755
--- a/preproc/cams_reg_ap_v51_preproc.py
+++ b/preproc/cams_reg_ap_v51_preproc.py
@@ -32,13 +32,14 @@ Besides citing HERMESv3_GR, users must also acknowledge the use of the correspon
 INPUT_PATH = '/esarchive/recon/ecmwf/cams_reg_apv51_ref22/original_files'
 OUTPUT_PATH = '/esarchive/recon/ecmwf/cams_reg_apv51_ref22/yearly_mean'
 INPUT_NAME = 'Ref2_v2_0_PM_components_<year>_DT.csv'
-FACTOR = 1. / (365. * 24. * 3600.)  # To pass from kg/year to Kg/s
+FACTOR = 1. / (365. * 24. * 3600. * 1000.)  # To pass from g/year to Kg/s
 POLLUTANT_INFO = {'ec_dt': {'input_name': 'EC_fine', 'unit_factor': FACTOR},
                   'oc_dt': {'input_name': 'OC_fine', 'unit_factor': FACTOR},
                   'pm25_dt': {'input_name': ['EC_fine', 'OC_fine', 'SO4_fine', 'Na_fine', 'OthMin_fine'],
                               'unit_factor': FACTOR},
                   'so4_dt': {'input_name': 'SO4_fine', 'unit_factor': FACTOR},
-                  'pm10_dt': {'input_name': ['EC_coarse', 'OC_coarse', 'SO4_coarse', 'Na_coarse', 'OthMin_coarse'],
+                  'pm10_dt': {'input_name': ['EC_fine', 'EC_coarse', 'OC_fine', 'OC_coarse', 'SO4_fine', 'SO4_coarse',
+                                             'Na_fine', 'Na_coarse', 'OthMin_fine', 'OthMin_coarse'],
                               'unit_factor': FACTOR}}
 # list_years = [2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015]
 LIST_YEARS = [2018]
-- 
GitLab


From e7f17d4cabadf65ef50ae4f9a0455ecc2cad4017 Mon Sep 17 00:00:00 2001
From: ctena <carles.tena@bsc.es>
Date: Thu, 23 Dec 2021 09:35:08 +0100
Subject: [PATCH 09/16] CAMS Reg AP v51 Ref22 preproc script name

---
 ...ams_reg_ap_v51_preproc.py => cams_reg_ap_v51_ref22_preproc.py} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename preproc/{cams_reg_ap_v51_preproc.py => cams_reg_ap_v51_ref22_preproc.py} (100%)

diff --git a/preproc/cams_reg_ap_v51_preproc.py b/preproc/cams_reg_ap_v51_ref22_preproc.py
similarity index 100%
rename from preproc/cams_reg_ap_v51_preproc.py
rename to preproc/cams_reg_ap_v51_ref22_preproc.py
-- 
GitLab


From f5b14da1727fbd5d8a1a9ac25701ca16096ba1da Mon Sep 17 00:00:00 2001
From: ctena <carles.tena@bsc.es>
Date: Thu, 23 Dec 2021 11:47:51 +0100
Subject: [PATCH 10/16] CAMS GLOB OCEAN v3.1 preproc done

---
 CHANGELOG                              |   1 +
 preproc/cams_glob_ocean_v31_preproc.py | 215 +++++++++++++++++++++++++
 2 files changed, 216 insertions(+)
 create mode 100755 preproc/cams_glob_ocean_v31_preproc.py

diff --git a/CHANGELOG b/CHANGELOG
index 91f977c..0b17f19 100755
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -3,6 +3,7 @@
     - Corrected error while processing GFAS emissions (#43)
     - Added new CAMS-GLOB-SHIP_v3.1 preproc emission inventory (#42)
     - Added new CAMS-REG-AP_v5.1_Refv22 preproc emission inventory (#45)
+    - Added new CAMS-GLOB-OCEAN_v3.1 preproc emission inventory (#48)
 
 2.1.1
     2021/07/29
diff --git a/preproc/cams_glob_ocean_v31_preproc.py b/preproc/cams_glob_ocean_v31_preproc.py
new file mode 100755
index 0000000..8394e61
--- /dev/null
+++ b/preproc/cams_glob_ocean_v31_preproc.py
@@ -0,0 +1,215 @@
+#!/usr/bin/env python
+
+# Copyright 2018 Earth Sciences Department, BSC-CNS
+#
+# This file is part of HERMESv3_GR.
+#
+# HERMESv3_GR is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# HERMESv3_GR is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with HERMESv3_GR. If not, see <http://www.gnu.org/licenses/>.
+
+
+import os
+import sys
+import numpy as np
+
+# ============== README ======================
+"""
+downloading website: 
+reference: 
+Besides citing HERMESv3_GR, users must also acknowledge the use of the corresponding emission inventories in their works
+"""
+
+# ============== CONFIGURATION PARAMETERS ======================
+INPUT_PATH = '/esarchive/recon/ecmwf/cams_glob_oceanv31/original_files'
+OUTPUT_PATH = '/esarchive/recon/ecmwf/cams_glob_oceanv31'
+LIST_POLLUTANTS = ['DMS']
+# LIST_POLLUTANTS = ['VOC_SUM']
+# LIST_YEARS = from 2000 to 2020
+LIST_YEARS = [2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016,
+              2017, 2018, 2019, 2020]
+INPUT_NAME = 'CAMS-GLOB-OCE_Glb_0.5x0.5_oce_<pollutant>_v3.1_monthly_<year>.nc'
+# ==============================================================
+
+
+def get_input_name(pollutant, year):
+    """
+    Gets the path for the input file name
+
+    :param pollutant: Name of the pollutant
+    :type pollutant: str
+
+    :return: Path to the input file
+    :rtype: str
+    """
+
+    if pollutant in LIST_POLLUTANTS:
+        file_name = INPUT_NAME.replace('<pollutant>', pollutant)
+    else:
+        raise ValueError('Pollutant {0} not in pollutant list'.format(pollutant))
+    file_name = file_name.replace('<year>', str(year))
+    return os.path.join(INPUT_PATH, file_name)
+
+
+def get_full_year_data(file_name, year):
+    """
+    Gets the needed date in the input format.
+
+    :param file_name: path to the input file.
+    :type file_name: str
+
+    :param pollutant: Name of the pollutant.
+    :type pollutant: str
+
+    :param sector: Name of the sector.
+    :type sector: str
+
+    :param year: Year to calculate.
+    :type year: int
+
+    :return: Data of the selected emission.
+    :rtype: numpy.array
+    """
+    from netCDF4 import Dataset
+    from datetime import datetime
+    import cf_units
+    import numpy as np
+
+    nc = Dataset(file_name, mode='r')
+
+    time = nc.variables['time']
+
+    time_array = cf_units.num2date(time[:], time.units, time.calendar)
+    time_array = np.array([datetime(year=x.year, month=x.month, day=1) for x in time_array])
+
+    i_time = np.where(time_array == datetime(year=year, month=1, day=1))[0][0]
+
+    # if sector in LIST_SECTORS:
+    data = nc.variables['emi_oceans'][i_time:i_time+12, :, :]
+    # else:
+    #     data = None
+    nc.close()
+
+    return data
+
+
+def get_global_attributes(file_name):
+    """
+    Gets the global attributes of the input file.
+
+    :param file_name: Path to the NetCDF file
+    :type file_name: str
+
+    :return: Global attributes
+    :rtype: dict
+    """
+    from netCDF4 import Dataset
+
+    nc = Dataset(file_name, mode='r')
+
+    atts_dict = {}
+    for name in nc.ncattrs():
+        atts_dict[name] = nc.getncattr(name)
+
+    nc.close()
+    return atts_dict
+
+
+def create_bounds(coordinates, number_vertices=2):
+    """
+    Calculate the vertices coordinates.
+
+    :param coordinates: Coordinates in degrees (latitude or longitude)
+    :type coordinates: numpy.array
+
+    :param number_vertices: Non mandatory parameter that informs the number of vertices that must have the boundaries.
+            (by default 2)
+    :type number_vertices: int
+
+    :return: Array with as many elements as vertices for each value of coords.
+    :rtype: numpy.array
+    """
+    interval = coordinates[1] - coordinates[0]
+
+    coords_left = coordinates - interval / 2
+    coords_right = coordinates + interval / 2
+    if number_vertices == 2:
+        bound_coords = np.dstack((coords_left, coords_right))
+    elif number_vertices == 4:
+        bound_coords = np.dstack((coords_left, coords_right, coords_right, coords_left))
+    else:
+        raise ValueError('The number of vertices of the boudaries must be 2 or 4')
+
+    return bound_coords
+
+
+def do_transformation(year):
+    """
+    Does the transformation for the selected year
+
+    :param year: Year to calculate
+    :type year: int
+    """
+    from datetime import datetime
+    from hermesv3_gr.tools.netcdf_tools import extract_vars, get_grid_area, write_netcdf
+    for pollutant in LIST_POLLUTANTS:
+        file_name = get_input_name(pollutant, year)
+        if os.path.exists(file_name):
+            # c_lats, c_lons, b_lats, b_lons = extract_vars(file_name, ['lat', 'lon', 'lat_bnds', 'lon_bnds'])
+            c_lats, c_lons = extract_vars(file_name, ['lat', 'lon'])
+
+            b_lats = create_bounds(c_lats['data'])
+            b_lons = create_bounds(c_lons['data'])
+
+            cell_area = get_grid_area(file_name)
+
+            global_attributes = get_global_attributes(file_name)
+
+            data = get_full_year_data(file_name, year)
+
+            if pollutant == 'DMS':
+                pollutant_name = 'c2h6s'
+
+            else:
+                pollutant_name = pollutant.lower()
+
+            file_path = os.path.join(OUTPUT_PATH, 'monthly_mean', '{0}'.format(pollutant_name))
+            if not os.path.exists(file_path):
+                os.makedirs(file_path)
+
+            for month in range(1, 12 + 1, 1):
+                emission = {
+                    'name': pollutant_name,
+                    'units': 'kg.m-2.s-1',
+                    'data': data[month - 1, :, :].reshape((1,) + cell_area.shape)
+                    # 'data': data[month - 1, :, :].reshape((1, data.shape[-2], data.shape[-1]))
+
+                }
+                write_netcdf(
+                    os.path.join(file_path, '{0}_{1}{2}.nc'.format(pollutant_name, year, str(month).zfill(2))),
+                    c_lats['data'], c_lons['data'], [emission], date=datetime(year=year, month=month, day=1),
+                    boundary_latitudes=b_lats, boundary_longitudes=b_lons, cell_area=cell_area,
+                    global_attributes=global_attributes)
+
+                # write_netcdf(
+                #     os.path.join(file_path, '{0}_{1}{2}.nc'.format(pollutant_name, year, str(month).zfill(2))),
+                #     c_lats['data'], c_lons['data'], [emission], date=datetime(year=year, month=month, day=1),
+                #     boundary_latitudes=b_lats, boundary_longitudes=b_lons, global_attributes=global_attributes)
+        else:
+            raise IOError('File not found {0}'.format(file_name))
+    return True
+
+
+
+if __name__ == '__main__':
+    for y in LIST_YEARS:
+        do_transformation(y)
\ No newline at end of file
-- 
GitLab


From 4be4dbb5f6b3e96553c0ea67d3f3268918965938 Mon Sep 17 00:00:00 2001
From: ctena <carles.tena@bsc.es>
Date: Wed, 2 Mar 2022 16:42:36 +0100
Subject: [PATCH 11/16] nei2017gb17j_preproc.py ready to test

---
 preproc/epa/nei2017gb17j_pollutant_info.csv |  28 ++
 preproc/epa/nei2017gb17j_preproc.py         | 310 ++++++++++++++++++++
 2 files changed, 338 insertions(+)
 create mode 100644 preproc/epa/nei2017gb17j_pollutant_info.csv
 create mode 100755 preproc/epa/nei2017gb17j_preproc.py

diff --git a/preproc/epa/nei2017gb17j_pollutant_info.csv b/preproc/epa/nei2017gb17j_pollutant_info.csv
new file mode 100644
index 0000000..e8dc640
--- /dev/null
+++ b/preproc/epa/nei2017gb17j_pollutant_info.csv
@@ -0,0 +1,28 @@
+Model Specie,Definition,Molecular Weight,Explicit or Lumped
+AACD,acetic acid,60.0,E
+ACET,acetone,58.1,E
+ALD2,acetaldehyde,44.0,E
+ALD2_PRIMARY,acetaldehyde from emissions only,44,E
+ALDX,aldehydes with 3 or more carbons,58.1,L
+APIN,alpha pinene,136.2,E
+BENZ,benzene,78.1,E
+NH3,ammonia,17.0,E
+NH3_FERT,ammonia fertilizers,17.0,E
+MEOH,methanol,32,E
+ETH,ethene,28,E
+ETHA,ethane,30.1,E
+ETHY,ethyne (acetylene),26,E
+ETOH,ethanol,46.1,E
+FACD,formic acid,46,E
+FORM,formaldehyde,30,E
+IOLE,internal alkene bond,56.1,L
+ISOP,isoprene,68.1,E
+KET,carbon-ketone bond,72.1,L
+OLE,terminal alkene bond,42.1,L
+PAR,carbon-carbon single bond,72.1,L
+SOAALK,tracer for alkanes that can form secondary organic aerosol,112,L
+TERP,monoterpenes,136.2,L
+TOL,toluene and other monoalkyl aromatics,92.1,L
+FORM_PRIMARY,formaldehyde from emissions only,30,E
+NAPH,naphthalene,128.2,E
+XYLMN,xylene and other polyalkyl aromatics except naphthalene,106.2,L
\ No newline at end of file
diff --git a/preproc/epa/nei2017gb17j_preproc.py b/preproc/epa/nei2017gb17j_preproc.py
new file mode 100755
index 0000000..f4354f8
--- /dev/null
+++ b/preproc/epa/nei2017gb17j_preproc.py
@@ -0,0 +1,310 @@
+#!/usr/bin/env python
+
+# Copyright 2018 Earth Sciences Department, BSC-CNS
+#
+# This file is part of HERMESv3_GR.
+#
+# HERMESv3_GR is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# HERMESv3_GR is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with HERMESv3_GR. If not, see <http://www.gnu.org/licenses/>.
+
+
+import os
+import sys
+import numpy as np
+import pandas as pd
+import timeit
+from datetime import datetime, timedelta
+import xarray as xr
+
+# ============== README ======================
+"""
+downloading website: 
+reference: 
+Besides citing HERMESv3_GR, users must also acknowledge the use of the corresponding emission inventories in their works
+"""
+
+# ============== CONFIGURATION PARAMETERS ======================
+# The original emissions are downloaded from the following link:
+# https://dataverse.unc.edu/dataset.xhtml?persistentId=doi:10.15139/S3/TCR6BB
+INPUT_PATH = '/esarchive/recon/epa/nei2017gb17j/original_files/<sector>/emis_mole_<sector>_<YYYYMMDD>_12US1_cmaq_cb6ae7_2017gb_17j.ncf'
+OUTPUT_PATH = '/esarchive/recon/epa/nei2017gb17j/daily_mean/<pollutant>_<sector>/<pollutant>_<YYYYMMDD>.nc'
+SECTOR_LIST = ['ag']
+POLLUTANT_INFO_PATH = "/Users/ctena/PycharmProjects/hermesv3_gr/preproc/epa/nei2017gb17j_pollutant_info.csv"
+# GRID_NETCDF created with HERMESv3_GR with taht configuration:
+# if domain_type == lcc:
+#     lat_1 = 33.0
+#     lat_2 = 45.0
+#     lon_0 = -97.0
+#     lat_0 = 40.0
+#     nx = 459
+#     ny = 299
+#     inc_x = 12000
+#     inc_y = 12000
+#     x_0 = -2556000.0
+#     y_0 = -1728000.0
+
+GRID_NETCDF_PATH = "/scratch/Earth/ctena/HERMESv3_GR_aux/coverage/USA_lcc_12000.0_12000.0/temporal_coords.nc"
+
+START_DAY = datetime(year=2017, month=1, day=1)
+# END_DAY = datetime(year=2017, month=12, day=31)
+END_DAY = datetime(year=2017, month=1, day=1)
+
+XARRAY = True
+# ==============================================================
+
+
+def write_netcdf(output_name_path, data_list, center_lats, center_lons, grid_cell_area, date):
+    print(output_name_path)
+    # Creating NetCDF & Dimensions
+    nc_output = Dataset(output_name_path, mode='w', format="NETCDF4")
+    nc_output.createDimension('nv', 2)
+    nc_output.createDimension('lon', center_lons.shape[0])
+    nc_output.createDimension('lat', center_lats.shape[0])
+    nc_output.createDimension('time', None)
+
+    # TIME
+    time = nc_output.createVariable('time', 'd', ('time',), zlib=True)
+    # time.units = "{0} since {1}".format(tstep_units, global_atts['Start_DateTime'].strftime('%Y-%m-%d %H:%M:%S'))
+    time.units = "hours since {0}".format(date.strftime('%Y-%m-%d %H:%M:%S'))
+    time.standard_name = "time"
+    time.calendar = "gregorian"
+    time.long_name = "time"
+    time[:] = [0]
+
+    # LATITUDE
+    lat = nc_output.createVariable('lat', 'f', ('lat',), zlib=True)
+    lat.bounds = "lat_bnds"
+    lat.units = "degrees_north"
+    lat.axis = "Y"
+    lat.long_name = "latitude"
+    lat.standard_name = "latitude"
+    lat[:] = center_lats
+
+    lat_bnds = nc_output.createVariable('lat_bnds', 'f', ('lat', 'nv',), zlib=True)
+    lat_bnds[:] = create_bounds(center_lats)
+
+    # LONGITUDE
+    lon = nc_output.createVariable('lon', 'f', ('lon',), zlib=True)
+    lon.bounds = "lon_bnds"
+    lon.units = "degrees_east"
+    lon.axis = "X"
+    lon.long_name = "longitude"
+    lon.standard_name = "longitude"
+    lon[:] = center_lons
+
+    lon_bnds = nc_output.createVariable('lon_bnds', 'f', ('lon', 'nv',), zlib=True)
+    lon_bnds[:] = create_bounds(center_lons)
+
+    for var in data_list:
+        # VARIABLE
+        nc_var = nc_output.createVariable(var['name'], 'f', ('time', 'lat', 'lon',), zlib=True)
+        nc_var.units = var['units'].symbol
+        nc_var.long_name = var['long_name']
+        nc_var.coordinates = 'lat lon'
+        nc_var.grid_mapping = 'crs'
+        nc_var.cell_measures = 'area: cell_area'
+        nc_var[:] = var['data']
+
+    # CELL AREA
+    cell_area = nc_output.createVariable('cell_area', 'f', ('lat', 'lon',))
+    cell_area.long_name = "area of the grid cell"
+    cell_area.standard_name = "area"
+    cell_area.units = "m2"
+    cell_area[:] = grid_cell_area
+
+    # CRS
+    crs = nc_output.createVariable('crs', 'i')
+    crs.grid_mapping_name = "latitude_longitude"
+    crs.semi_major_axis = 6371000.0
+    crs.inverse_flattening = 0
+
+    nc_output.setncattr('title', 'ECLIPSEv5a inventory')
+    nc_output.setncattr('Conventions', 'CF-1.6', )
+    nc_output.setncattr('institution', 'IIASA', )
+    nc_output.setncattr('source', 'IIASA', )
+    nc_output.setncattr('history', 'Re-writing of the ECLIPSEv5a input to follow the CF-1.6 conventions;\n' +
+                        '2017-11-28: Creating;\n')
+    nc_output.setncattr('web', 'http://www.iiasa.ac.at/web/home/research/researchPrograms/air/ECLIPSEv5a.html')
+    nc_output.setncattr('comment', 'Re-writing done by Carles Tena (carles.tena@bsc.es) from the BSC-CNS ' +
+                        '(Barcelona Supercomputing Center)', )
+
+    nc_output.close()
+
+
+def do_transformation(year):
+    """
+    Does the transformation for the selected year
+
+    :param year: Year to calculate
+    :type year: int
+    """
+    from datetime import datetime
+    from hermesv3_gr.tools.netcdf_tools import extract_vars, get_grid_area, write_netcdf
+    for pollutant in LIST_POLLUTANTS:
+        file_name = get_input_name(pollutant)
+        if os.path.exists(file_name):
+            # c_lats, c_lons, b_lats, b_lons = extract_vars(file_name, ['lat', 'lon', 'lat_bnds', 'lon_bnds'])
+            c_lats, c_lons = extract_vars(file_name, ['lat', 'lon'])
+
+            b_lats = create_bounds(c_lats['data'])
+            b_lons = create_bounds(c_lons['data'])
+
+            cell_area = get_grid_area(file_name)
+
+            global_attributes = get_global_attributes(file_name)
+            for sector in LIST_SECTORS:
+                data = get_full_year_data(file_name, sector, year)
+
+                if pollutant == 'nox':
+                    pollutant_name = 'nox_no'
+                elif pollutant == 'voc1':
+                    pollutant_name = 'voc01'
+                elif pollutant == 'voc2':
+                    pollutant_name = 'voc02'
+                elif pollutant == 'voc3':
+                    pollutant_name = 'voc03'
+                elif pollutant == 'voc4':
+                    pollutant_name = 'voc04'
+                elif pollutant == 'voc5':
+                    pollutant_name = 'voc05'
+                elif pollutant == 'voc6':
+                    pollutant_name = 'voc06'
+                elif pollutant == 'voc7':
+                    pollutant_name = 'voc07'
+                elif pollutant == 'voc8':
+                    pollutant_name = 'voc08'
+                elif pollutant == 'voc9':
+                    pollutant_name = 'voc09'
+                else:
+                    pollutant_name = pollutant
+
+                file_path = os.path.join(OUTPUT_PATH, 'monthly_mean', '{0}_{1}'.format(pollutant_name, sector))
+                if not os.path.exists(file_path):
+                    os.makedirs(file_path)
+
+                for month in range(1, 12 + 1, 1):
+                    emission = {
+                        'name': pollutant_name,
+                        'units': 'kg.m-2.s-1',
+                        'data': data[month - 1, :, :].reshape((1,) + cell_area.shape)
+                    }
+                    write_netcdf(
+                        os.path.join(file_path, '{0}_{1}{2}.nc'.format(pollutant_name, year, str(month).zfill(2))),
+                        c_lats['data'], c_lons['data'], [emission], date=datetime(year=year, month=month, day=1),
+                        boundary_latitudes=b_lats, boundary_longitudes=b_lons, cell_area=cell_area,
+                        global_attributes=global_attributes)
+        else:
+            raise IOError('File not found {0}'.format(file_name))
+    return True
+
+
+def get_pollutant_info(csv_path):
+    """
+    Obtain the pollutant info
+
+    :param csv_path: Path to the CSV that contains all the pollutant info
+    :type csv_path: str
+
+    :return: Pollutant info table
+    :rtype: pd.DataFrame
+    """
+    polluntant_info = pd.read_csv(csv_path)
+    return polluntant_info
+
+
+def parse_path(file_path, date_aux=None, sector=None, pollutant=None):
+    """
+    :param file_path: Path to parse
+    :type file_path: str
+
+    :param date_aux: Date to parse
+    :type date_aux: datetime, None
+
+    :param sector: Sector name
+    :type sector: str, None
+
+    :param pollutant: Pollutant name
+    :type pollutant: str, None
+
+    :return: Complete path
+    :rtype: str
+    """
+    if date_aux is not None:
+        file_path = file_path.replace('<YYYYMMDD>', date_aux.strftime("%Y%m%d"))
+    if pollutant is not None:
+        file_path = file_path.replace('<pollutant>', pollutant)
+    if sector is not None:
+        file_path = file_path.replace('<sector>', sector)
+    return file_path
+
+
+def make_dir(file_path):
+    parent_dir = os.path.dirname(file_path)
+    if not os.path.exists(parent_dir):
+        os.makedirs(parent_dir)
+    return True
+
+
+def main_xarray():
+    pol_info_df = get_pollutant_info(POLLUTANT_INFO_PATH)
+
+    grid_nc = xr.open_dataset(GRID_NETCDF_PATH, decode_cf=True, decode_times=False, decode_coords='all')
+    del grid_nc['var_aux']
+
+    day_aux = START_DAY
+    for sector in SECTOR_LIST:
+        while day_aux <= END_DAY:
+            in_nc = xr.open_dataset(parse_path(INPUT_PATH, date_aux=day_aux, sector=sector))
+            for i, poll_info in pol_info_df.iterrows():
+                out_nc = grid_nc.copy()
+                poll_name = poll_info['Model Specie'].lower()
+                out_nc[poll_name] = (('time', 'lat', 'lon'),
+                                     (in_nc[poll_info['Model Specie']][:24].mean(dim='TSTEP').data * poll_info[
+                                         'Molecular Weight']) / grid_nc['cell_area'].data)
+                out_nc[poll_name].attrs['units'] = 'kg.s-1.m-2'
+                out_nc[poll_name].attrs['long_name'] = poll_info['Definition']
+                out_nc['time'] = [day_aux]
+
+                out_nc.attrs['comment'] = "Re-writing done by Carles Tena (carles.tena@bsc.es) from the BSC-CNS " + \
+                                          "(Barcelona Supercomputing Center)"
+
+                out_path = parse_path(OUTPUT_PATH, date_aux=day_aux, sector=sector, pollutant=poll_name)
+                make_dir(out_path)
+                out_nc.to_netcdf(out_path)
+
+            day_aux += timedelta(days=1)
+
+    return True
+
+
+def main():
+    day_aux = START_DAY
+
+    while day_aux <= END_DAY:
+        in_path = parse_path(INPUT_PATH, day_aux)
+
+        day_aux += timedelta(days=1)
+
+    return True
+
+
+if __name__ == '__main__':
+    initime = timeit.default_timer()
+
+    if XARRAY:
+        main_xarray()
+    else:
+        main()
+
+    print("\t{0:.3f}s".format(timeit.default_timer() - initime))
+    sys.exit(0)
-- 
GitLab


From 21ee39b031df1845f16d287e50a1ab4a54158b1c Mon Sep 17 00:00:00 2001
From: Carles Tena <carles.tena@bsc.es>
Date: Tue, 8 Mar 2022 12:09:45 +0100
Subject: [PATCH 12/16] Bug fix on the EPA NEI2017 preproc unit change

---
 preproc/epa/nei2017gb17j_preproc.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/preproc/epa/nei2017gb17j_preproc.py b/preproc/epa/nei2017gb17j_preproc.py
index f4354f8..c8e8b06 100755
--- a/preproc/epa/nei2017gb17j_preproc.py
+++ b/preproc/epa/nei2017gb17j_preproc.py
@@ -39,7 +39,7 @@ Besides citing HERMESv3_GR, users must also acknowledge the use of the correspon
 INPUT_PATH = '/esarchive/recon/epa/nei2017gb17j/original_files/<sector>/emis_mole_<sector>_<YYYYMMDD>_12US1_cmaq_cb6ae7_2017gb_17j.ncf'
 OUTPUT_PATH = '/esarchive/recon/epa/nei2017gb17j/daily_mean/<pollutant>_<sector>/<pollutant>_<YYYYMMDD>.nc'
 SECTOR_LIST = ['ag']
-POLLUTANT_INFO_PATH = "/Users/ctena/PycharmProjects/hermesv3_gr/preproc/epa/nei2017gb17j_pollutant_info.csv"
+POLLUTANT_INFO_PATH = "/home/Earth/ctena/Models/hermesv3_gr/preproc/epa/nei2017gb17j_pollutant_info.csv"
 # GRID_NETCDF created with HERMESv3_GR with taht configuration:
 # if domain_type == lcc:
 #     lat_1 = 33.0
@@ -56,8 +56,8 @@ POLLUTANT_INFO_PATH = "/Users/ctena/PycharmProjects/hermesv3_gr/preproc/epa/nei2
 GRID_NETCDF_PATH = "/scratch/Earth/ctena/HERMESv3_GR_aux/coverage/USA_lcc_12000.0_12000.0/temporal_coords.nc"
 
 START_DAY = datetime(year=2017, month=1, day=1)
-# END_DAY = datetime(year=2017, month=12, day=31)
-END_DAY = datetime(year=2017, month=1, day=1)
+END_DAY = datetime(year=2017, month=12, day=31)
+# END_DAY = datetime(year=2017, month=1, day=1)
 
 XARRAY = True
 # ==============================================================
@@ -264,13 +264,14 @@ def main_xarray():
     day_aux = START_DAY
     for sector in SECTOR_LIST:
         while day_aux <= END_DAY:
+            print(day_aux)
             in_nc = xr.open_dataset(parse_path(INPUT_PATH, date_aux=day_aux, sector=sector))
             for i, poll_info in pol_info_df.iterrows():
                 out_nc = grid_nc.copy()
                 poll_name = poll_info['Model Specie'].lower()
                 out_nc[poll_name] = (('time', 'lat', 'lon'),
                                      (in_nc[poll_info['Model Specie']][:24].mean(dim='TSTEP').data * poll_info[
-                                         'Molecular Weight']) / grid_nc['cell_area'].data)
+                                         'Molecular Weight']) / (grid_nc['cell_area'].data * 1000))
                 out_nc[poll_name].attrs['units'] = 'kg.s-1.m-2'
                 out_nc[poll_name].attrs['long_name'] = poll_info['Definition']
                 out_nc['time'] = [day_aux]
-- 
GitLab


From ae5ca39fbbb206add31ef4e26f2dc443a3cc618a Mon Sep 17 00:00:00 2001
From: ctena <carles.tena@bsc.es>
Date: Tue, 8 Mar 2022 16:38:04 +0100
Subject: [PATCH 13/16] fix to run NEI on HERMES

---
 .../modules/emision_inventories/emission_inventory.py      | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/hermesv3_gr/modules/emision_inventories/emission_inventory.py b/hermesv3_gr/modules/emision_inventories/emission_inventory.py
index 987ef7a..59c4451 100755
--- a/hermesv3_gr/modules/emision_inventories/emission_inventory.py
+++ b/hermesv3_gr/modules/emision_inventories/emission_inventory.py
@@ -266,7 +266,12 @@ class EmissionInventory(object):
         if self.coverage == 'global':
             regridded_emissions = self.regrid.start_regridding()
         else:
-            regridded_emissions = self.regrid.start_esmpy_regridding()
+            try:
+                # Used when the original regional EI doesn't cover the whole destination domain
+                regridded_emissions = self.regrid.start_esmpy_regridding()
+            except:
+                # Used when the destination domain is smaller than the original regional domain (over longitudes)
+                regridded_emissions = self.regrid.start_regridding()
         for emission in regridded_emissions:
             dict_aux = {'name': emission['name'], 'data': emission['data'], 'units': 'm'}
             self.emissions.append(dict_aux)
-- 
GitLab


From e925a9948e2043aa57a746bf4c609944ba2e77b7 Mon Sep 17 00:00:00 2001
From: Carles Tena <carles.tena@bsc.es>
Date: Tue, 15 Mar 2022 16:01:12 +0100
Subject: [PATCH 14/16] HEMCO preproc ready to be test

---
 preproc/hemco_soilnox_v202112_preproc.py | 195 +++++++++++++++++++++++
 1 file changed, 195 insertions(+)
 create mode 100755 preproc/hemco_soilnox_v202112_preproc.py

diff --git a/preproc/hemco_soilnox_v202112_preproc.py b/preproc/hemco_soilnox_v202112_preproc.py
new file mode 100755
index 0000000..41c88ed
--- /dev/null
+++ b/preproc/hemco_soilnox_v202112_preproc.py
@@ -0,0 +1,195 @@
+#!/usr/bin/env python
+
+# Copyright 2018 Earth Sciences Department, BSC-CNS
+#
+# This file is part of HERMESv3_GR.
+#
+# HERMESv3_GR is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# HERMESv3_GR is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with HERMESv3_GR. If not, see <http://www.gnu.org/licenses/>.
+
+
+import os
+import sys
+import numpy as np
+import pandas as pd
+import timeit
+from datetime import datetime, timedelta
+import xarray as xr
+
+# ============== README ======================
+"""
+downloading website: 
+reference: 
+Besides citing HERMESv3_GR, users must also acknowledge the use of the corresponding emission inventories in their works
+"""
+
+# ============== CONFIGURATION PARAMETERS ======================
+# The original emissions are downloaded from the following link:
+# http://geoschemdata.wustl.edu/ExtData/HEMCO/OFFLINE_SOILNOX/v2021-12/0.25x0.3125/2019/
+INPUT_PATH = '/esarchive/recon/pku/hemco_soilnox-v202112/original_files/<MM>/soilnox_025.<YYYYMMDD>.nc'
+OUTPUT_PATH = '/esarchive/recon/pku/hemco_soilnox-v202112/daily_mean/<pollutant>_<sector>/<pollutant>_<YYYYMMDD>.nc'
+SECTOR_LIST = ['soil']
+POLLUTANT_INFO = {"EmisNO_Fert": "nox_no_fert", "SOIL_NOx": "nox_no_tot"}
+
+START_DAY = datetime(year=2019, month=1, day=1)
+END_DAY = datetime(year=2019, month=12, day=31)
+# END_DAY = datetime(year=2019, month=1, day=1)
+
+XARRAY = True
+# ==============================================================
+
+
+def create_bounds(coords, number_vertices=2):
+    """
+    Calculate the vertices coordinates.
+
+    :param coords: Coordinates in degrees (latitude or longitude)
+    :type coords: numpy.array
+
+    :param number_vertices: Non mandatory parameter that informs the number of vertices that must have the boundaries.
+            (by default 2)
+    :type number_vertices: int
+
+    :return: Array with as many elements as vertices for each value of coords.
+    :rtype: numpy.array
+    """
+    import numpy as np
+
+    interval = coords[1] - coords[0]
+
+    coords_left = coords - interval / 2
+    coords_right = coords + interval / 2
+    if number_vertices == 2:
+        bound_coords = np.dstack((coords_left, coords_right))
+    elif number_vertices == 4:
+        bound_coords = np.dstack((coords_left, coords_right, coords_right, coords_left))
+    else:
+        raise ValueError('The number of vertices of the boudaries must be 2 or 4')
+
+    return bound_coords
+
+
+def parse_path(file_path, date_aux=None, sector=None, pollutant=None):
+    """
+    :param file_path: Path to parse
+    :type file_path: str
+
+    :param date_aux: Date to parse
+    :type date_aux: datetime, None
+
+    :param sector: Sector name
+    :type sector: str, None
+
+    :param pollutant: Pollutant name
+    :type pollutant: str, None
+
+    :return: Complete path
+    :rtype: str
+    """
+    if date_aux is not None:
+        file_path = file_path.replace('<YYYYMMDD>', date_aux.strftime("%Y%m%d"))
+        file_path = file_path.replace('<MM>', date_aux.strftime("%m"))
+    if pollutant is not None:
+        file_path = file_path.replace('<pollutant>', pollutant)
+    if sector is not None:
+        file_path = file_path.replace('<sector>', sector)
+    return file_path
+
+
+def make_dir(file_path):
+    parent_dir = os.path.dirname(file_path)
+    if not os.path.exists(parent_dir):
+        os.makedirs(parent_dir)
+    return True
+
+
+def make_grid(template_path):
+    grid = xr.open_dataset(template_path)
+
+    grid.coords['cell_area'] = grid['AREA']
+    grid.coords['cell_area'].attrs = {"long_name": "area of the grid cell",
+                                      "standard_name": "cell_area",
+                                      "units": "m2"}
+
+    grid.coords['crs'] = int(0)
+    grid.coords['crs'].attrs = {"grid_mapping_name": "latitude_longitude",
+                                "semi_major_axis": 6371000.,
+                                "inverse_flattening": 0}
+
+    lat_bnds = create_bounds(grid.coords['lat'])
+    lat_bnds = lat_bnds.reshape((lat_bnds.shape[-2], lat_bnds.shape[-1]))
+    grid.coords['lat_bnds'] = (('lat', 'nv'), lat_bnds)
+    grid.coords['lat'].attrs['bounds'] = 'lat_bnds'
+
+    lon_bnds = create_bounds(grid.coords['lon'])
+    lon_bnds = lon_bnds.reshape((lon_bnds.shape[-2], lon_bnds.shape[-1]))
+    grid.coords['lon_bnds'] = (('lon', 'nv'), lon_bnds)
+    grid.coords['lon'].attrs['bounds'] = 'lon_bnds'
+
+    var_list = list(grid.keys())
+    for var_name in var_list:
+        del grid[var_name]
+
+    return grid
+
+
+def main_xarray():
+
+    grid_nc = make_grid(parse_path(INPUT_PATH, date_aux=START_DAY))
+
+    day_aux = START_DAY
+    for sector in SECTOR_LIST:
+        while day_aux <= END_DAY:
+            print(day_aux)
+            in_nc = xr.open_dataset(parse_path(INPUT_PATH, date_aux=day_aux))
+            for poll_in_name, poll_out_name in POLLUTANT_INFO.items():
+                out_nc = grid_nc.copy()
+                out_nc[poll_out_name] = (('lat', 'lon'), (in_nc[poll_in_name][:].mean(dim='time').data))
+                out_nc[poll_out_name].attrs['cell_measures'] = "area: cell_area"
+                out_nc[poll_out_name].attrs['grid_mapping'] = "crs"
+
+                out_nc['time'] = [day_aux]
+
+                out_nc.attrs['comment'] = "Re-writing done by Carles Tena (carles.tena@bsc.es) from the BSC-CNS " + \
+                                          "(Barcelona Supercomputing Center)"
+
+                out_path = parse_path(OUTPUT_PATH, date_aux=day_aux, sector=sector, pollutant=poll_out_name)
+                make_dir(out_path)
+                out_nc.to_netcdf(out_path)
+
+            day_aux += timedelta(days=1)
+
+    return True
+
+
+def main():
+    day_aux = START_DAY
+
+    while day_aux <= END_DAY:
+        in_path = parse_path(INPUT_PATH, day_aux)
+
+        day_aux += timedelta(days=1)
+
+    return True
+
+
+if __name__ == '__main__':
+    initime = timeit.default_timer()
+
+    if XARRAY:
+        main_xarray()
+    else:
+        main()
+
+    print("\t{0:.3f}s".format(timeit.default_timer() - initime))
+    sys.exit(0)
-- 
GitLab


From 5d99ce0f4855eda63736274f3b0d18dc4290fd87 Mon Sep 17 00:00:00 2001
From: Carles Tena <carles.tena@bsc.es>
Date: Thu, 17 Mar 2022 09:54:31 +0100
Subject: [PATCH 15/16] HEMCO preproc updated

---
 preproc/hemco_soilnox_v202112_preproc.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/preproc/hemco_soilnox_v202112_preproc.py b/preproc/hemco_soilnox_v202112_preproc.py
index 41c88ed..6a157fd 100755
--- a/preproc/hemco_soilnox_v202112_preproc.py
+++ b/preproc/hemco_soilnox_v202112_preproc.py
@@ -35,11 +35,13 @@ Besides citing HERMESv3_GR, users must also acknowledge the use of the correspon
 
 # ============== CONFIGURATION PARAMETERS ======================
 # The original emissions are downloaded from the following link:
-# http://geoschemdata.wustl.edu/ExtData/HEMCO/OFFLINE_SOILNOX/v2021-12/0.25x0.3125/2019/
+# downloading website: http://geoschemdata.wustl.edu/ExtData/HEMCO/OFFLINE_SOILNOX/v2021-12/0.25x0.3125/
+# reference: https://doi.org/10.1038/s41597-020-0488-5
+
 INPUT_PATH = '/esarchive/recon/pku/hemco_soilnox-v202112/original_files/<MM>/soilnox_025.<YYYYMMDD>.nc'
 OUTPUT_PATH = '/esarchive/recon/pku/hemco_soilnox-v202112/daily_mean/<pollutant>_<sector>/<pollutant>_<YYYYMMDD>.nc'
 SECTOR_LIST = ['soil']
-POLLUTANT_INFO = {"EmisNO_Fert": "nox_no_fert", "SOIL_NOx": "nox_no_tot"}
+POLLUTANT_INFO = {"EmisNO_Fert": "nox_n_fert", "SOIL_NOx": "nox_n_tot"}
 
 START_DAY = datetime(year=2019, month=1, day=1)
 END_DAY = datetime(year=2019, month=12, day=31)
-- 
GitLab


From 47217ca72906e39d02a3364cb405eb6c5abdf3d2 Mon Sep 17 00:00:00 2001
From: ctena <carles.tena@bsc.es>
Date: Thu, 17 Mar 2022 10:01:22 +0100
Subject: [PATCH 16/16] New minor release with NEI & HEMCO preproc

---
 CHANGELOG               | 5 +++++
 hermesv3_gr/__init__.py | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG b/CHANGELOG
index 1417252..16a9aeb 100755
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,8 @@
+2.1.3
+    2022/03/17
+    - NEI preproc (#49)
+    - HEMCO preproc (#52)
+
 2.1.2
     2021/12/17
     - Corrected error while processing GFAS emissions (#43)
diff --git a/hermesv3_gr/__init__.py b/hermesv3_gr/__init__.py
index 4eabd0b..e835b9d 100755
--- a/hermesv3_gr/__init__.py
+++ b/hermesv3_gr/__init__.py
@@ -1 +1 @@
-__version__ = "2.1.2"
+__version__ = "2.1.3"
-- 
GitLab