From 4be4dbb5f6b3e96553c0ea67d3f3268918965938 Mon Sep 17 00:00:00 2001
From: ctena
Date: Wed, 2 Mar 2022 16:42:36 +0100
Subject: [PATCH 1/3] nei2017gb17j_preproc.py ready to test

---
 preproc/epa/nei2017gb17j_pollutant_info.csv |  28 ++
 preproc/epa/nei2017gb17j_preproc.py         | 310 ++++++++++++++++++++
 2 files changed, 338 insertions(+)
 create mode 100644 preproc/epa/nei2017gb17j_pollutant_info.csv
 create mode 100755 preproc/epa/nei2017gb17j_preproc.py

diff --git a/preproc/epa/nei2017gb17j_pollutant_info.csv b/preproc/epa/nei2017gb17j_pollutant_info.csv
new file mode 100644
index 0000000..e8dc640
--- /dev/null
+++ b/preproc/epa/nei2017gb17j_pollutant_info.csv
@@ -0,0 +1,28 @@
+Model Specie,Definition,Molecular Weight,Explicit or Lumped
+AACD,acetic acid,60.0,E
+ACET,acetone,58.1,E
+ALD2,acetaldehyde,44.0,E
+ALD2_PRIMARY,acetaldehyde from emissions only,44,E
+ALDX,aldehydes with 3 or more carbons,58.1,L
+APIN,alpha pinene,136.2,E
+BENZ,benzene,78.1,E
+NH3,ammonia,17.0,E
+NH3_FERT,ammonia fertilizers,17.0,E
+MEOH,methanol,32,E
+ETH,ethene,28,E
+ETHA,ethane,30.1,E
+ETHY,ethyne (acetylene),26,E
+ETOH,ethanol,46.1,E
+FACD,formic acid,46,E
+FORM,formaldehyde,30,E
+IOLE,internal alkene bond,56.1,L
+ISOP,isoprene,68.1,E
+KET,carbon-ketone bond,72.1,L
+OLE,terminal alkene bond,42.1,L
+PAR,carbon-carbon single bond,72.1,L
+SOAALK,tracer for alkanes that can form secondary organic aerosol,112,L
+TERP,monoterpenes,136.2,L
+TOL,toluene and other monoalkyl aromatics,92.1,L
+FORM_PRIMARY,formaldehyde from emissions only,30,E
+NAPH,naphthalene,128.2,E
+XYLMN,xylene and other polyalkyl aromatics except naphthalene,106.2,L
\ No newline at end of file
diff --git a/preproc/epa/nei2017gb17j_preproc.py b/preproc/epa/nei2017gb17j_preproc.py
new file mode 100755
index 0000000..f4354f8
--- /dev/null
+++ b/preproc/epa/nei2017gb17j_preproc.py
@@ -0,0 +1,310 @@
+#!/usr/bin/env python
+
+# Copyright 2018 Earth Sciences Department, BSC-CNS
+#
+# This file is part of HERMESv3_GR.
+#
+# HERMESv3_GR is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# HERMESv3_GR is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with HERMESv3_GR. If not, see <http://www.gnu.org/licenses/>.
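+
+# Preprocessor for the EPA NEI 2017gb (17j platform) CMAQ-ready 'emis_mole'
+# files on the 12US1 grid: for each day and species, the first 24 hourly time
+# steps are averaged, multiplied by the molecular weight listed in
+# nei2017gb17j_pollutant_info.csv and divided by the cell area read from
+# GRID_NETCDF_PATH, and the resulting daily mean field is written out with
+# units of kg.s-1.m-2.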
+
+import os
+import sys
+import numpy as np
+import pandas as pd
+import timeit
+from datetime import datetime, timedelta
+import xarray as xr
+from netCDF4 import Dataset
+
+# ============== README ======================
+"""
+downloading website:
+reference:
+Besides citing HERMESv3_GR, users must also acknowledge the use of the corresponding emission inventories in their works
+"""
+
+# ============== CONFIGURATION PARAMETERS ======================
+# The original emissions are downloaded from the following link:
+# https://dataverse.unc.edu/dataset.xhtml?persistentId=doi:10.15139/S3/TCR6BB
+INPUT_PATH = '/esarchive/recon/epa/nei2017gb17j/original_files//emis_mole___12US1_cmaq_cb6ae7_2017gb_17j.ncf'
+OUTPUT_PATH = '/esarchive/recon/epa/nei2017gb17j/daily_mean/_/_.nc'
+SECTOR_LIST = ['ag']
+POLLUTANT_INFO_PATH = "/Users/ctena/PycharmProjects/hermesv3_gr/preproc/epa/nei2017gb17j_pollutant_info.csv"
+# GRID_NETCDF created with HERMESv3_GR with this configuration:
+# if domain_type == lcc:
+#     lat_1 = 33.0
+#     lat_2 = 45.0
+#     lon_0 = -97.0
+#     lat_0 = 40.0
+#     nx = 459
+#     ny = 299
+#     inc_x = 12000
+#     inc_y = 12000
+#     x_0 = -2556000.0
+#     y_0 = -1728000.0

+GRID_NETCDF_PATH = "/scratch/Earth/ctena/HERMESv3_GR_aux/coverage/USA_lcc_12000.0_12000.0/temporal_coords.nc"
+
+START_DAY = datetime(year=2017, month=1, day=1)
+# END_DAY = datetime(year=2017, month=12, day=31)
+END_DAY = datetime(year=2017, month=1, day=1)
+
+XARRAY = True
+# ==============================================================
+
+
+# Apparently carried over from other HERMESv3_GR preprocessors (the global
+# attributes below still refer to ECLIPSEv5a); it is not called by
+# main_xarray() and it relies on a create_bounds helper that is not defined
+# or imported in this file.
+def write_netcdf(output_name_path, data_list, center_lats, center_lons, grid_cell_area, date):
+    print(output_name_path)
+    # Creating NetCDF & Dimensions
+    nc_output = Dataset(output_name_path, mode='w', format="NETCDF4")
+    nc_output.createDimension('nv', 2)
+    nc_output.createDimension('lon', center_lons.shape[0])
+    nc_output.createDimension('lat', center_lats.shape[0])
+    nc_output.createDimension('time', None)
+
+    # TIME
+    time = nc_output.createVariable('time', 'd', ('time',), zlib=True)
+    # time.units = "{0} since {1}".format(tstep_units, global_atts['Start_DateTime'].strftime('%Y-%m-%d %H:%M:%S'))
+    time.units = "hours since {0}".format(date.strftime('%Y-%m-%d %H:%M:%S'))
+    time.standard_name = "time"
+    time.calendar = "gregorian"
+    time.long_name = "time"
+    time[:] = [0]
+
+    # LATITUDE
+    lat = nc_output.createVariable('lat', 'f', ('lat',), zlib=True)
+    lat.bounds = "lat_bnds"
+    lat.units = "degrees_north"
+    lat.axis = "Y"
+    lat.long_name = "latitude"
+    lat.standard_name = "latitude"
+    lat[:] = center_lats
+
+    lat_bnds = nc_output.createVariable('lat_bnds', 'f', ('lat', 'nv',), zlib=True)
+    lat_bnds[:] = create_bounds(center_lats)
+
+    # LONGITUDE
+    lon = nc_output.createVariable('lon', 'f', ('lon',), zlib=True)
+    lon.bounds = "lon_bnds"
+    lon.units = "degrees_east"
+    lon.axis = "X"
+    lon.long_name = "longitude"
+    lon.standard_name = "longitude"
+    lon[:] = center_lons
+
+    lon_bnds = nc_output.createVariable('lon_bnds', 'f', ('lon', 'nv',), zlib=True)
+    lon_bnds[:] = create_bounds(center_lons)
+
+    for var in data_list:
+        # VARIABLE
+        nc_var = nc_output.createVariable(var['name'], 'f', ('time', 'lat', 'lon',), zlib=True)
+        nc_var.units = var['units'].symbol
+        nc_var.long_name = var['long_name']
+        nc_var.coordinates = 'lat lon'
+        nc_var.grid_mapping = 'crs'
+        nc_var.cell_measures = 'area: cell_area'
+        nc_var[:] = var['data']
+
+    # CELL AREA
+    cell_area = nc_output.createVariable('cell_area', 'f', ('lat', 'lon',))
+    cell_area.long_name = "area of the grid cell"
+    cell_area.standard_name = "area"
+    cell_area.units = "m2"
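+    # The cell areas (m2) are attached to every emission variable above
+    # through the 'cell_measures' attribute, so per-cell values can be
+    # related to the area of each grid cell downstream.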
+    cell_area[:] = grid_cell_area
+
+    # CRS
+    crs = nc_output.createVariable('crs', 'i')
+    crs.grid_mapping_name = "latitude_longitude"
+    crs.semi_major_axis = 6371000.0
+    crs.inverse_flattening = 0
+
+    nc_output.setncattr('title', 'ECLIPSEv5a inventory')
+    nc_output.setncattr('Conventions', 'CF-1.6', )
+    nc_output.setncattr('institution', 'IIASA', )
+    nc_output.setncattr('source', 'IIASA', )
+    nc_output.setncattr('history', 'Re-writing of the ECLIPSEv5a input to follow the CF-1.6 conventions;\n' +
+                        '2017-11-28: Creating;\n')
+    nc_output.setncattr('web', 'http://www.iiasa.ac.at/web/home/research/researchPrograms/air/ECLIPSEv5a.html')
+    nc_output.setncattr('comment', 'Re-writing done by Carles Tena (carles.tena@bsc.es) from the BSC-CNS ' +
+                        '(Barcelona Supercomputing Center)', )
+
+    nc_output.close()
+
+
+# Also carried over from other preprocessors: it references LIST_POLLUTANTS,
+# LIST_SECTORS and several helpers that are not defined in this file, and it
+# is not called by this script.
+def do_transformation(year):
+    """
+    Does the transformation for the selected year
+
+    :param year: Year to calculate
+    :type year: int
+    """
+    from datetime import datetime
+    from hermesv3_gr.tools.netcdf_tools import extract_vars, get_grid_area, write_netcdf
+    for pollutant in LIST_POLLUTANTS:
+        file_name = get_input_name(pollutant)
+        if os.path.exists(file_name):
+            # c_lats, c_lons, b_lats, b_lons = extract_vars(file_name, ['lat', 'lon', 'lat_bnds', 'lon_bnds'])
+            c_lats, c_lons = extract_vars(file_name, ['lat', 'lon'])
+
+            b_lats = create_bounds(c_lats['data'])
+            b_lons = create_bounds(c_lons['data'])
+
+            cell_area = get_grid_area(file_name)
+
+            global_attributes = get_global_attributes(file_name)
+            for sector in LIST_SECTORS:
+                data = get_full_year_data(file_name, sector, year)
+
+                if pollutant == 'nox':
+                    pollutant_name = 'nox_no'
+                elif pollutant == 'voc1':
+                    pollutant_name = 'voc01'
+                elif pollutant == 'voc2':
+                    pollutant_name = 'voc02'
+                elif pollutant == 'voc3':
+                    pollutant_name = 'voc03'
+                elif pollutant == 'voc4':
+                    pollutant_name = 'voc04'
+                elif pollutant == 'voc5':
+                    pollutant_name = 'voc05'
+                elif pollutant == 'voc6':
+                    pollutant_name = 'voc06'
+                elif pollutant == 'voc7':
+                    pollutant_name = 'voc07'
+                elif pollutant == 'voc8':
+                    pollutant_name = 'voc08'
+                elif pollutant == 'voc9':
+                    pollutant_name = 'voc09'
+                else:
+                    pollutant_name = pollutant
+
+                file_path = os.path.join(OUTPUT_PATH, 'monthly_mean', '{0}_{1}'.format(pollutant_name, sector))
+                if not os.path.exists(file_path):
+                    os.makedirs(file_path)
+
+                for month in range(1, 12 + 1, 1):
+                    emission = {
+                        'name': pollutant_name,
+                        'units': 'kg.m-2.s-1',
+                        'data': data[month - 1, :, :].reshape((1,) + cell_area.shape)
+                    }
+                    write_netcdf(
+                        os.path.join(file_path, '{0}_{1}{2}.nc'.format(pollutant_name, year, str(month).zfill(2))),
+                        c_lats['data'], c_lons['data'], [emission], date=datetime(year=year, month=month, day=1),
+                        boundary_latitudes=b_lats, boundary_longitudes=b_lons, cell_area=cell_area,
+                        global_attributes=global_attributes)
+        else:
+            raise IOError('File not found {0}'.format(file_name))
+    return True
+
+
+def get_pollutant_info(csv_path):
+    """
+    Obtain the pollutant info
+
+    :param csv_path: Path to the CSV that contains all the pollutant info
+    :type csv_path: str
+
+    :return: Pollutant info table
+    :rtype: pd.DataFrame
+    """
+    pollutant_info = pd.read_csv(csv_path)
+    return pollutant_info
+
+
+def parse_path(file_path, date_aux=None, sector=None, pollutant=None):
+    """
+    :param file_path: Path to parse
+    :type file_path: str
+
+    :param date_aux: Date to parse
+    :type date_aux: datetime, None
+
+    :param sector: Sector name
+    :type sector: str, None
+
+    :param pollutant: Pollutant name
+    :type pollutant: str, None
+
+    :return: Complete path
+    :rtype: str
+    """
+    if date_aux is not None:
+        file_path = file_path.replace('', date_aux.strftime("%Y%m%d"))
+    if pollutant is not None:
+        file_path = file_path.replace('', pollutant)
+    if sector is not None:
+        file_path = file_path.replace('', sector)
+    return file_path
+
+
+def make_dir(file_path):
+    parent_dir = os.path.dirname(file_path)
+    if not os.path.exists(parent_dir):
+        os.makedirs(parent_dir)
+    return True
+
+
+def main_xarray():
+    pol_info_df = get_pollutant_info(POLLUTANT_INFO_PATH)
+
+    grid_nc = xr.open_dataset(GRID_NETCDF_PATH, decode_cf=True, decode_times=False, decode_coords='all')
+    del grid_nc['var_aux']
+
+    day_aux = START_DAY
+    for sector in SECTOR_LIST:
+        while day_aux <= END_DAY:
+            in_nc = xr.open_dataset(parse_path(INPUT_PATH, date_aux=day_aux, sector=sector))
+            for i, poll_info in pol_info_df.iterrows():
+                out_nc = grid_nc.copy()
+                poll_name = poll_info['Model Specie'].lower()
+                out_nc[poll_name] = (('time', 'lat', 'lon'),
+                                     (in_nc[poll_info['Model Specie']][:24].mean(dim='TSTEP').data * poll_info[
+                                         'Molecular Weight']) / grid_nc['cell_area'].data)
+                out_nc[poll_name].attrs['units'] = 'kg.s-1.m-2'
+                out_nc[poll_name].attrs['long_name'] = poll_info['Definition']
+                out_nc['time'] = [day_aux]
+
+                out_nc.attrs['comment'] = "Re-writing done by Carles Tena (carles.tena@bsc.es) from the BSC-CNS " + \
+                                          "(Barcelona Supercomputing Center)"
+
+                out_path = parse_path(OUTPUT_PATH, date_aux=day_aux, sector=sector, pollutant=poll_name)
+                make_dir(out_path)
+                out_nc.to_netcdf(out_path)
+
+            day_aux += timedelta(days=1)
+
+    return True
+
+
+def main():
+    day_aux = START_DAY
+
+    while day_aux <= END_DAY:
+        in_path = parse_path(INPUT_PATH, day_aux)
+
+        day_aux += timedelta(days=1)
+
+    return True
+
+
+if __name__ == '__main__':
+    initime = timeit.default_timer()
+
+    if XARRAY:
+        main_xarray()
+    else:
+        main()
+
+    print("\t{0:.3f}s".format(timeit.default_timer() - initime))
+    sys.exit(0)
--
GitLab


From 21ee39b031df1845f16d287e50a1ab4a54158b1c Mon Sep 17 00:00:00 2001
From: Carles Tena
Date: Tue, 8 Mar 2022 12:09:45 +0100
Subject: [PATCH 2/3] Bug fix on the EPA NEI2017 preproc unit change

---
 preproc/epa/nei2017gb17j_preproc.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/preproc/epa/nei2017gb17j_preproc.py b/preproc/epa/nei2017gb17j_preproc.py
index f4354f8..c8e8b06 100755
--- a/preproc/epa/nei2017gb17j_preproc.py
+++ b/preproc/epa/nei2017gb17j_preproc.py
@@ -39,7 +39,7 @@ Besides citing HERMESv3_GR, users must also acknowledge the use of the correspon
 INPUT_PATH = '/esarchive/recon/epa/nei2017gb17j/original_files//emis_mole___12US1_cmaq_cb6ae7_2017gb_17j.ncf'
 OUTPUT_PATH = '/esarchive/recon/epa/nei2017gb17j/daily_mean/_/_.nc'
 SECTOR_LIST = ['ag']
-POLLUTANT_INFO_PATH = "/Users/ctena/PycharmProjects/hermesv3_gr/preproc/epa/nei2017gb17j_pollutant_info.csv"
+POLLUTANT_INFO_PATH = "/home/Earth/ctena/Models/hermesv3_gr/preproc/epa/nei2017gb17j_pollutant_info.csv"
 # GRID_NETCDF created with HERMESv3_GR with this configuration:
 # if domain_type == lcc:
 #     lat_1 = 33.0
@@ -56,8 +56,8 @@ POLLUTANT_INFO_PATH = "/Users/ctena/PycharmProjects/hermesv3_gr/preproc/epa/nei2
 GRID_NETCDF_PATH = "/scratch/Earth/ctena/HERMESv3_GR_aux/coverage/USA_lcc_12000.0_12000.0/temporal_coords.nc"
 
 START_DAY = datetime(year=2017, month=1, day=1)
-# END_DAY = datetime(year=2017, month=12, day=31)
-END_DAY = datetime(year=2017, month=1, day=1)
+END_DAY = datetime(year=2017, month=12, day=31)
+# END_DAY = datetime(year=2017, month=1, day=1)
 
 XARRAY = True
 # ==============================================================
@@ -264,13 +264,14 @@ def main_xarray():
     day_aux = START_DAY
     for sector in SECTOR_LIST:
         while day_aux <= END_DAY:
+            print(day_aux)
             in_nc = xr.open_dataset(parse_path(INPUT_PATH, date_aux=day_aux, sector=sector))
             for i, poll_info in pol_info_df.iterrows():
                 out_nc = grid_nc.copy()
                 poll_name = poll_info['Model Specie'].lower()
                 out_nc[poll_name] = (('time', 'lat', 'lon'),
                                      (in_nc[poll_info['Model Specie']][:24].mean(dim='TSTEP').data * poll_info[
-                                         'Molecular Weight']) / grid_nc['cell_area'].data)
+                                         'Molecular Weight']) / (grid_nc['cell_area'].data * 1000))
                 out_nc[poll_name].attrs['units'] = 'kg.s-1.m-2'
                 out_nc[poll_name].attrs['long_name'] = poll_info['Definition']
                 out_nc['time'] = [day_aux]
--
GitLab


From ae5ca39fbbb206add31ef4e26f2dc443a3cc618a Mon Sep 17 00:00:00 2001
From: ctena
Date: Tue, 8 Mar 2022 16:38:04 +0100
Subject: [PATCH 3/3] fix to run NEI on HERMES

---
 .../modules/emision_inventories/emission_inventory.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/hermesv3_gr/modules/emision_inventories/emission_inventory.py b/hermesv3_gr/modules/emision_inventories/emission_inventory.py
index 987ef7a..59c4451 100755
--- a/hermesv3_gr/modules/emision_inventories/emission_inventory.py
+++ b/hermesv3_gr/modules/emision_inventories/emission_inventory.py
@@ -266,7 +266,12 @@ class EmissionInventory(object):
         if self.coverage == 'global':
             regridded_emissions = self.regrid.start_regridding()
         else:
-            regridded_emissions = self.regrid.start_esmpy_regridding()
+            try:
+                # Used when the original regional EI doesn't cover the whole destination domain
+                regridded_emissions = self.regrid.start_esmpy_regridding()
+            except:
+                # Used when the destination domain is smaller than the original regional domain (over longitudes)
+                regridded_emissions = self.regrid.start_regridding()
         for emission in regridded_emissions:
             dict_aux = {'name': emission['name'], 'data': emission['data'], 'units': 'm'}
             self.emissions.append(dict_aux)
--
GitLab
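
The unit change fixed in PATCH 2/3 is a grams-to-kilograms conversion. The sketch below is an illustration of the calculation performed in main_xarray() after the fix, not code taken from the patches; the function and variable names are invented for the example. It assumes hourly per-cell emission rates in moles per second (the convention of the CMAQ-ready emis_mole files), a molecular weight in g/mol taken from nei2017gb17j_pollutant_info.csv and the grid cell area in m2, and it returns the daily mean flux in kg.m-2.s-1. Without the division by 1000 the result would be in g.m-2.s-1, which is what the first version of the preprocessor produced.

import numpy as np


def daily_mean_flux(hourly_moles_per_s, molecular_weight, cell_area):
    """Daily mean emission flux in kg.m-2.s-1.

    hourly_moles_per_s: array (24, ny, nx) with the emission of one species
        in moles per second for every grid cell and hour of the day.
    molecular_weight: molecular weight of the species in g/mol.
    cell_area: array (ny, nx) with the area of every grid cell in m2.
    """
    daily_mean = hourly_moles_per_s[:24].mean(axis=0)    # moles.s-1 per cell
    grams_per_s = daily_mean * molecular_weight          # g.s-1 per cell
    return grams_per_s / (cell_area * 1000.0)            # kg.s-1.m-2


# 10 moles.s-1 of NH3 (17 g/mol) in every 12 km x 12 km cell (1.44e8 m2)
# gives 10 * 17 / 1000 / 1.44e8 ~= 1.18e-9 kg.m-2.s-1.
hourly = np.full((24, 2, 2), 10.0)
area = np.full((2, 2), 12000.0 * 12000.0)
print(daily_mean_flux(hourly, 17.0, area))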