diff --git a/CHANGELOG b/CHANGELOG index b309af697c23282df56ba55e2bc8ae53dc3c91f7..5034747da9d470cae563c80733e28989c0e504f8 100755 --- a/CHANGELOG +++ b/CHANGELOG @@ -2,6 +2,7 @@ XXXX/XX/XX - Rotated nested grid - GFAS hourly working + - Compression level added to the configuration file 2.0.3 2020/02/07 diff --git a/conf/hermes.conf b/conf/hermes.conf index 3d57fc540f4d94cf1060b2e474a43564623bc548..170a86ebfa1156ebca0651e1f55a1184e879aa1e 100755 --- a/conf/hermes.conf +++ b/conf/hermes.conf @@ -20,6 +20,7 @@ erase_auxiliary_files = 1 output_model = MONARCH # output_model = CMAQ # output_model = WRF_CHEM +compression_level = 0 output_attributes = /data/global_attributes.csv # domain_type=[lcc, rotated, mercator, regular, rotated_nested] diff --git a/hermesv3_gr/config/config.py b/hermesv3_gr/config/config.py index c5adae94d21b59f5e479e334650cdbae025c48c3..b3522fd61874b39fd02399c9ff7e200249100862 100755 --- a/hermesv3_gr/config/config.py +++ b/hermesv3_gr/config/config.py @@ -75,6 +75,9 @@ class Config(ArgParser): p.add_argument('--output_model', required=True, help='Name of the output model.', choices=['MONARCH', 'CMAQ', 'WRF_CHEM']) + p.add_argument('--compression_level', required=False, type=int, choices=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + help='Compression level of the NetCDF output (0 for no compressed output).', default=4) + p.add_argument('--output_attributes', required=False, help='Path to the file that contains the global attributes.') diff --git a/hermesv3_gr/config/settings.py b/hermesv3_gr/config/settings.py index 8a6966b3743a8dadd6fbe295d1852f74149ed8ac..c114f84b782db1d2e0f6cf1fc07a4b88aa556055 100755 --- a/hermesv3_gr/config/settings.py +++ b/hermesv3_gr/config/settings.py @@ -30,11 +30,6 @@ precision = np.float64 global writing_serial writing_serial = True -global compressed_netcdf -compressed_netcdf = True - -if not writing_serial: - compressed_netcdf = False global icomm global comm diff --git a/hermesv3_gr/hermes.py b/hermesv3_gr/hermes.py index 
5a3344b14b1dc488650840a3733b12bda9d7291f..922c7a5e14b092db35c7c2405fd9f64b41c94a95 100755 --- a/hermesv3_gr/hermes.py +++ b/hermesv3_gr/hermes.py @@ -77,8 +77,7 @@ class HermesGr(object): self.writer = Writer.get_writer( self.options.output_model, self.config.get_output_name(self.options.start_date), self.grid, self.levels, self.options.start_date, self.delta_hours, self.options.output_attributes, - compress=settings.compressed_netcdf, - parallel=not settings.writing_serial) + self.options.compression_level, parallel=not settings.writing_serial) settings.write_log('End of HERMESv3 initialization.') settings.write_time('HERMES', 'Init', timeit.default_timer() - st_time, level=1) diff --git a/hermesv3_gr/modules/writing/writer.py b/hermesv3_gr/modules/writing/writer.py index fb8ebb959612d9bf1c3853f63e7f21c8b52aeb54..69f3f314712562d8a717e626e6b1f1f0beca2889 100755 --- a/hermesv3_gr/modules/writing/writer.py +++ b/hermesv3_gr/modules/writing/writer.py @@ -58,11 +58,21 @@ class Writer(object): :type parallel. bool """ - def __init__(self, path, grid, levels, date, hours, global_attributes_path, compress=True, parallel=False): + def __init__(self, path, grid, levels, date, hours, global_attributes_path, compression_level, parallel=False): self.path = path self.grid = grid - self.compress = compress + + if compression_level > 0: + if parallel: + settings.write_log("WARNING: Parallel write doesn't allow compression. 
Setting it to no compression") + self.compress = False + else: + self.compress = True + self.compression_level = compression_level + else: + self.compress = False + self.parallel = parallel self.variables_attributes = None @@ -316,7 +326,7 @@ class Writer(object): return new_list @staticmethod - def get_writer(output_model, path, grid, levels, date, hours, global_attributes_path, compress, parallel): + def get_writer(output_model, path, grid, levels, date, hours, global_attributes_path, compression_level, parallel): """ Choose between the different writers depending on the desired output model. @@ -341,8 +351,8 @@ class Writer(object): :param global_attributes_path: Path to the file that contains the static global attributes. :type global_attributes_path: str - :param compress: Indicates if you want to compress the netCDF variable data. - :type compress: bool + :param compression_level: Compression level of the netCDF variable data (0 for no compression). + :type compression_level: int :param parallel: Indicates if you want to write in parallel mode. :type parallel. 
bool @@ -356,11 +366,11 @@ class Writer(object): settings.write_log('Selecting writing output type for {0}.'.format(output_model)) if output_model.lower() == 'monarch': - return WriterMonarch(path, grid, levels, date, hours, global_attributes_path, compress, parallel) + return WriterMonarch(path, grid, levels, date, hours, global_attributes_path, compression_level, parallel) elif output_model.lower() == 'cmaq': - return WriterCmaq(path, grid, levels, date, hours, global_attributes_path, compress, parallel) + return WriterCmaq(path, grid, levels, date, hours, global_attributes_path, compression_level, parallel) elif output_model.lower() == 'wrf_chem': - return WriterWrfChem(path, grid, levels, date, hours, global_attributes_path, compress, parallel) + return WriterWrfChem(path, grid, levels, date, hours, global_attributes_path, compression_level, parallel) else: settings.write_log('ERROR: Check the .err file to get more info.') if settings.rank == 0: @@ -463,14 +473,14 @@ class Writer(object): # ===== Variables ===== # Time if date is None: - time = netcdf.createVariable('time', 'd', ('time',), zlib=True) + time = netcdf.createVariable('time', 'd', ('time',), zlib=False) time.units = "months since 2000-01-01 00:00:00" time.standard_name = "time" time.calendar = "gregorian" time.long_name = "time" time[:] = [0.] 
else: - time = netcdf.createVariable('time', 'd', ('time',), zlib=True) + time = netcdf.createVariable('time', 'd', ('time',), zlib=False) # print u.offset_by_time(encode_time(date.year, date.month, date.day, date.hour, date.minute, date.second)) # Unit('hour since 1970-01-01 00:00:00.0000000 UTC') time.units = str(Unit('hours').offset_by_time( @@ -481,7 +491,7 @@ class Writer(object): time[:] = hours # Latitude - lats = netcdf.createVariable('lat', 'f', lat_dim, zlib=True) + lats = netcdf.createVariable('lat', 'f', lat_dim, zlib=False) lats.units = "degrees_north" lats.axis = "Y" lats.long_name = "latitude coordinate" @@ -490,12 +500,12 @@ class Writer(object): if boundary_latitudes is not None: lats.bounds = "lat_bnds" - lat_bnds = netcdf.createVariable('lat_bnds', 'f', lat_dim + ('nv',), zlib=True) + lat_bnds = netcdf.createVariable('lat_bnds', 'f', lat_dim + ('nv',), zlib=False) # print lat_bnds[:].shape, boundary_latitudes.shape lat_bnds[:] = boundary_latitudes # Longitude - lons = netcdf.createVariable('lon', 'f', lon_dim, zlib=True) + lons = netcdf.createVariable('lon', 'f', lon_dim, zlib=False) lons.units = "degrees_east" lons.axis = "X" @@ -505,32 +515,32 @@ class Writer(object): lons[:] = center_longitudes if boundary_longitudes is not None: lons.bounds = "lon_bnds" - lon_bnds = netcdf.createVariable('lon_bnds', 'f', lon_dim + ('nv',), zlib=True) + lon_bnds = netcdf.createVariable('lon_bnds', 'f', lon_dim + ('nv',), zlib=False) # print lon_bnds[:].shape, boundary_longitudes.shape lon_bnds[:] = boundary_longitudes if roated: # Rotated Latitude - rlat = netcdf.createVariable('rlat', 'f', ('rlat',), zlib=True) + rlat = netcdf.createVariable('rlat', 'f', ('rlat',), zlib=False) rlat.long_name = "latitude in rotated pole grid" rlat.units = Unit("degrees").symbol rlat.standard_name = "grid_latitude" rlat[:] = rotated_lats # Rotated Longitude - rlon = netcdf.createVariable('rlon', 'f', ('rlon',), zlib=True) + rlon = netcdf.createVariable('rlon', 'f', ('rlon',), 
zlib=False) rlon.long_name = "longitude in rotated pole grid" rlon.units = Unit("degrees").symbol rlon.standard_name = "grid_longitude" rlon[:] = rotated_lons if lcc or mercator: - x_var = netcdf.createVariable('x', 'd', ('x',), zlib=True) + x_var = netcdf.createVariable('x', 'd', ('x',), zlib=False) x_var.units = Unit("km").symbol x_var.long_name = "x coordinate of projection" x_var.standard_name = "projection_x_coordinate" x_var[:] = lcc_x - y_var = netcdf.createVariable('y', 'd', ('y',), zlib=True) + y_var = netcdf.createVariable('y', 'd', ('y',), zlib=False) y_var.units = Unit("km").symbol y_var.long_name = "y coordinate of projection" y_var.standard_name = "projection_y_coordinate" @@ -540,18 +550,18 @@ class Writer(object): # Levels if levels is not None: var_dim = ('lev',) + var_dim - lev = netcdf.createVariable('lev', 'f', ('lev',), zlib=True) + lev = netcdf.createVariable('lev', 'f', ('lev',), zlib=False) lev.units = Unit("m").symbol lev.positive = 'up' lev[:] = levels # All variables if len(data_list) is 0: - var = netcdf.createVariable('aux_var', 'f', ('time',) + var_dim, zlib=True) + var = netcdf.createVariable('aux_var', 'f', ('time',) + var_dim, zlib=False) var[:] = 0 for variable in data_list: # print ('time',) + var_dim - var = netcdf.createVariable(variable['name'], 'f', ('time',) + var_dim, zlib=True) + var = netcdf.createVariable(variable['name'], 'f', ('time',) + var_dim, zlib=False) var.units = Unit(variable['units']).symbol if 'long_name' in variable: var.long_name = str(variable['long_name']) diff --git a/hermesv3_gr/modules/writing/writer_cmaq.py b/hermesv3_gr/modules/writing/writer_cmaq.py index 71242c10677390be8f9aae06821640ff920c3e09..f9e46882793ea5331985994330e8bbea2338765a 100755 --- a/hermesv3_gr/modules/writing/writer_cmaq.py +++ b/hermesv3_gr/modules/writing/writer_cmaq.py @@ -49,15 +49,16 @@ class WriterCmaq(Writer): :param global_attributes_path: Path to the file that contains the static global attributes. 
:type global_attributes_path: str - :param compress: Indicates if you want to compress the netCDF variable data. - :type compress: bool + :param compression_level: Compression level of the netCDF variable data (0 for no compression). + :type compression_level: int :param parallel: Indicates if you want to write in parallel mode. :type parallel. bool """ - def __init__(self, path, grid, levels, date, hours, global_attributes_path, compress=True, parallel=False): - super(WriterCmaq, self).__init__(path, grid, levels, date, hours, global_attributes_path, compress, parallel) + def __init__(self, path, grid, levels, date, hours, global_attributes_path, compression_level=True, parallel=False): + super(WriterCmaq, self).__init__(path, grid, levels, date, hours, global_attributes_path, compression_level, + parallel) self.global_attributes_order = [ 'IOAPI_VERSION', 'EXEC_ID', 'FTYPE', 'CDATE', 'CTIME', 'WDATE', 'WTIME', 'SDATE', 'STIME', 'TSTEP', 'NTHIK', @@ -434,7 +435,12 @@ class WriterCmaq(Writer): # data_list, var_list = self.change_variable_attributes(self.variables_attributes) for var_name in self.variables_attributes.keys(): index += 1 - var = netcdf.createVariable(var_name, 'f', ('TSTEP', 'LAY', 'ROW', 'COL',), zlib=self.compress) + if self.compress: + var = netcdf.createVariable(var_name, 'f', ('TSTEP', 'LAY', 'ROW', 'COL',), zlib=self.compress, + complevel=self.compression_level) + else: + var = netcdf.createVariable(var_name, 'f', ('TSTEP', 'LAY', 'ROW', 'COL',)) + var.setncatts(self.variables_attributes[var_name]) settings.write_log("\t\t\t'{0}' variable created with size: {1}".format(var_name, var[:].shape) + "\n\t\t\t\t'{0}' variable will be filled later.".format(var_name), level=3) @@ -567,8 +573,12 @@ class WriterCmaq(Writer): data = 0 st_time = timeit.default_timer() index += 1 + if self.compress: + var = netcdf.createVariable(var_name, 'f', ('TSTEP', 'LAY', 'ROW', 'COL',), zlib=True, + complevel=self.compression_level) + else: + var = 
netcdf.createVariable(var_name, 'f', ('TSTEP', 'LAY', 'ROW', 'COL',)) - var = netcdf.createVariable(var_name, 'f', ('TSTEP', 'LAY', 'ROW', 'COL',), zlib=self.compress) var.setncatts(self.variables_attributes[var_name]) # var.units = variable['units'] # var.long_name = str(variable['long_name']) diff --git a/hermesv3_gr/modules/writing/writer_monarch.py b/hermesv3_gr/modules/writing/writer_monarch.py index 8c5a55150329a942a298780ae65afc32ff3a3b42..6a087f4da26682fc9ddf889ddf6149991cfa23db 100755 --- a/hermesv3_gr/modules/writing/writer_monarch.py +++ b/hermesv3_gr/modules/writing/writer_monarch.py @@ -49,15 +49,16 @@ class WriterMonarch(Writer): :param global_attributes_path: Path to the file that contains the static global attributes. :type global_attributes_path: str - :param compress: Indicates if you want to compress the netCDF variable data. - :type compress: bool + :param compression_level: Compression level of the netCDF variable data (0 for no compression). + :type compression_level: int :param parallel: Indicates if you want to write in parallel mode. :type parallel. 
bool """ - def __init__(self, path, grid, levels, date, hours, global_attributes_path, compress=True, parallel=False): - super(WriterMonarch, self).__init__(path, grid, levels, date, hours, global_attributes_path, compress, parallel) + def __init__(self, path, grid, levels, date, hours, global_attributes_path, compression_level=True, parallel=False): + super(WriterMonarch, self).__init__(path, grid, levels, date, hours, global_attributes_path, compression_level, + parallel) # self.global_attributes = { # 'nom_attribut': 'value_attribut' @@ -693,7 +694,11 @@ class WriterMonarch(Writer): except (UnboundLocalError, TypeError, IndexError): data = 0 index += 1 - var = netcdf.createVariable(var_name, 'f', ('time',) + var_dim, zlib=self.compress) + if self.compress: + var = netcdf.createVariable(var_name, 'f', ('time',) + var_dim, zlib=self.compress, + complevel=self.compression_level) + else: + var = netcdf.createVariable(var_name, 'f', ('time',) + var_dim) var.units = Unit(self.variables_attributes[var_name]['units']).symbol diff --git a/hermesv3_gr/modules/writing/writer_wrf_chem.py b/hermesv3_gr/modules/writing/writer_wrf_chem.py index 5f27d7998d5c8fc4e7491dbcd77c483fdf33ecc2..30df6befc04e310e21dd94505986af6f6af712c7 100755 --- a/hermesv3_gr/modules/writing/writer_wrf_chem.py +++ b/hermesv3_gr/modules/writing/writer_wrf_chem.py @@ -49,15 +49,16 @@ class WriterWrfChem(Writer): :param global_attributes_path: Path to the file that contains the static global attributes. :type global_attributes_path: str - :param compress: Indicates if you want to compress the netCDF variable data. - :type compress: bool + :param compression_level: Compression level of the netCDF variable data (0 for no compression). + :type compression_level: int :param parallel: Indicates if you want to write in parallel mode. :type parallel. 
bool """ - def __init__(self, path, grid, levels, date, hours, global_attributes_path, compress=True, parallel=False): - super(WriterWrfChem, self).__init__(path, grid, levels, date, hours, global_attributes_path, compress, parallel) + def __init__(self, path, grid, levels, date, hours, global_attributes_path, compression_level=True, parallel=False): + super(WriterWrfChem, self).__init__(path, grid, levels, date, hours, global_attributes_path, compression_level, + parallel) self.global_attributes_order = [ 'TITLE', 'START_DATE', 'WEST-EAST_GRID_DIMENSION', 'SOUTH-NORTH_GRID_DIMENSION', @@ -308,11 +309,6 @@ class WriterWrfChem(Writer): return aux_times def create_parallel_netcdf(self): - # TODO Documentation - """ - - :return: - """ st_time = timeit.default_timer() settings.write_log("\tCreating parallel NetCDF file.", level=2) netcdf = Dataset(self.path, mode='w', format="NETCDF4") @@ -342,8 +338,12 @@ class WriterWrfChem(Writer): index = 0 for var_name in self.variables_attributes.keys(): index += 1 - var = netcdf.createVariable(var_name, 'f', ('Time', 'emissions_zdim', 'south_north', 'west_east',), - zlib=self.compress) + if self.compress: + var = netcdf.createVariable(var_name, 'f', ('Time', 'emissions_zdim', 'south_north', 'west_east',), + zlib=self.compress, complevel=self.compression_level) + else: + var = netcdf.createVariable(var_name, 'f', ('Time', 'emissions_zdim', 'south_north', 'west_east',)) + var.setncatts(self.variables_attributes[var_name]) settings.write_log("\t\t\t'{0}' variable created with size: {1}".format(var_name, var[:].shape) + "\n\t\t\t\t'{0}' variable will be filled later.".format(var_name), level=3)