diff --git a/VERSION b/VERSION
index 94ff29cc4de4fe0be04331a1eb51f5c515b728a9..944880fa15e85084780c290b929924d3f8b6085f 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-3.1.1
+3.2.0
diff --git a/doc/source/conf.py b/doc/source/conf.py
index 390e6a1977ca07bcd908d564a00f8957454f01d0..780291eab386217363ad1cdd263b669998fa96da 100644
--- a/doc/source/conf.py
+++ b/doc/source/conf.py
@@ -56,16 +56,16 @@ master_doc = 'index'
 
 # General information about the project.
 project = u'Earth Diagnostics'
-copyright = u'2018, BSC-CNS Earth Sciences Department'
+copyright = u'2019, BSC-CNS Earth Sciences Department'
 
 # The version info for the project you're documenting, acts as replacement for
 # |version| and |release|, also used in various other places throughout the
 # built documents.
 #
 # The short X.Y version.
-version = '3.1'
+version = '3.2'
 # The full version, including alpha/beta/rc tags.
-release = '3.1.1'
+release = '3.2.0'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
diff --git a/doc/source/diagnostic_list.rst b/doc/source/diagnostic_list.rst
index dd25f93d617cad647d8a5562acf7824aff1f946c..1679fef9c01b338b5a4ce8604edded373418524f 100644
--- a/doc/source/diagnostic_list.rst
+++ b/doc/source/diagnostic_list.rst
@@ -584,22 +584,22 @@ Options:
 2. Variable:
     Variable to average
 
-
-3. Grid_point:
-    NEMO grid point used to store the variable: T, U, V ...
-
-4. Basin = Global:
+
+3. Basin = Global:
     Basin to compute the average on
 
-5. Save 3d = True:
-    If True, it also stores the average per level
+4. Grid_point:
+    NEMO grid point used to store the variable: T, U, V ...
 
-6. Min depth:
+5. Min depth:
     Minimum depth to compute, in model levels. If -1, average from the surface
 
-7. Max depth:
+6. Max depth:
     Maximum depth to compute, in model levels. If -1, average to the bottom
 
+7. Save 3d = True:
+    If True, it also stores the average per level
+
 8. Variance = False:
     If True, it also stores the variance
 
@@ -607,6 +607,10 @@ Options:
     Source grid to choose. By default this is the original data, but sometimes
     you will want to use another (for example, the 'rotated' one produced by
     the rotation diagnostic)
+10. 
Original frequency:
+    Original frequency of the data to read (defaults to the experiment frequency)
+
+
 rotate
 ~~~~~~
diff --git a/earthdiagnostics/config.py b/earthdiagnostics/config.py
index 17397c8ab068ed5e9a7d81bc2b91d08d52004cc3..bd1e45aff302ef4b7b41be929444c90d01195df0 100644
--- a/earthdiagnostics/config.py
+++ b/earthdiagnostics/config.py
@@ -46,10 +46,8 @@ class Config(object):
         "Custom mesh mask file to use"
         self.new_mask_glo = None
         "Custom new mask glo file to use"
-        self.mask_regions = None
-        "Custom mask regions file to use"
-        self.mask_regions_3d = None
-        "Custom mask regions 3D file to use"
+        self.basins = None
+        "Custom basins file to use"
         self.data_convention = None
         "Data convention to use"
         self.var_manager = None
diff --git a/earthdiagnostics/constants.py b/earthdiagnostics/constants.py
index f2df52f7272311332090df9826814880daa1c979..bb8378e0a88516db7ae85c310bf49d2aed5d4dbd 100644
--- a/earthdiagnostics/constants.py
+++ b/earthdiagnostics/constants.py
@@ -153,20 +153,15 @@ class Basins(object):
 
         self.__initiated = False
 
-    def get_available_basins(self, handler):
+    def get_available_basins(self, cube):
         """
         Read available basins from file
 
-        :param handler:
-        :type handler: netCDF4.Dataset
+        :param cube:
+        :type cube: iris.cube.Cube
         """
-        basin_names = handler.variables.keys()
-        ignored_names = ('lat', 'latitude', 'lon', 'longitude', 'i', 'j', 'time', 'lev')
-
-        for basin in basin_names:
-            if basin in ignored_names:
-                continue
+        for basin in cube.coord('region').points:
             basin_object = Basin(basin)
             setattr(self, basin, basin_object)
             self._add_alias(basin, basin_object)
diff --git a/earthdiagnostics/datafile.py b/earthdiagnostics/datafile.py
index a21248df673999e0ef194c79d8a8ef0343c7ab6e..4d02204c1dcd7218e65b920b2b099c0886e65adc 100644
--- a/earthdiagnostics/datafile.py
+++ b/earthdiagnostics/datafile.py
@@ -264,15 +264,18 @@ class DataFile(Publisher):
     def upload(self):
         """Send a local file to the storage"""
         self.storage_status = StorageStatus.UPLOADING
+        remote_file = self.remote_file
         try:
-            Utils.copy_file(self.local_file, self.remote_file, save_hash=True)
+            if '/cmorfiles/' in remote_file:
+                remote_file = remote_file.replace('/cmorfiles/', '/diags/')
+            Utils.copy_file(self.local_file, remote_file, save_hash=True)
         except (OSError, Exception) as ex:
-            Log.error('File {0} can not be uploaded: {1}', self.remote_file, ex)
+            Log.error('File {0} can not be uploaded: {1}', remote_file, ex)
             self.storage_status = StorageStatus.FAILED
             return
 
         Log.info('File {0} uploaded!', self.remote_file)
-        self.create_link()
+        # self.create_link()
         self.storage_status = StorageStatus.READY
 
     def set_local_file(self, local_file, diagnostic=None, rename_var='', region=None):
@@ -643,46 +646,54 @@ class NetCDFFile(DataFile):
 
     def download(self):
         """Get data from remote storage to the local one"""
-        try:
-            self.local_status = LocalStatus.DOWNLOADING
-            Log.debug('Downloading file {0}...', self.remote_file)
-            if not self.local_file:
-                self.local_file = TempFile.get()
-            # Utils.get_file_hash(self.remote_file, use_stored=True, save=True)
-            try:
-                Utils.copy_file(self.remote_file, self.local_file, retrials=1)
-            except Utils.CopyException:
-                # Utils.get_file_hash(self.remote_file, use_stored=False, save=True)
-                Utils.copy_file(self.remote_file, self.local_file, retrials=2)
-
-            if self.data_convention == 'meteofrance':
-                Log.debug('Converting variable names from meteofrance convention')
-                alt_coord_names = {'time_counter': 'time', 'time_counter_bounds': 'time_bnds',
-                                   'tbnds': 'bnds', 'nav_lat': 'lat', 'nav_lon': 'lon', 'x': 'i',
-                                   'y': 'j'}
-                Utils.rename_variables(self.local_file, alt_coord_names, 
must_exist=False) - Log.info('File {0} ready!', self.remote_file) - self.local_status = LocalStatus.READY - - except Exception as ex: - if os.path.isfile(self.local_file): - os.remove(self.local_file) - Log.error('File {0} not available: {1}', self.remote_file, ex) - self.local_status = LocalStatus.FAILED - - def check_is_in_storage(self): - if os.path.isfile(self.remote_file): - if self.region: + for path in (self.remote_file.replace('/cmorfiles/', '/diags/'), self.remote_file): + if os.path.isfile(path): try: - cubes = iris.load(self.remote_file) - self._check_regions(cubes) - except iris.exceptions.TranslationError as ex: - # If the check goes wrong, we must execute everything - os.remove(self.remote_file) + self.local_status = LocalStatus.DOWNLOADING + Log.debug('Downloading file {0}...', path) + if not self.local_file: + self.local_file = TempFile.get() + # Utils.get_file_hash(self.remote_file, use_stored=True, save=True) + try: + Utils.copy_file(path, self.local_file, retrials=1) + except Utils.CopyException: + # Utils.get_file_hash(self.remote_file, use_stored=False, save=True) + Utils.copy_file(path, self.local_file, retrials=2) + + if self.data_convention == 'meteofrance': + Log.debug('Converting variable names from meteofrance convention') + alt_coord_names = {'time_counter': 'time', 'time_counter_bounds': 'time_bnds', + 'tbnds': 'bnds', 'nav_lat': 'lat', 'nav_lon': 'lon', 'x': 'i', + 'y': 'j'} + Utils.rename_variables(self.local_file, alt_coord_names, must_exist=False) + Log.info('File {0} ready!', path) + self.local_status = LocalStatus.READY + return + except Exception as ex: - Log.debug('Exception when checking file {0}: {1}', self.remote_file, ex) - else: - self.storage_status = StorageStatus.READY + if os.path.isfile(self.local_file): + os.remove(self.local_file) + Log.error('File {0} not available: {1}', path, ex) + self.local_status = LocalStatus.FAILED + return + Log.error('File {0} not available: {1}', self.remote_file, 'FileNotFound') + self.local_status = LocalStatus.FAILED + + def check_is_in_storage(self): + for path in (self.remote_file, self.remote_file.replace('/cmorfiles/', '/diags/')): + if os.path.isfile(path): + if self.region: + try: + cubes = iris.load(path) + self._check_regions(cubes) + except iris.exceptions.TranslationError as ex: + # If the check goes wrong, we must execute everything + os.remove(path) + except Exception as ex: + Log.debug('Exception when checking file {0}: {1}', path, ex) + else: + self.storage_status = StorageStatus.READY + return def _check_regions(self, cubes): for cube in cubes: diff --git a/earthdiagnostics/earthdiags.py b/earthdiagnostics/earthdiags.py index f33cb77e518a5006a683a325eaab701eedc3759a..d6ad4c72f5fca7f4acb8ef23f7e58ae3fd581ab3 100755 --- a/earthdiagnostics/earthdiags.py +++ b/earthdiagnostics/earthdiags.py @@ -3,7 +3,6 @@ """Entry point for EarthDiagnostics""" import argparse import os -import time import sys import shutil import tempfile @@ -11,6 +10,8 @@ from datetime import datetime import netCDF4 import pkg_resources +import iris + import bscearth.utils.path from bscearth.utils.log import Log @@ -66,7 +67,8 @@ class EarthDiags(object): ------- """ - Log.info('Initialising Earth Diagnostics Version {0}', EarthDiags.version) + Log.info( + 'Initialising Earth Diagnostics Version {0}', EarthDiags.version) self.config.parse(config_file) os.environ['HDF5_USE_FILE_LOCKING'] = 'FALSE' TempFile.scratch_folder = self.config.scratch_dir @@ -84,7 +86,8 @@ class EarthDiags(object): For more detailed documentation, use -h 
option """ # try: - parser = argparse.ArgumentParser(description='Main executable for Earth Diagnostics.') + parser = argparse.ArgumentParser( + description='Main executable for Earth Diagnostics.') parser.add_argument('-v', '--version', action='version', version=EarthDiags.version, help="returns Earth Diagnostics's version number and exit") parser.add_argument('--doc', action='store_true', @@ -104,7 +107,8 @@ class EarthDiags(object): parser.add_argument('-log', '--logfilepath', default=None, type=str) - parser.add_argument('-f', '--configfile', default='diags.conf', type=str) + parser.add_argument('-f', '--configfile', + default='diags.conf', type=str) args = parser.parse_args(args) if args.doc: @@ -142,7 +146,8 @@ class EarthDiags(object): True if successful """ Log.info('Opening documentation...') - doc_path = os.path.join('http://earthdiagnostics.readthedocs.io/en/latest') + doc_path = os.path.join( + 'http://earthdiagnostics.readthedocs.io/en/latest') Utils.execute_shell_command(('xdg-open', doc_path)) Log.result('Documentation opened!') return True @@ -192,22 +197,20 @@ class EarthDiags(object): return result def _initialize_basins(self): - self._read_basins_from_file('mask_regions.nc') - self._read_basins_from_file('mask_regions.3d.nc') + self._read_basins_from_file('basins.nc') @staticmethod def _read_basins_from_file(filename): if not os.path.isfile(filename): return - handler = Utils.open_cdf(filename) - Basins().get_available_basins(handler) - handler.close() + Basins().get_available_basins(iris.load_cube(filename)) def _prepare_scratch_dir(self): if self.config.use_ramdisk: self._remove_scratch_dir() tempfile.mkdtemp(dir='/dev/shm') - os.symlink(tempfile.mkdtemp(dir='/dev/shm'), self.config.scratch_dir) + os.symlink(tempfile.mkdtemp(dir='/dev/shm'), + self.config.scratch_dir) else: if not os.path.exists(self.config.scratch_dir): os.makedirs(self.config.scratch_dir) @@ -240,12 +243,14 @@ class EarthDiags(object): if os.path.islink(self.config.scratch_dir): # time.sleep(4) # shutil.rmtree(os.path.realpath(self.config.scratch_dir)) - Utils.execute_shell_command('rm -r {0}'.format(os.path.realpath(self.config.scratch_dir))) + Utils.execute_shell_command( + 'rm -r {0}'.format(os.path.realpath(self.config.scratch_dir))) os.remove(self.config.scratch_dir) elif os.path.isdir(self.config.scratch_dir): # time.sleep(4) # shutil.rmtree(self.config.scratch_dir) - Utils.execute_shell_command('rm -r {0}'.format(self.config.scratch_dir)) + Utils.execute_shell_command( + 'rm -r {0}'.format(self.config.scratch_dir)) def report(self): """ @@ -286,9 +291,11 @@ class EarthDiags(object): if not self.data_manager.file_exists(var.domain, var.short_name, startdate, member, 1, frequency=table.frequency): results.append((var, table, priority)) - Log.debug('Variable {0.short_name} not found in {1.name}', var, table) + Log.debug( + 'Variable {0.short_name} not found in {1.name}', var, table) else: - Log.result('Variable {0.short_name} found in {1.name}', var, table) + Log.result( + 'Variable {0.short_name} found in {1.name}', var, table) return results @@ -297,7 +304,8 @@ class EarthDiags(object): tables = set([result[1].name for result in results]) for table in tables: file_handler = open('{0}.{1}'.format(report_path, table), 'w') - table_results = [result for result in results if result[1].name == table] + table_results = [ + result for result in results if result[1].name == table] file_handler.write('\nTable {0}\n'.format(table)) file_handler.write('===================================\n') @@ -305,13 
+313,17 @@ class EarthDiags(object): priorities = set([result[2] for result in table_results]) priorities = sorted(priorities) for priority in priorities: - priority_results = [result[0] for result in table_results if result[2] == priority] - priority_results = sorted(priority_results, key=lambda v: v.short_name) - file_handler.write('\nMissing variables with priority {0}:\n'.format(priority)) + priority_results = [result[0] + for result in table_results if result[2] == priority] + priority_results = sorted( + priority_results, key=lambda v: v.short_name) + file_handler.write( + '\nMissing variables with priority {0}:\n'.format(priority)) file_handler.write('--------------------------------------\n') for var in priority_results: - file_handler.write('{0:12}: {1}\n'.format(var.short_name, var.standard_name)) + file_handler.write('{0:12}: {1}\n'.format( + var.short_name, var.standard_name)) file_handler.flush() file_handler.close() @@ -327,8 +339,7 @@ class EarthDiags(object): mesh_mask = 'mesh_mask_nemo.{0}.nc'.format(model_version) new_mask_glo = 'new_maskglo.{0}.nc'.format(model_version) - mask_regions = 'mask.regions.{0}.nc'.format(model_version) - mask_regions_3d = 'mask.regions.3d.{0}.nc'.format(model_version) + basins = 'basins.{0}.nc'.format(model_version) if self.config.mesh_mask: mesh_mask_path = self.config.mesh_mask @@ -340,64 +351,60 @@ class EarthDiags(object): else: new_mask_glo_path = os.path.join(con_files, new_mask_glo) - if self.config.mask_regions: - mask_regions_path = self.config.mask_regions - else: - mask_regions_path = os.path.join(con_files, mask_regions) - - if self.config.mask_regions_3d: - mask_regions_3d_path = self.config.mask_regions_3d + if self.config.basins: + basins_path = self.config.basins else: - mask_regions_3d_path = os.path.join(con_files, mask_regions_3d) + basins_path = os.path.join(con_files, basins) if self.config.scratch_masks: - self._prepare_mesh_using_scratch(mask_regions, mask_regions_3d, mask_regions_3d_path, mask_regions_path, - mesh_mask, mesh_mask_path, new_mask_glo, new_mask_glo_path, - restore_meshes) + self._prepare_mesh_using_scratch( + basins, basins_path, + mesh_mask, mesh_mask_path, + new_mask_glo, new_mask_glo_path, + restore_meshes + ) else: self._copy_file(mesh_mask_path, 'mesh_hgr.nc', restore_meshes) self._link_file('mesh_hgr.nc', 'mesh_zgr.nc') self._link_file('mesh_hgr.nc', 'mask.nc') self._copy_file(new_mask_glo_path, 'new_maskglo.nc', restore_meshes) - self._copy_file(mask_regions_path, - 'mask_regions.nc', restore_meshes) - self._copy_file(mask_regions_3d_path, - 'mask_regions.3d.nc', restore_meshes) + self._copy_file(basins_path, 'basins.nc', restore_meshes) Log.result('Mesh files ready!') - def _prepare_mesh_using_scratch(self, mask_regions, mask_regions_3d, mask_regions_3d_path, mask_regions_path, - mesh_mask, mesh_mask_path, new_mask_glo, new_mask_glo_path, + def _prepare_mesh_using_scratch(self, basins, basins_path, + mesh_mask, mesh_mask_path, + new_mask_glo, new_mask_glo_path, restore_meshes): Utils.create_folder_tree(self.config.scratch_masks) Utils.give_group_write_permissions(self.config.scratch_masks) - mesh_mask_scratch_path = os.path.join(self.config.scratch_masks, mesh_mask) + mesh_mask_scratch_path = os.path.join( + self.config.scratch_masks, mesh_mask) if self._copy_file(mesh_mask_path, mesh_mask_scratch_path, restore_meshes): Utils.give_group_write_permissions(mesh_mask_scratch_path) self._link_file(mesh_mask_scratch_path, 'mesh_hgr.nc') self._link_file(mesh_mask_scratch_path, 'mesh_zgr.nc') 
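
The mask-file consolidation in this section replaces the mask_regions.nc / mask_regions.3d.nc pair with a single basins.nc. A minimal sketch of the new lookup, assuming basins.nc stores one mask per basin along a string 'region' coordinate, which is what the rewritten get_available_basins() and Utils.get_mask() expect (the basin name below is illustrative):

```python
import iris

# Load the consolidated basins file prepared next to mesh_hgr.nc.
basins = iris.load_cube('basins.nc')

# Every entry of the 'region' coordinate becomes an attribute
# of the Basins() singleton.
for name in basins.coord('region').points:
    print(name)

# Extracting one basin mask, mirroring the new Utils.get_mask():
mask = basins.extract(iris.Constraint(region='Atlantic_Ocean')).data
```
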
self._link_file(mesh_mask_scratch_path, 'mask.nc')
-        new_maskglo_scratch_path = os.path.join(self.config.scratch_masks, new_mask_glo)
+
+        new_maskglo_scratch_path = os.path.join(
+            self.config.scratch_masks, new_mask_glo)
         if self._copy_file(new_mask_glo_path, new_maskglo_scratch_path,
                            restore_meshes):
             Utils.give_group_write_permissions(new_maskglo_scratch_path)
             self._link_file(new_maskglo_scratch_path, 'new_maskglo.nc')
-        mask_regions_scratch_path = os.path.join(self.config.scratch_masks, mask_regions)
-        if self._copy_file(mask_regions_path,
-                           mask_regions_scratch_path, restore_meshes):
-            Utils.give_group_write_permissions(mask_regions_scratch_path)
-            self._link_file(mask_regions_scratch_path, 'mask_regions.nc')
-        mask_regions3d_scratch_path = os.path.join(self.config.scratch_masks, mask_regions_3d)
-        if self._copy_file(mask_regions_3d_path,
-                           mask_regions3d_scratch_path, restore_meshes):
-            Utils.give_group_write_permissions(mask_regions3d_scratch_path)
-            self._link_file(mask_regions3d_scratch_path, 'mask_regions.3d.nc')
+
+        basins_scratch_path = os.path.join(
+            self.config.scratch_masks, basins)
+        if self._copy_file(basins_path, basins_scratch_path, restore_meshes):
+            Utils.give_group_write_permissions(basins_scratch_path)
+            self._link_file(basins_scratch_path, 'basins.nc')
 
     def _copy_file(self, source, destiny, force):
         if not os.path.exists(source):
-            Log.user_warning('File {0} is not available for {1}', destiny, self.config.experiment.model_version)
+            Log.user_warning('File {0} is not available for {1}',
+                             destiny, self.config.experiment.model_version)
             Log.debug('Looking for it in {0}', source)
             return False
@@ -417,7 +424,8 @@
     def _link_file(self, source, destiny):
         if not os.path.exists(source):
-            Log.user_warning('File {0} is not available for {1}', destiny, self.config.experiment.model_version)
+            Log.user_warning('File {0} is not available for {1}',
+                             destiny, self.config.experiment.model_version)
             return
 
         if os.path.lexists(destiny):
diff --git a/earthdiagnostics/frequency.py b/earthdiagnostics/frequency.py
index 5cd15f48f509e6242a0cd11050cc9a7eef5e2cd4..45391649a1c1b9629aa20a526278cf728525c6b3 100644
--- a/earthdiagnostics/frequency.py
+++ b/earthdiagnostics/frequency.py
@@ -5,23 +5,31 @@ class Frequency(object):
 
     """Time frequency"""
 
-    _recognized = {'f': 'fx', 'fx': 'fx', 'fixed': 'fx',
-                   'c': 'clim', 'clim': 'clim', 'climatology': 'clim', 'monclim': 'clim', '1hrclimmon': 'clim',
-                   'dec': 'dec', 'decadal': 'dec',
-                   'y': 'year', 'yr': 'year', 'year': 'year', 'yearly': 'year',
-                   'm': 'mon', '1m': 'mon', 'mon': 'mon', 'monthly': 'mon', 'mm': 'mon',
-                   'w': 'week', '1w': 'week', 'week': 'week', 'weekly': 'week',
-                   'd': 'day', '1d': 'day', 'daily': 'day', 'day': 'day',
-                   '15': '15hr', '15h': '15hr', '15hr': '15hr', '15_hourly': '15hr', '15hourly': '15hr',
-                   '15 hourly': '15hr',
-                   '6': '6hr', '6h': '6hr', '6hr': '6hr', '6_hourly': '6hr', '6hourly': '6hr', '6 hourly': '6hr',
-                   '3': '3hr', '3h': '3hr', '3hr': '3hr', '3_hourly': '3hr', '3hourly': '3hr', '3 hourly': '3hr',
-                   '1': '1hr', 'hr': '1hr', '1h': '1hr', 'hourly': '1hr', '1hr': '1hr', '1 hourly': '1hr',
-                   '450mn': '450mn',
-                   'subhr': 'subhr'}
+    _recognized = {
+        'f': 'fx', 'fx': 'fx', 'fixed': 'fx',
+        'c': 'clim', 'clim': 'clim', 'climatology': 'clim', 'monclim': 'clim', '1hrclimmon': 'clim',
+        'monc': 'clim',
+        'dec': 'dec', 'decadal': 'dec',
+        'y': 'year', 'yr': 'year', 'year': 'year', 'yearly': 'year',
+        'm': 'mon', '1m': 'mon', 'mon': 'mon', 'monthly': 'mon', 'mm': 'mon',
+        'w': 'week', '1w': 'week', 'week': 'week', 
'weekly': 'week', + 'd': 'day', '1d': 'day', 'daily': 'day', 'day': 'day', + '15': '15hr', '15h': '15hr', '15hr': '15hr', '15_hourly': '15hr', '15hourly': '15hr', + '15 hourly': '15hr', + '6': '6hr', '6h': '6hr', '6hr': '6hr', '6_hourly': '6hr', '6hourly': '6hr', '6 hourly': '6hr', + '3': '3hr', '3h': '3hr', '3hr': '3hr', '3_hourly': '3hr', '3hourly': '3hr', '3 hourly': '3hr', + '1': '1hr', 'hr': '1hr', '1h': '1hr', 'hourly': '1hr', '1hr': '1hr', '1 hourly': '1hr', + '450mn': '450mn', + 'subhr': 'subhr' + } def __init__(self, freq): freq = freq.lower() + if freq.endswith('cm'): + freq = freq[:-2] + self.point = freq.endswith('pt') + if self.point: + freq = freq[:-2] try: self.frequency = Frequency._recognized[freq] except KeyError: @@ -49,18 +57,25 @@ class Frequency(object): str """ from earthdiagnostics.variable import VariableType - if self == Frequencies.daily: - freq_str = 'daily_{0}'.format(VariableType.to_str(vartype)) + if self == Frequencies.climatology: + return 'clim' + elif self == Frequencies.fixed: + return 'fx' + elif self == Frequencies.decadal: + freq_str = 'decadal' + elif self == Frequencies.yearly: + freq_str = 'yearly' + elif self == Frequencies.monthly: + freq_str = 'monthly' elif self == Frequencies.weekly: - freq_str = 'weekly_{0}'.format(VariableType.to_str(vartype)) - elif self == Frequencies.climatology: - freq_str = 'clim' - elif self in (Frequencies.three_hourly, Frequencies.six_hourly, Frequencies.hourly): + freq_str = 'weekly' + elif self == Frequencies.daily: + freq_str = 'daily' + elif self.frequency.endswith('hr'): freq_str = self.frequency[:-2] + 'hourly' - if vartype != VariableType.MEAN: - freq_str = '{0}_{1}'.format(freq_str, VariableType.to_str(vartype)) - else: - freq_str = 'monthly_{0}'.format(VariableType.to_str(vartype)) + self.point = True + if not self.point or vartype != VariableType.MEAN: + freq_str = '{0}_{1}'.format(freq_str, VariableType.to_str(vartype)) return freq_str @staticmethod @@ -88,6 +103,7 @@ class Frequencies(object): fixed = Frequency('fx') climatology = Frequency('clim') + decadal = Frequency('dec') yearly = Frequency('year') monthly = Frequency('mon') weekly = Frequency('week') diff --git a/earthdiagnostics/ocean/heatcontentlayer.py b/earthdiagnostics/ocean/heatcontentlayer.py index d15e3274ebaad500d7a833c0df9740eccf4de2af..1f64bcfc23685fcd2148c5e753f4bb39ae205afe 100644 --- a/earthdiagnostics/ocean/heatcontentlayer.py +++ b/earthdiagnostics/ocean/heatcontentlayer.py @@ -47,7 +47,7 @@ class HeatContentLayer(Diagnostic): "Diagnostic alias for the configuration file" def __init__(self, data_manager, startdate, member, chunk, box, areas, - weight, layers, basins, data_convention): + weight, layers, basins, data_convention, min_level, max_level): Diagnostic.__init__(self, data_manager) self.startdate = startdate self.member = member @@ -57,9 +57,9 @@ class HeatContentLayer(Diagnostic): self.weight = weight self.layers = layers self.basins = basins - self.required_vars = ['so', 'mlotst'] - self.generated_vars = ['scvertsum'] self.data_convention = data_convention + self.min_level = min_level + self.max_level = max_level def __str__(self): return 'Heat content layer Startdate: {0} Member: {1} Chunk: {2} Box: {3}'.format(self.startdate, self.member, @@ -108,6 +108,8 @@ class HeatContentLayer(Diagnostic): mask = mesh.get_landsea_mask() depth = mesh.get_depth(cell_point='W') weight = ohc.get_weights(layers, mask, e3t, depth) + max_level, min_level = cls._get_used_levels(weight) + weight[0] = weight[0][:, min_level:max_level,:, :] del 
mask, depth, e3t
@@ -115,7 +117,7 @@
             job_list.append(HeatContentLayer(
                 diags.data_manager, startdate, member, chunk, box,
                 areas, weight, layers, basins,
-                diags.config.data_convention
+                diags.config.data_convention, min_level, max_level
             ))
         return job_list
@@ -161,6 +163,18 @@
             self.startdate, self.member, self.chunk, box=self.box)
 
+    @classmethod
+    def _get_used_levels(cls, weight):
+        # Now we will reduce to the levels with any weight != 0 to avoid loading too much data into memory
+        levels = weight[0].shape[1]
+        min_level = 0
+        while min_level < levels and not weight[0][:, min_level, :].any():
+            min_level += 1
+        max_level = min_level
+        while max_level < (levels - 1) and weight[0][:, max_level + 1, :].any():
+            max_level += 1
+        return max_level, min_level
+
     def compute(self):
         """Run the diagnostic"""
         thetao_file = TempFile.get()
@@ -171,23 +185,30 @@
         handler = Utils.open_cdf(thetao_file)
         Utils.convert_units(handler.variables['thetao'], 'K')
         heatc_sl, heatc_sl1D = ohc.compute(self.layers, self.weight,
-                                           handler.variables['thetao'][:],
+                                           handler.variables['thetao'][:, self.min_level:self.max_level, :, :],
                                            self.areas)
         handler.sync()
         handler.renameVariable('thetao', 'heatc_sl')
 
         results = TempFile.get()
         handler_results = Utils.open_cdf(results, 'w')
+        lat_name = next(alias for alias in ('lat', 'latitude')
+                        if alias in handler.variables.keys())
+        lon_name = next(alias for alias in ('lon', 'longitude')
+                        if alias in handler.variables.keys())
         Utils.copy_variable(handler, handler_results, 'time', True, True)
-        Utils.copy_variable(handler, handler_results, 'i', True, True)
-        Utils.copy_variable(handler, handler_results, 'j', True, True)
-        # Utils.rename_variables(results, {'x': 'i', 'y': 'j'}, False)
+        Utils.copy_variable(handler, handler_results, 'i', False, True)
+        Utils.copy_variable(handler, handler_results, 'j', False, True)
+        Utils.copy_variable(handler, handler_results, lat_name, True, True)
+        Utils.copy_variable(handler, handler_results, lon_name, True, True)
         var = handler_results.createVariable('heatc', float, ('time', 'j', 'i'),
                                              fill_value=1.e20)
         var.units = 'J m-2'
+        var.coordinates = ' '.join((lat_name, lon_name))
         handler_results.sync()
-        handler_results.variables['heatc'][:] = heatc_sl[0]  # temporary fix, needs to loop over layers
+        # temporary fix, needs to loop over layers
+        handler_results.variables['heatc'][:] = heatc_sl[0]
         handler_results.close()
 
         results1D = TempFile.get()
@@ -195,8 +216,10 @@
         Utils.copy_variable(handler, handler_results1D, 'time', True, True)
         handler_results1D.createDimension('region', len(self.basins))
         handler_results1D.createDimension('region_length', 50)
-        var_region = handler_results1D.createVariable('region', 'S1', ('region', 'region_length'))
-        var_ohc1D = handler_results1D.createVariable('heatcsum', float, ('time', 'region',),)
+        var_region = handler_results1D.createVariable(
+            'region', 'S1', ('region', 'region_length'))
+        var_ohc1D = handler_results1D.createVariable(
+            'heatcsum', float, ('time', 'region',),)
         handler_results1D.sync()
         for i, basin in enumerate(self.basins):
             var_region[i, ...] 
= netCDF4.stringtoarr(basin.name, 50) @@ -206,3 +229,4 @@ class HeatContentLayer(Diagnostic): Utils.setminmax(results, 'heatc') self.heatc.set_local_file(results) self.heatcsum.set_local_file(results1D) + diff --git a/earthdiagnostics/ocean/regionmean.py b/earthdiagnostics/ocean/regionmean.py index c70c950085fcef2dc3072608baebe6c67a5b348a..50da4315c38118c9aa87e73ba282ac217c334e98 100644 --- a/earthdiagnostics/ocean/regionmean.py +++ b/earthdiagnostics/ocean/regionmean.py @@ -14,7 +14,8 @@ from earthdiagnostics.box import Box from earthdiagnostics.constants import Basins from earthdiagnostics.diagnostic import Diagnostic, DiagnosticOption, \ DiagnosticIntOption, DiagnosticDomainOption, \ - DiagnosticBoolOption, DiagnosticBasinListOption, DiagnosticVariableOption + DiagnosticBoolOption, DiagnosticBasinListOption, \ + DiagnosticVariableListOption, DiagnosticFrequencyOption from earthdiagnostics.modelingrealm import ModelingRealms from earthdiagnostics.utils import Utils, TempFile @@ -52,7 +53,8 @@ class RegionMean(Diagnostic): "Diagnostic alias for the configuration file" def __init__(self, data_manager, startdate, member, chunk, domain, - variable, box, save3d, variance, basins, grid_point): + variable, box, save3d, variance, basins, grid_point, + frequency): Diagnostic.__init__(self, data_manager) self.startdate = startdate self.member = member @@ -64,6 +66,7 @@ class RegionMean(Diagnostic): self.variance = variance self.basins = basins self.grid_point = grid_point + self.frequency = frequency self.declared = {} @@ -97,30 +100,24 @@ class RegionMean(Diagnostic): :type options: list[str] :return: """ - options_available = (DiagnosticDomainOption(), - DiagnosticVariableOption(diags.data_manager.config.var_manager), - DiagnosticOption('grid_point', 'T'), - DiagnosticBasinListOption('basins', 'global'), - DiagnosticIntOption('min_depth', -1), - DiagnosticIntOption('max_depth', -1), - DiagnosticIntOption('min_lat', -1), - DiagnosticIntOption('max_lat', -1), - DiagnosticIntOption('min_lon', -1), - DiagnosticIntOption('max_lon', -1), - DiagnosticBoolOption('save3D', True), - DiagnosticBoolOption('variance', False), - DiagnosticOption('grid', '')) + options_available = ( + DiagnosticDomainOption(), + DiagnosticVariableListOption( + diags.data_manager.config.var_manager, 'variable'), + DiagnosticBasinListOption('basins', 'global'), + DiagnosticOption('grid_point', 'T'), + DiagnosticIntOption('min_depth', -1), + DiagnosticIntOption('max_depth', -1), + DiagnosticBoolOption('save3D', True), + DiagnosticBoolOption('variance', False), + DiagnosticOption('grid', ''), + DiagnosticFrequencyOption('frequency', diags.config.frequency), + ) options = cls.process_options(options, options_available) box = Box() box.min_depth = options['min_depth'] box.max_depth = options['max_depth'] - if options['min_lat'] != -1: - box.min_lat = options['min_lat'] - box.max_lat = options['max_lat'] - if options['min_lon'] != -1 or options['max_lon'] != -1: - box.min_lon = options['min_lon'] - box.max_lon = options['max_lon'] basins = options['basins'] if not basins: @@ -128,13 +125,17 @@ class RegionMean(Diagnostic): return() job_list = list() - for startdate, member, chunk in diags.config.experiment.get_chunk_list(): - job = RegionMean(diags.data_manager, startdate, member, chunk, - options['domain'], options['variable'], box, - options['save3D'], options['variance'], - options['basins'], - options['grid_point'].lower()) - job_list.append(job) + for var in options['variable']: + for startdate, member, chunk in 
diags.config.experiment.get_chunk_list(): + job = RegionMean( + diags.data_manager, startdate, member, chunk, + options['domain'], var, box, + options['save3D'], options['variance'], + options['basins'], + options['grid_point'].lower(), + options['frequency'], + ) + job_list.append(job) return job_list @@ -162,20 +163,7 @@ class RegionMean(Diagnostic): self.basins.sort() for basin in self.basins: masks[basin] = Utils.get_mask(basin) - mesh = Nemo('mesh_hgr.nc', 'mask_regions.nc') - if ( - self.box.min_lat is not -1 and self.box.max_lat is not -1 and - self.box.min_lon is not -1 and self.box.max_lat is not -1 - ): - name = '{0}_{1}'.format( - Box.get_lat_str(self.box), Box.get_lon_str(self.box) - ) - - masks[name] = mesh.get_region_mask(self.box.min_lat, - self.box.max_lat, - self.box.min_lon, - self.box.max_lon) if has_levels: self._meand_3d_variable(data, mesh, masks) else: @@ -192,7 +180,9 @@ class RegionMean(Diagnostic): e3 = self._rename_depth(e3) e3.coord('depth').bounds = data.coord('depth').bounds if self.box.min_depth is not -1 and self.box.max_depth is not -1: - depth_constraint = iris.Constraint(depth=lambda c: self.box.min_depth <= c <= self.box.max_depth) + depth_constraint = iris.Constraint( + depth=lambda c: self.box.min_depth <= c <= self.box.max_depth + ) e3 = e3.extract(depth_constraint) data = data.extract(depth_constraint) volcello = areacello*e3.data.astype(np.float32) @@ -204,11 +194,10 @@ class RegionMean(Diagnostic): def _try_load_cube(self, number): try: - cube = iris.load_cube('mesh_hgr.nc', 'e{0}{1}'.format(number, - self.grid_point)) + cube = iris.load_cube('mesh_hgr.nc', f'e{number}{self.grid_point}') except iris.exceptions.ConstraintMismatchError: - cube = iris.load_cube('mesh_hgr.nc', 'e{0}{1}_0'.format(number, - self.grid_point)) + cube = iris.load_cube( + 'mesh_hgr.nc', f'e{number}{self.grid_point}_0') cube = iris.util.squeeze(cube) dims = len(cube.shape) try: diff --git a/earthdiagnostics/ocean/siasiesiv.py b/earthdiagnostics/ocean/siasiesiv.py index 56b49c81d50123ae1f95a3f3aac3b6f272269780..24cf3c0e604991671113583716f456770757cbf9 100644 --- a/earthdiagnostics/ocean/siasiesiv.py +++ b/earthdiagnostics/ocean/siasiesiv.py @@ -151,6 +151,7 @@ class Siasiesiv(Diagnostic): sic_slices = [] for sic_data in sic.slices_over('time'): sic_data.data = np.ma.filled(sic_data.data, 0.0).astype(np.float32) + sic_data.data[np.isinf(sic_data.data)] = 0.0 sic_slices.append(sic_data) mesh = Nemo('mesh_hgr.nc', 'mask_regions.nc') areacello = mesh.get_areacello(cell_point='T') @@ -161,6 +162,7 @@ class Siasiesiv(Diagnostic): sit_slices = [] for sit_data in sit.slices_over('time'): sit_data.data = np.ma.filled(sit_data.data, 0.0).astype(np.float32) + sit_data.data[np.isinf(sit_data.data)] = 0.0 sit_slices.append(sit_data) results = siasie.compute(gphit, areacello, sic_slices, self.masks, sit_slices) self.results['siextentn'] = results[0] diff --git a/earthdiagnostics/utils.py b/earthdiagnostics/utils.py index a9805a7d1f96398c5951353adbb76d63b1724693..cc81b2bd10795466c3cd7e9b556f7ec4430640cf 100644 --- a/earthdiagnostics/utils.py +++ b/earthdiagnostics/utils.py @@ -82,15 +82,11 @@ class Utils(object): basin = Basins().parse(basin) if basin != Basins().Global: try: - if with_levels: - mask_handler = Utils.open_cdf('mask_regions.3d.nc') - mask = mask_handler.variables[basin.name][0, ...] 
- else: - mask_handler = Utils.open_cdf('mask_regions.nc') - mask = mask_handler.variables[basin.name][:, 0, :] - mask_handler.close() + basins = iris.load_cube('basins.nc') + return basins.extract(iris.Constraint(region=basin.name)).data except IOError: - raise Exception('File mask.regions.nc is required for basin {0}'.format(basin)) + raise Exception( + 'File basins.nc is required for basin {0}'.format(basin)) else: mask_handler = Utils.open_cdf('mask.nc') mask = mask_handler.variables['tmask'][0, 0, :] @@ -121,12 +117,14 @@ class Utils(object): Utils.nco().ncatted( input=filename, output=filename, - options=('-h -a valid_max,{0},m,f,{1}'.format(variable, values[0]),) + options=( + '-h -a valid_max,{0},m,f,{1}'.format(variable, values[0]),) ) Utils.nco().ncatted( input=filename, output=filename, - options=('-h -a valid_min,{0},m,f,{1}'.format(variable, values[1]),) + options=( + '-h -a valid_min,{0},m,f,{1}'.format(variable, values[1]),) ) handler.close() @@ -149,7 +147,8 @@ class Utils(object): Utils.rename_variables """ - Utils.rename_variables(filepath, {old_name: new_name}, must_exist, rename_dimension) + Utils.rename_variables( + filepath, {old_name: new_name}, must_exist, rename_dimension) @staticmethod def rename_variables(filepath, dic_names, must_exist=True, rename_dimension=True): @@ -175,18 +174,21 @@ class Utils(object): raise ValueError('{0} original name is the same as the new') original_handler = Utils.open_cdf(filepath) - original_names = set(original_handler.variables.keys()).union(original_handler.dimensions.keys()) + original_names = set(original_handler.variables.keys()).union( + original_handler.dimensions.keys()) if not any((True for x in dic_names.keys() if x in original_names)): original_handler.close() if must_exist: - raise Exception("Variables {0} does not exist in file {1}".format(','.join(dic_names.keys()), filepath)) + raise Exception("Variables {0} does not exist in file {1}".format( + ','.join(dic_names.keys()), filepath)) return temp = TempFile.get() new_handler = Utils.open_cdf(temp, 'w') for attribute in original_handler.ncattrs(): original = getattr(original_handler, attribute) - setattr(new_handler, attribute, Utils.convert_to_ascii_if_possible(original)) + setattr(new_handler, attribute, + Utils.convert_to_ascii_if_possible(original)) for dimension in original_handler.dimensions.keys(): Utils.copy_dimension(original_handler, new_handler, dimension, new_names=dic_names, rename_dimension=rename_dimension) @@ -285,7 +287,8 @@ class Utils(object): if old_name in handler.dimensions: handler.renameDimension(old_name, new_name) elif must_exist: - raise Exception("Dimension {0} does not exist in file {1}".format(old_name, filepath)) + raise Exception( + "Dimension {0} does not exist in file {1}".format(old_name, filepath)) if old_name in handler.variables: if new_name not in handler.variables: @@ -294,9 +297,11 @@ class Utils(object): if hasattr(var, 'coordinates') and " {0} ".format(old_name) in var.coordinates: new_coordinates = var.coordinates.replace(" {0} ".format(old_name), " {0} ".format(new_name)) - var.coordinates = Utils.convert_to_ascii_if_possible(new_coordinates) + var.coordinates = Utils.convert_to_ascii_if_possible( + new_coordinates) elif must_exist: - raise Exception("Variable {0} does not exist in file {1}".format(old_name, filepath)) + raise Exception( + "Variable {0} does not exist in file {1}".format(old_name, filepath)) handler.sync() @staticmethod @@ -516,9 +521,11 @@ class Utils(object): """Number of available virtual or physical CPUs 
on this system""" if Utils._cpu_count is None: try: - match = re.search(r'(?m)^Cpus_allowed:\s*(.*)$', open('/proc/self/status').read()) + match = re.search(r'(?m)^Cpus_allowed:\s*(.*)$', + open('/proc/self/status').read()) if match: - res = bin(int(match.group(1).replace(',', ''), 16)).count('1') + res = bin( + int(match.group(1).replace(',', ''), 16)).count('1') if res > 0: Utils._cpu_count = res except IOError: @@ -546,12 +553,14 @@ class Utils(object): Log.debug('Reformatting to netCDF-4') temp = TempFile.get() - Utils.execute_shell_command(["nccopy", "-4", "-d4", "-s", filetoconvert, temp]) + Utils.execute_shell_command( + ["nccopy", "-4", "-d4", "-s", filetoconvert, temp]) shutil.move(temp, filetoconvert) @classmethod def _is_compressed_netcdf4(cls, filetoconvert): - ncdump_result = Utils.execute_shell_command('ncdump -hs {0}'.format(filetoconvert), Log.NO_LOG) + ncdump_result = Utils.execute_shell_command( + 'ncdump -hs {0}'.format(filetoconvert), Log.NO_LOG) ncdump_result = ncdump_result[0].replace('\t', '').split('\n') if any(':_Shuffle = "true"' in line for line in ncdump_result) and \ any(':_DeflateLevel = 4' in line for line in ncdump_result): @@ -638,19 +647,23 @@ class Utils(object): # Just in case the variable we are copying match a dimension name return original_var = source.variables[variable] - new_var = destiny.createVariable(new_name, original_var.datatype, translated_dimensions) + new_var = destiny.createVariable( + new_name, original_var.datatype, translated_dimensions) Utils.copy_attributes(new_var, original_var) if hasattr(new_var, 'coordinates'): - coords = [new_names[coord] if coord in new_names else coord for coord in new_var.coordinates.split(' ')] + coords = [ + new_names[coord] if coord in new_names else coord for coord in new_var.coordinates.split(' ')] coords = [coord for coord in coords if coord] if coords: - new_var.coordinates = Utils.convert_to_ascii_if_possible(' '.join(coords)) + new_var.coordinates = Utils.convert_to_ascii_if_possible( + ' '.join(coords)) new_var[:] = original_var[:] @staticmethod def _copy_dimensions(add_dimensions, destiny, must_exist, new_names, rename_dimension, source, variable): if rename_dimension: - translated_dimensions = Utils._translate(source.variables[variable].dimensions, new_names) + translated_dimensions = Utils._translate( + source.variables[variable].dimensions, new_names) else: translated_dimensions = list(source.variables[variable].dimensions) if not set(translated_dimensions).issubset(destiny.dimensions): @@ -658,7 +671,8 @@ class Utils(object): raise Exception('Variable {0} can not be added because dimensions does not match: ' '{1} {2}'.format(variable, translated_dimensions, destiny.dimensions)) for dimension in source.variables[variable].dimensions: - Utils.copy_dimension(source, destiny, dimension, must_exist, new_names, rename_dimension) + Utils.copy_dimension( + source, destiny, dimension, must_exist, new_names, rename_dimension) return translated_dimensions @staticmethod @@ -727,7 +741,8 @@ class Utils(object): concatenated = dict() for var in handler_variable.variables: if var not in handler_total.variables: - Utils.copy_variable(handler_variable, handler_total, var, add_dimensions=True) + Utils.copy_variable( + handler_variable, handler_total, var, add_dimensions=True) else: variable = handler_variable.variables[var] if 'time' not in variable.dimensions: @@ -811,7 +826,8 @@ class Utils(object): import cf_units new_unit = cf_units.Unit(new_units, calendar=calendar) old_unit = 
cf_units.Unit(var_handler.units, calendar=old_calendar) - var_handler[:] = old_unit.convert(np.array(var_handler[:]), new_unit, inplace=True) + var_handler[:] = old_unit.convert( + np.array(var_handler[:]), new_unit, inplace=True) if 'valid_min' in var_handler.ncattrs(): var_handler.valid_min = old_unit.convert(float(var_handler.valid_min), new_unit, inplace=True) @@ -840,9 +856,11 @@ class Utils(object): continue else: if os.path.exists(os.path.join(destiny_path, file_compressed.name)): - os.remove(os.path.join(destiny_path, file_compressed.name)) + os.remove(os.path.join( + destiny_path, file_compressed.name)) tar.extract(file_compressed, destiny_path) - Log.debug('File {0} extracted', os.path.basename(file_compressed.name)) + Log.debug('File {0} extracted', + os.path.basename(file_compressed.name)) tar.close() @staticmethod @@ -865,9 +883,11 @@ class Utils(object): else: option = '' try: - Utils.execute_shell_command('gunzip{1} {0}'.format(filepath, option)) + Utils.execute_shell_command( + 'gunzip{1} {0}'.format(filepath, option)) except Exception as ex: - raise Utils.UnzipException('Can not unzip {0}: {1}'.format(filepath, ex)) + raise Utils.UnzipException( + 'Can not unzip {0}: {1}'.format(filepath, ex)) class UnzipException(Exception): """Exception raised when unzip fails""" @@ -919,7 +939,8 @@ class TempFile(object): if filename: path = os.path.join(TempFile.scratch_folder, filename) else: - file_descriptor, path = tempfile.mkstemp(dir=TempFile.scratch_folder, prefix=TempFile.prefix, suffix=suffix) + file_descriptor, path = tempfile.mkstemp( + dir=TempFile.scratch_folder, prefix=TempFile.prefix, suffix=suffix) path = str(path) os.close(file_descriptor) diff --git a/earthdiagnostics/variable.py b/earthdiagnostics/variable.py index d6fea362f5d2a10bb4b22ca1a7b2d18f49a7fb38..bb1ccb42fd69c1865875055446498cc2cdd19148 100644 --- a/earthdiagnostics/variable.py +++ b/earthdiagnostics/variable.py @@ -4,6 +4,7 @@ import csv import glob import json import os +from collections import Counter import openpyxl from bscearth.utils.log import Log @@ -24,8 +25,10 @@ class VariableManager(object): """Class for translating variable alias into standard names and provide the correct description for them""" def __init__(self): - self._cmor_tables_folder = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'cmor_tables') - self._aliases_folder = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'variable_alias') + self._cmor_tables_folder = os.path.join( + os.path.dirname(os.path.realpath(__file__)), 'cmor_tables') + self._aliases_folder = os.path.join(os.path.dirname( + os.path.realpath(__file__)), 'variable_alias') self.clean() def clean(self): @@ -50,7 +53,8 @@ class VariableManager(object): return self._dict_aliases[original_name.lower()][1] except KeyError: if not silent: - Log.warning('Variable {0} is not defined in the CMOR table. Please add it'.format(original_name)) + Log.warning('Variable {0} is not defined in the CMOR table. Please add it'.format( + original_name)) return None def get_all_variables(self): @@ -78,7 +82,8 @@ class VariableManager(object): return self._dict_aliases[original_name.lower()] except KeyError: if not silent: - Log.warning('Variable {0} is not defined in the CMOR table. Please add it'.format(original_name)) + Log.warning('Variable {0} is not defined in the CMOR table. 
Please add it'.format( + original_name)) return None, None def load_variables(self, table_name): @@ -117,11 +122,13 @@ class VariableManager(object): raise Exception('Data convention {0} unknown'.format(self.table_name)) def _get_csv_path(self, table_name): - csv_table_path = os.path.join(self._cmor_tables_folder, '{0}.csv'.format(table_name)) + csv_table_path = os.path.join( + self._cmor_tables_folder, '{0}.csv'.format(table_name)) return csv_table_path def _get_json_folder(self): - json_folder = os.path.join(self._cmor_tables_folder, '{0}/Tables'.format(self.table_name)) + json_folder = os.path.join( + self._cmor_tables_folder, '{0}/Tables'.format(self.table_name)) return json_folder def _load_file(self, csv_table_path, default=False): @@ -151,13 +158,13 @@ class VariableManager(object): self._dict_variables[var.short_name.lower()] = var def _load_json(self, json_folder): - executor = ThreadPoolExecutor() + # executor = ThreadPoolExecutor(max_workers=1) for file_name in os.listdir(json_folder): if file_name in ('CMIP6_grids.json', 'CMIP6_formula_terms.json'): continue - - executor.submit(self._load_json_file, os.path.join(json_folder, file_name)) - executor.shutdown(True) + self._load_json_file(os.path.join(json_folder, file_name)) + # executor.submit(self._load_json_file, os.path.join(json_folder, file_name)) + # executor.shutdown(True) def _load_json_file(self, json_path): with open(json_path) as json_file: @@ -169,12 +176,20 @@ class VariableManager(object): if 'variable_entry' in data: Log.debug('Parsing file {0}'.format(json_path)) table_id = data['Header']['table_id'][6:] - table = CMORTable(table_id, - Frequency(data['variable_entry'].values()[0]['frequency']), - data['Header']['table_date'], - ModelingRealms.parse(data['Header']['realm'])) + + var_freqs = (var['frequency'] + for var in data['variable_entry'].values()) + table_freq, _ = Counter(var_freqs).most_common(1)[0] + table = CMORTable( + table_id, + Frequency(table_freq), + data['Header']['table_date'], + ModelingRealms.parse(data['Header']['realm'].split(' ')[0]) + ) self.tables[table_id] = table self._load_json_variables(data['variable_entry'], table) + else: + Log.debug('Skipping file {0}'.format(json_path)) def _load_json_variables(self, json_data, table): for short_name in json_data.keys(): @@ -182,7 +197,7 @@ class VariableManager(object): pass short_name = str.strip(str(short_name)) if short_name.lower() in self._dict_variables: - self._dict_variables[short_name.lower()].tables.append(table) + self._dict_variables[short_name.lower()].add_table(table) continue variable = Variable() try: @@ -218,13 +233,15 @@ class VariableManager(object): cmor_vars.append(self._dict_variables[alias]) if len(cmor_vars) == 0: - Log.warning('Aliases {0} could not be mapped to any variable'.format(aliases)) + Log.warning( + 'Aliases {0} could not be mapped to any variable'.format(aliases)) continue elif len(cmor_vars) > 1: non_default = [var for var in cmor_vars if not var.default] if len(non_default) == 1: for default in [var for var in cmor_vars if var not in non_default]: - del self._dict_variables[default.short_name.lower()] + del self._dict_variables[default.short_name.lower( + )] cmor_vars = non_default else: @@ -254,7 +271,8 @@ class VariableManager(object): cmor_var.known_aliases.append(alias_object) def _get_aliases_csv_path(self, filename): - csv_table_path = os.path.join(self._aliases_folder, '{0}.csv'.format(filename)) + csv_table_path = os.path.join( + self._aliases_folder, '{0}.csv'.format(filename)) return csv_table_path 
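
The Counter-based change to _load_json_file above is worth a worked example: a table JSON can mix frequencies across its variable entries, and the table's frequency is now the most common one instead of whatever entry happens to come first. A self-contained illustration (the variable names and frequencies below are made up):

```python
from collections import Counter

# Stand-in for data['variable_entry'] from a CMIP6/PRIMAVERA table JSON.
variable_entry = {
    'tas': {'frequency': 'mon'},
    'pr': {'frequency': 'mon'},
    'sos': {'frequency': 'day'},  # the odd one out no longer decides
}
var_freqs = (var['frequency'] for var in variable_entry.values())
table_freq, _ = Counter(var_freqs).most_common(1)[0]
assert table_freq == 'mon'
```
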
def create_aliases_dict(self): @@ -271,11 +289,13 @@ class VariableManager(object): self._dict_aliases[alias.alias] = (alias, cmor_var) def _get_xlsx_path(self): - xlsx_table_path = os.path.join(self._cmor_tables_folder, '{0}.xlsx'.format(self.table_name)) + xlsx_table_path = os.path.join( + self._cmor_tables_folder, '{0}.xlsx'.format(self.table_name)) if os.path.isfile(xlsx_table_path): return xlsx_table_path - xlsx_table_path = os.path.join(self._cmor_tables_folder, self.table_name, 'etc', '*.xlsx') + xlsx_table_path = os.path.join( + self._cmor_tables_folder, self.table_name, 'etc', '*.xlsx') xlsx_table_path = glob.glob(xlsx_table_path) if len(xlsx_table_path) == 1: return xlsx_table_path[0] @@ -288,7 +308,8 @@ class VariableManager(object): data_sheet = excel.worksheets[0] for row in data_sheet.rows: if row[1].value in excel.sheetnames: - table_data[row[1].value] = (Frequency(row[2].value), 'Date missing') + table_data[row[1].value] = ( + Frequency(row[2].value), 'Date missing') for sheet_name in excel.sheetnames: sheet = excel[sheet_name] if sheet.title == 'Primday': @@ -314,7 +335,8 @@ class VariableManager(object): def _read_realm_from_json(self, table_name): for prefix in ('CMIP6', 'PRIMAVERA'): - json_path = os.path.join(self._get_json_folder(), '{0}_{1}.json'.format(prefix, table_name)) + json_path = os.path.join(self._get_json_folder( + ), '{0}_{1}.json'.format(prefix, table_name)) if os.path.isfile(json_path): with open(json_path) as json_file: json_data = json_file.read() @@ -412,7 +434,8 @@ class Variable(object): if 'out_name' in json_var: self.short_name = json_var['out_name'].strip() else: - raise VariableJsonException('Variable {0} has no out name defined'.format(variable)) + raise VariableJsonException( + 'Variable {0} has no out name defined'.format(variable)) self.standard_name = json_var['standard_name'].strip() self.long_name = json_var['long_name'].strip() @@ -443,14 +466,16 @@ class Variable(object): """ if len(domains) > 1: - Log.warning('Multiple modeling realms assigned to variable {0}: {1}. ', self, domains) + Log.warning( + 'Multiple modeling realms assigned to variable {0}: {1}. 
', self, domains) parsed = [] for domain in domains: parsed.append(ModelingRealms.parse(domain)) selected = self._select_most_specific(parsed) if selected: - Log.warning('We will use {0} as it is the most specific', selected) + Log.warning( + 'We will use {0} as it is the most specific', selected) return selected Log.warning('We will use {0} as it is the first on the list and there is no one that is more specific', @@ -458,7 +483,8 @@ class Variable(object): return parsed[0] elif len(domains) == 0: - Log.warning('Variable {0} has no modeling realm defined'.format(self.short_name)) + Log.warning( + 'Variable {0} has no modeling realm defined'.format(self.short_name)) return None else: return ModelingRealms.parse(domains[0]) @@ -516,7 +542,8 @@ class Variable(object): if self.domain: table_name = self.domain.get_table_name(frequency, data_convention) return CMORTable(table_name, frequency, 'December 2013', self.domain) - raise ValueError('Can not get table for {0} and frequency {1}'.format(self, frequency)) + raise ValueError( + 'Can not get table for {0} and frequency {1}'.format(self, frequency)) @staticmethod def _select_most_specific(parsed): diff --git a/earthdiagnostics/work_manager.py b/earthdiagnostics/work_manager.py index 018dd601de619c883f26a044f86e94ed33f04dce..c3d2ccd92af3b08dae0b6c9e92e27e5664195997 100644 --- a/earthdiagnostics/work_manager.py +++ b/earthdiagnostics/work_manager.py @@ -97,7 +97,7 @@ class WorkManager(object): self.lock = threading.Lock() self.lock.acquire() - for job in self.jobs[DiagnosticStatus.WAITING]: + for job in self.jobs[DiagnosticStatus.WAITING].copy(): job.request_data() job.declare_data_generated() job.subscribe(self, self._job_status_changed) diff --git a/src/mixdiags b/src/mixdiags new file mode 160000 index 0000000000000000000000000000000000000000..199979700e38d3918a82bd2052855d46375e48ab --- /dev/null +++ b/src/mixdiags @@ -0,0 +1 @@ +Subproject commit 199979700e38d3918a82bd2052855d46375e48ab diff --git a/test/unit/ocean/test_region_mean.py b/test/unit/ocean/test_region_mean.py index 35f8a09cb38d5e64793fac60d43e1c912409b9ae..d9cf5d047443305319d518629da22ecbcf3ee70e 100644 --- a/test/unit/ocean/test_region_mean.py +++ b/test/unit/ocean/test_region_mean.py @@ -4,8 +4,9 @@ from unittest import TestCase from mock import Mock, patch from earthdiagnostics.box import Box +from earthdiagnostics.frequency import Frequencies from earthdiagnostics.constants import Basins -from earthdiagnostics.diagnostic import DiagnosticOptionError, DiagnosticVariableOption +from earthdiagnostics.diagnostic import DiagnosticOptionError, DiagnosticVariableListOption from earthdiagnostics.modelingrealm import ModelingRealms from earthdiagnostics.ocean.regionmean import RegionMean from earthdiagnostics.utils import TempFile @@ -16,7 +17,9 @@ class TestRegionMean(TestCase): def setUp(self): self.data_manager = Mock() self.diags = Mock() - self.diags.config.experiment.get_chunk_list.return_value = (('20010101', 0, 0), ('20010101', 0, 1)) + self.diags.config.experiment.get_chunk_list.return_value = ( + ('20010101', 0, 0), ('20010101', 0, 1)) + self.diags.config.frequency = Frequencies.monthly def fake_parse(self, value): if not value: @@ -27,7 +30,7 @@ class TestRegionMean(TestCase): def fake_get(): return 'tempfile' - @patch.object(DiagnosticVariableOption, 'parse', fake_parse) + @patch.object(DiagnosticVariableListOption, 'parse', fake_parse) @patch.object(TempFile, 'get', fake_get) def test_generate_jobs(self): @@ -35,67 +38,74 @@ class TestRegionMean(TestCase): 
box.min_depth = -1 box.max_depth = -1 - jobs = RegionMean.generate_jobs(self.diags, ['diagnostic', 'ocean', 'var']) + jobs = RegionMean.generate_jobs( + self.diags, ['diagnostic', 'ocean', 'var']) self.assertEqual(len(jobs), 2) self.assertEqual(jobs[0], RegionMean(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', - box, True, False, Basins().Global, 't')) + box, True, False, Basins().Global, 't', Frequencies.monthly)) self.assertEqual(jobs[1], RegionMean(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', - box, True, False, Basins().Global, 't')) + box, True, False, Basins().Global, 't', Frequencies.monthly)) - jobs = RegionMean.generate_jobs(self.diags, ['diagnostic', 'ocean', 'var', 'U']) + jobs = RegionMean.generate_jobs( + self.diags, ['diagnostic', 'ocean', 'var', '', 'U']) self.assertEqual(len(jobs), 2) self.assertEqual(jobs[0], RegionMean(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', - box, True, False, Basins().Global, 'u')) + box, True, False, Basins().Global, 'u', Frequencies.monthly)) self.assertEqual(jobs[1], RegionMean(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', - box, True, False, Basins().Global, 'u')) + box, True, False, Basins().Global, 'u', Frequencies.monthly)) - jobs = RegionMean.generate_jobs(self.diags, ['diagnostic', 'ocean', 'var', 'U', 'global']) + jobs = RegionMean.generate_jobs( + self.diags, ['diagnostic', 'ocean', 'var', 'global', 'U']) self.assertEqual(len(jobs), 2) self.assertEqual(jobs[0], RegionMean(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', - box, True, False, Basins().Global, 'u')) + box, True, False, Basins().Global, 'u', Frequencies.monthly)) self.assertEqual(jobs[1], RegionMean(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', - box, True, False, Basins().Global, 'u')) + box, True, False, Basins().Global, 'u', Frequencies.monthly)) box = Box() box.min_depth = 1.0 box.max_depth = 10.0 - jobs = RegionMean.generate_jobs(self.diags, ['diagnostic', 'ocean', 'var', 'U', 'global', '1', '10']) + jobs = RegionMean.generate_jobs( + self.diags, ['diagnostic', 'ocean', 'var', 'global', 'U', '1', '10']) self.assertEqual(len(jobs), 2) self.assertEqual(jobs[0], RegionMean(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', - box, True, False, Basins().Global, 'u')) + box, True, False, Basins().Global, 'u', Frequencies.monthly)) self.assertEqual(jobs[1], RegionMean(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', - box, True, False, Basins().Global, 'u')) + box, True, False, Basins().Global, 'u', Frequencies.monthly)) jobs = RegionMean.generate_jobs( self.diags, - ['diagnostic', 'ocean', 'var', 'U', 'global', '1', '10', '', '', '', '', 'false'] + ['diagnostic', 'ocean', 'var', 'global', 'U', + '1', '10', '', '', '', '', 'false'] ) self.assertEqual(len(jobs), 2) self.assertEqual(jobs[0], RegionMean(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', - box, False, False, Basins().Global, 'u')) + box, False, False, Basins().Global, 'u', Frequencies.monthly)) self.assertEqual(jobs[1], RegionMean(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', - box, False, False, Basins().Global, 'u')) + box, False, False, Basins().Global, 'u', Frequencies.monthly)) jobs = RegionMean.generate_jobs( self.diags, - ['diagnostic', 'ocean', 'var', 'U', 'global', '1', '10', '', '', '', '', 'false', 'True'] + ['diagnostic', 'ocean', 'var', 'global', 'U', + '1', '10', '', '', '', '', 'false', 'True'] ) 
self.assertEqual(len(jobs), 2) self.assertEqual(jobs[0], RegionMean(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', - box, False, True, Basins().Global, 'u')) + box, False, True, Basins().Global, 'u', Frequencies.monthly)) self.assertEqual(jobs[1], RegionMean(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', - box, False, True, Basins().Global, 'u')) + box, False, True, Basins().Global, 'u', Frequencies.monthly)) jobs = RegionMean.generate_jobs( self.diags, - ['diagnostic', 'ocean', 'var', 'U', 'global', '1', '10', '', '', '', '', 'false', 'True', 'grid'] + ['diagnostic', 'ocean', 'var', 'global', 'U', '1', + '10', '', '', '', '', 'false', 'True', 'grid'] ) self.assertEqual(len(jobs), 2) self.assertEqual(jobs[0], RegionMean(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', - box, False, Basins().Global, True, 'grid')) + box, False, Basins().Global, True, 'grid', Frequencies.monthly)) self.assertEqual(jobs[1], RegionMean(self.data_manager, '20010101', 0, 1, ModelingRealms.ocean, 'var', - box, False, Basins().Global, True, 'grid')) + box, False, Basins().Global, True, 'grid', Frequencies.monthly)) with self.assertRaises(DiagnosticOptionError): RegionMean.generate_jobs(self.diags, ['diagnostic']) @@ -103,7 +113,7 @@ class TestRegionMean(TestCase): with self.assertRaises(DiagnosticOptionError): RegionMean.generate_jobs( self.diags, - ['diagnostic', 'ocean', 'var', 'U', 'global', '1', '10', '', '', '', '', 'false', + ['diagnostic', 'ocean', 'var', 'global', 'U', '1', '10', '', '', '', '', 'false', 'True', 'grid', 'extra'] ) @@ -112,7 +122,7 @@ class TestRegionMean(TestCase): box.min_depth = 1 box.max_depth = 10 diag = RegionMean(self.data_manager, '20010101', 0, 0, ModelingRealms.ocean, 'var', box, False, - True, Basins().Global, 'T') + True, Basins().Global, 'T', Frequencies.monthly) self.assertEqual(str(diag), 'Region mean Startdate: 20010101 Member: 0 Chunk: 0 Variable: var Box: 1-10 ' 'Save 3D: False Save variance: True Grid point: T') diff --git a/test/unit/test_constants.py b/test/unit/test_constants.py index 1a5b96fbcf8b8a1aa1135ac2f643d2c4cf372378..6ad614b43379ada9fbfef8e820d5fa0573b99322 100644 --- a/test/unit/test_constants.py +++ b/test/unit/test_constants.py @@ -50,24 +50,3 @@ class TestBasin(TestCase): def test_order(self): self.assertTrue(self.basin < Basin('Vasin')) self.assertTrue(self.basin > Basin('Asin')) - - -class TestBasins(TestCase): - - def test_singleton(self): - self.assertIs(Basins(), Basins()) - - def test_get_basins(self): - mock_handler = Mock() - mock_handler.variables.keys.return_value = ('var1', 'lat') - Basins().get_available_basins(mock_handler) - self.assertTrue(hasattr(Basins(), 'var1')) - self.assertFalse(hasattr(Basins(), 'lat')) - - def test_get_basins_with_alias(self): - mock_handler = Mock() - mock_handler.variables.keys.return_value = ('Kara_Sea', 'lat') - basins = Basins() - Basins().get_available_basins(mock_handler) - self.assertTrue(hasattr(basins, 'Kara_Sea')) - self.assertFalse(hasattr(basins, 'lat'))
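
Taken together, the test updates pin down the new regmean interface: the basin list moves ahead of the grid point (matching the renumbered option list at the top of this changeset), the min/max lat-lon options are gone, the variable option now accepts a list (one job is created per variable), and an original-frequency option defaulting to the experiment frequency closes the list. A hypothetical call mirroring the updated tests (argument values are illustrative):

```python
from earthdiagnostics.ocean.regionmean import RegionMean

# Option order after this change: domain, variable(s), basins, grid point,
# min depth, max depth, save3d, variance, grid, original frequency.
jobs = RegionMean.generate_jobs(
    diags,  # assumes a configured diagnostics object, as in the tests
    ['diagnostic', 'ocean', 'thetao', 'global', 'T', '1', '10'],
)
# With two chunks configured this yields two jobs, each averaging thetao
# over the global basin on the T grid between model levels 1 and 10.
```
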