diff --git a/diags.conf b/diags.conf index ee1eef103147bb8fcd74f559647fa25daa4d00d4..c096fcbddc64d2c7b29786814d10fe1001e1d101 100644 --- a/diags.conf +++ b/diags.conf @@ -86,7 +86,7 @@ OCEAN_TIMESTEP = 6 # if 2, fc00 # CHUNK_SIZE is the size of each data file, given in months # CHUNKS is the number of chunks. You can specify less chunks than present on the experiment -EXPID = a0au +EXPID = a07o STARTDATES = 20000201 20000501 20010201 20010501 20020201 20020501 20030201 20030501 20040201 20040501 20050201 20050501 20060201 20060501 20070201 20070501 20080201 20080501 20090201 20090501 MEMBERS = 0 1 2 3 4 5 6 7 MEMBER_DIGITS = 1 diff --git a/earthdiagnostics/cmor_tables/primavera.csv b/earthdiagnostics/cmor_tables/primavera.csv deleted file mode 100644 index 53180efd0a0e34817c5091e1a0eacb09450f41ca..0000000000000000000000000000000000000000 --- a/earthdiagnostics/cmor_tables/primavera.csv +++ /dev/null @@ -1,15 +0,0 @@ -Variable,Shortname,Name,Long name,Domain,Basin,Units,Valid min,Valid max,Grid,Table -ibgheatco:sihc,hcicega,global mean ice heat content,Global mean ice heat content,seaIce,,,,,, -sbgheatco:sisnhc,hcsnga,global mean snow heat content,Global mean snow heat content,seaIce,,,,,, -iice_itd:siconc_cat:siitdconc:siconcat,siccat,ice_area_in_categories,Ice area in categories,seaIce,,,,,, -iicethic:sithic:sithick,sit,sea_ice_thickness,Sea Ice Thickness,seaIce,,m,,,, -iice_hid:sithic_cat:sithicat:siitdthick,sitcat,ice_thicknesss_in_categories,Ice thickness in categories,seaIce,,,,,, -iicevelo:sivelo:sispeed,sivelo,ice_velocity,Ice velocity,seaIce,,,,,, -iicevelu:sivelu:siu,sivelu,ice_velocity_u,Ice velocity u,seaIce,,,,,, -iicevelv:sivelv:siv,sivelv,ice_velocity_v,Ice velocity v,seaIce,,,,,, -sidivvel,siddivvel,divergence_of_sea_ice_velocity,Divergence of sea ice velocity,seaIce,,1e-8s^-1,,,, -sivolu:sivol,sivolu,sea_ice_volume_per_unit_gridcell_area,Sea ice volume per gridcell area unit,seaIce,,,,,, -iice_hsd:snthicats:siitdsnthick,sndcat,snow_thickness_in_categories,Snow thickness in in categories,seaIce,,,,,, -isnowthi:sisnthick,snthic,surface_snow_thickness,Surface snow thickness,seaIce,,,,,, -iicesali:iice_std:sisali,ssi,sea_ice_salinity,Sea ice salinity,seaIce,,psu,,,, -iicesurt:soicetem:sistem:sitemptop,tsice,surface_temperature,Surface temperature of sea ice,seaIce,,K,,,, diff --git a/earthdiagnostics/cmorizer.py b/earthdiagnostics/cmorizer.py index f988b366c250e2454cc77b2037630cdf6f0af9d6..9ed85f87ed304c0bd5e89004a6a6d222b1f85de9 100644 --- a/earthdiagnostics/cmorizer.py +++ b/earthdiagnostics/cmorizer.py @@ -20,7 +20,7 @@ class Cmorizer(object): Class to manage CMORization :param data_manager: experiment's data manager - :type data_manager: DataManager + :type data_manager: CMORManager :param startdate: startdate to cmorize :type startdate: str :param member: member to cmorize @@ -51,7 +51,7 @@ class Cmorizer(object): self.original_files_path = os.path.join(self.config.data_dir, self.experiment.expid, 'original_files', self.startdate, self.member_str, 'outputs') self.atmos_timestep = None - self.cmor_scratch = os.path.join(self.config.scratch_dir, 'CMOR') + self.cmor_scratch = os.path.join(self.config.scratch_dir, 'CMOR', self.startdate, self.member_str) def cmorize_ocean(self): """ @@ -61,6 +61,7 @@ class Cmorizer(object): if not self.cmor.ocean: Log.info('Skipping ocean cmorization due to configuration') return + Log.info('\nCMORizing ocean\n') self._cmorize_ocean_files('MMO') self._cmorize_ocean_files('PPO') self._cmorize_ocean_files('diags') @@ -71,10 +72,18 @@ class Cmorizer(object): tar_files.sort() count = 1 for tarfile in tar_files: + if not self.cmorization_required(self.get_chunk(os.path.basename(tarfile)), ModelingRealms.ocean): + Log.info('No need to unpack file {0}/{1}'.format(count, len(tar_files))) + count += 1 + continue + Log.info('Unpacking oceanic file {0}/{1}'.format(count, len(tar_files))) - self._unpack_tar_file(tarfile) - self._cmorize_nc_files() - Log.result('Oceanic file {0}/{1} finished'.format(count, len(tar_files))) + try: + self._unpack_tar_file(tarfile) + self._cmorize_nc_files() + Log.result('Oceanic file {0}/{1} finished'.format(count, len(tar_files))) + except Exception as ex: + Log.error('Could not CMORize oceanic file {0}: {1}', count, ex) count += 1 def _cmorize_nc_files(self): @@ -82,7 +91,7 @@ class Cmorizer(object): self._cmorize_nc_file(filename) def _correct_fluxes(self): - fluxes_vars = ("prsn", "rss", "rls", "rsscs", "rsds", "rlds") + fluxes_vars = ("prsn", "rss", "rls", "rsscs", "rsds", "rlds", "hfss", 'hfls') for filename in glob.glob(os.path.join(self.cmor_scratch, '*.nc')): handler = Utils.openCdf(filename) for varname in handler.variables.keys(): @@ -101,21 +110,24 @@ class Cmorizer(object): def _merge_mma_files(self, tarfile): temp = TempFile.get() - for filename in glob.glob(os.path.join(self.cmor_scratch, 'MMA_*_SH_*.nc')): - Utils.cdo.sp2gpl(options='-O', input=filename, output=temp) - shutil.move(temp, filename) sh_files = glob.glob(os.path.join(self.cmor_scratch, 'MMA_*_SH_*.nc')) - Utils.cdo.mergetime(input=sh_files, output=os.path.join(self.cmor_scratch, 'sh.nc')) gg_files = glob.glob(os.path.join(self.cmor_scratch, 'MMA_*_GG_*.nc')) - Utils.cdo.mergetime(input=gg_files, output=os.path.join(self.cmor_scratch, 'gg.nc')) + + merged_sh = TempFile.get() + merged_gg = TempFile.get() + + for filename in sh_files: + Utils.cdo.sp2gpl(options='-O', input=filename, output=temp) + shutil.move(temp, filename) + Utils.cdo.mergetime(input=sh_files, output=merged_sh) + Utils.cdo.mergetime(input=gg_files, output=merged_gg) for filename in sh_files + gg_files: os.remove(filename) - Utils.nco.ncks(input=os.path.join(self.cmor_scratch, 'sh.nc'), - output=os.path.join(self.cmor_scratch, 'gg.nc'), options='-A') - os.remove(os.path.join(self.cmor_scratch, 'sh.nc')) + Utils.nco.ncks(input=merged_sh, output=merged_gg, options='-A') + os.remove(merged_sh) tar_startdate = tarfile[0:-4].split('_')[5].split('-') new_name = 'MMA_1m_{0[0]}_{0[1]}.nc'.format(tar_startdate) - shutil.move(os.path.join(self.cmor_scratch, 'gg.nc'), os.path.join(self.cmor_scratch, new_name)) + shutil.move(merged_gg, os.path.join(self.cmor_scratch, new_name)) def cmorize_atmos(self): """ @@ -126,6 +138,7 @@ class Cmorizer(object): Log.info('Skipping atmosphere cmorization due to configuration') return + Log.info('\nCMORizing atmosphere\n') if self.cmor.use_grib and self.gribfiles_available(): self._cmorize_grib_files() else: @@ -136,33 +149,47 @@ class Cmorizer(object): tar_files.sort() count = 1 for tarfile in tar_files: + if not self.cmorization_required(self.get_chunk(os.path.basename(tarfile)), ModelingRealms.atmos): + Log.info('No need to unpack file {0}/{1}'.format(count, len(tar_files))) + count += 1 + continue Log.info('Unpacking atmospheric file {0}/{1}'.format(count, len(tar_files))) - self._unpack_tar_file(tarfile) - self._merge_mma_files(tarfile) - self._correct_fluxes() - self._cmorize_nc_files() - Log.result('Atmospheric file {0}/{1} finished'.format(count, len(tar_files))) + try: + self._unpack_tar_file(tarfile) + self._merge_mma_files(tarfile) + self._correct_fluxes() + self._cmorize_nc_files() + Log.result('Atmospheric file {0}/{1} finished'.format(count, len(tar_files))) + except Exception as ex: + Log.error('Could not cmorize atmospheric file {0}: {1}', count, ex) + count += 1 def _cmorize_grib_files(self): - count = 1 + chunk = 1 chunk_start = parse_date(self.startdate) while os.path.exists(self.get_original_grib_path(chunk_start, 'GG')) or \ os.path.exists(self.get_original_grib_path(chunk_start, 'SH')): - chunk_end = chunk_end_date(chunk_start, self.experiment.chunk_size, 'month', 'standard') - chunk_end = previous_day(chunk_end, 'standard') - Log.info('CMORizing chunk {0}-{1}', date2str(chunk_start), date2str(chunk_end)) - for grid in ('SH', 'GG'): - Log.info('Processing {0} variables', grid) - - if not os.path.exists(self.get_original_grib_path(chunk_start, grid)): - continue - self.cmorize_grib_file(chunk_end, chunk_start, count, grid) + if self.cmorization_required(chunk, ModelingRealms.atmos): + chunk_end = chunk_end_date(chunk_start, self.experiment.chunk_size, 'month', 'standard') + chunk_end = previous_day(chunk_end, 'standard') + Log.info('CMORizing chunk {0}-{1}', date2str(chunk_start), date2str(chunk_end)) + try: + for grid in ('SH', 'GG'): + Log.info('Processing {0} variables', grid) + + if not os.path.exists(self.get_original_grib_path(chunk_start, grid)): + continue + self.cmorize_grib_file(chunk_end, chunk_start, grid) + except Exception as ex: + Log.error('Can not cmorize GRIB file for chunk {0}-{1}: {2}', + date2str(chunk_start), date2str(chunk_end), ex) chunk_start = chunk_end_date(chunk_start, self.experiment.chunk_size, 'month', 'standard') + chunk += 1 - def cmorize_grib_file(self, chunk_end, chunk_start, count, grid): + def cmorize_grib_file(self, chunk_end, chunk_start, grid): for month in range(0, self.experiment.chunk_size): current_date = add_months(chunk_start, month, 'standard') original_gribfile = self.get_original_grib_path(current_date, grid) @@ -208,7 +235,7 @@ class Cmorizer(object): os.remove(splited_file) Log.result('Month {0}, {1} variables finished', date2str(current_date), grid) - count += 1 + self._merge_and_cmorize_atmos(chunk_start, chunk_end, grid, Frequencies.monthly) self._merge_and_cmorize_atmos(chunk_start, chunk_end, grid, Frequencies.daily) self._merge_and_cmorize_atmos(chunk_start, chunk_end, grid, @@ -339,13 +366,29 @@ class Cmorizer(object): file_parts = os.path.basename(file_path).split('_') if file_parts[0] in (self.experiment.expid, 'MMA', 'MMO') or file_parts[0].startswith('ORCA'): # Model output - return '{0}-{1}'.format(file_parts[2][0:6], file_parts[3][0:6]) + if file_parts[-1].endswith('.tar'): + file_parts = file_parts[-1][0:-4].split('-') + return '{0}-{1}'.format(file_parts[0][0:6], file_parts[1][0:6]) + else: + return '{0}-{1}'.format(file_parts[2][0:6], file_parts[3][0:6]) elif file_parts[1] == self.experiment.expid: # Files generated by the old version of the diagnostics return '{0}-{1}'.format(file_parts[4][0:6], file_parts[5][0:6]) else: return None + def get_chunk(self, file_path): + chunk_start = parse_date(self.get_date_str(file_path).split('-')[0]) + current_date = parse_date(self.startdate) + chunk = 1 + while current_date < chunk_start: + current_date = chunk_end_date(current_date, self.experiment.chunk_size, 'month', 'standard') + chunk += 1 + + if current_date != chunk_start: + raise Exception('File {0} start date is not a valid chunk start date'.format(file_path)) + return chunk + @staticmethod def _add_coordinate_variables(handler, temp): handler_cmor = Utils.openCdf(temp) @@ -539,6 +582,9 @@ class Cmorizer(object): gribfiles = glob.glob(grb_path) return len(gribfiles) > 0 + def cmorization_required(self, chunk, domain): + return self.config.cmor.force or not self.data_manager.is_cmorized(self.startdate, self.member, chunk, domain) + class CMORException(Exception): pass diff --git a/earthdiagnostics/cmormanager.py b/earthdiagnostics/cmormanager.py index 70ad736160db8bce2214a8c61d1beb047fc4e6f1..86259cc40e068aecd45608bd16894bde7cb533d8 100644 --- a/earthdiagnostics/cmormanager.py +++ b/earthdiagnostics/cmormanager.py @@ -10,8 +10,8 @@ from autosubmit.date.chunk_date_lib import parse_date, chunk_start_date, chunk_e from earthdiagnostics.cmorizer import Cmorizer from earthdiagnostics.datamanager import DataManager, NetCDFFile from earthdiagnostics.frequency import Frequencies, Frequency +from earthdiagnostics.modelingrealm import ModelingRealms from earthdiagnostics.utils import TempFile, Utils -from earthdiagnostics.variable import Variable from earthdiagnostics.variable_type import VariableType @@ -21,6 +21,7 @@ class CMORManager(DataManager): """ def __init__(self, config): super(CMORManager, self).__init__(config) + self._dic_cmorized = dict() data_folders = self.config.data_dir.split(':') experiment_folder = self.experiment.model.lower() if experiment_folder.startswith('ec-earth'): @@ -41,6 +42,16 @@ class CMORManager(DataManager): raise Exception('Can not find model data') self.cmor_path = os.path.join(self.config.data_dir, self.experiment.expid, 'cmorfiles') + def file_exists(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None, + vartype=VariableType.MEAN): + filepath = self.get_file_path(startdate, member, domain, var, chunk, frequency, box, grid, None, None) + + # noinspection PyBroadException + try: + return os.path.isfile(filepath) + except Exception: + return False + def get_file(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None, vartype=VariableType.MEAN): """ @@ -340,22 +351,26 @@ class CMORManager(DataManager): # Check if cmorized and convert if not for startdate, member in self.experiment.get_member_list(): - if not self.config.cmor.force and not self.config.cmor.force_untar and self._is_cmorized(startdate, member): - continue if not self._unpack_cmor_files(startdate, member): self._cmorize_member(startdate, member) - def _is_cmorized(self, startdate, member): + def is_cmorized(self, startdate, member, chunk, domain): + identifier = (startdate, member, chunk, domain.name) + if identifier not in self._dic_cmorized: + self._dic_cmorized[identifier] = self._is_cmorized(startdate, member, chunk, domain) + return self._dic_cmorized[identifier] + + def _is_cmorized(self, startdate, member, chunk, domain): startdate_path = self._get_startdate_path(startdate) - if not os.path.exists(startdate_path): + if not os.path.isdir(startdate_path): return False for freq in os.listdir(startdate_path): - freq_path = os.path.join(startdate_path, freq) - for domain in os.listdir(freq_path): - domain_path = os.path.join(freq_path, domain) + domain_path = os.path.join(startdate_path, freq, + domain.name) + if os.path.isdir(domain_path): for var in os.listdir(domain_path): - member_path = os.path.join(domain_path, var, 'r{0}i1p1'.format(member + 1)) - if os.path.exists(member_path): + var_path = self.get_file_path(startdate, member, domain, var, chunk, Frequency(freq)) + if os.path.isfile(var_path): return True return False @@ -366,34 +381,53 @@ class CMORManager(DataManager): cmorizer = Cmorizer(self, startdate, member) cmorizer.cmorize_ocean() cmorizer.cmorize_atmos() - Log.result('CMORized startdate {0} member {1}!\n\n', startdate, member_str, datetime.now() - start_time) + Log.result('CMORized startdate {0} member {1}! Ellpased time: {2}\n\n', startdate, member_str, + datetime.now() - start_time) def _unpack_cmor_files(self, startdate, member): if self.config.cmor.force: return False - filepaths = self._get_transferred_cmor_data_filepaths(startdate, member, 'tar.gz') + chunk = 1 + cmorized = False + + if not self.config.cmor.force_untar: + while self.is_cmorized(startdate, member, chunk, ModelingRealms.ocean) or\ + self.is_cmorized(startdate, member, chunk, ModelingRealms.atmos): + chunk += 1 + + while self._unpack_chunk(startdate, member, chunk): + chunk += 1 + cmorized = True + + return cmorized + + def _unpack_chunk(self, startdate, member, chunk): + + filepaths = self._get_transferred_cmor_data_filepaths(startdate, member, chunk, 'tar.gz') if len(filepaths) > 0: - Log.info('Unzipping cmorized data for {0} {1}...', startdate, member) + Log.info('Unzipping cmorized data for {0} {1} {2}...', startdate, member, chunk) Utils.unzip(filepaths, True) if not os.path.exists(self.cmor_path): os.mkdir(self.cmor_path) - filepaths = self._get_transferred_cmor_data_filepaths(startdate, member, 'tar') + filepaths = self._get_transferred_cmor_data_filepaths(startdate, member, chunk, 'tar') if len(filepaths) > 0: - Log.info('Unpacking cmorized data for {0} {1}...', startdate, member) + Log.info('Unpacking cmorized data for {0} {1} {2}...', startdate, member, chunk) Utils.untar(filepaths, self.cmor_path) self._correct_paths(startdate) self._create_links(startdate) return True return False - def _get_transferred_cmor_data_filepaths(self, startdate, member, extension): + def _get_transferred_cmor_data_filepaths(self, startdate, member, chunk, extension): tar_path = os.path.join(self.config.data_dir, self.experiment.expid, 'original_files', 'cmorfiles') tar_original_files = os.path.join(self.config.data_dir, 'original_files', self.experiment.expid, 'cmorfiles') - file_name = 'CMOR?_{0}_{1}_{2}_*.{3}'.format(self.experiment.expid, startdate, - self.experiment.get_member_str(member), extension) + file_name = 'CMOR?_{0}_{1}_{2}_{3}-*.{4}'.format(self.experiment.expid, startdate, + self.experiment.get_member_str(member), + self.experiment.get_chunk_start_str(startdate, chunk), + extension) filepaths = glob.glob(os.path.join(tar_path, file_name)) filepaths += glob.glob(os.path.join(tar_path, 'outputs', file_name)) filepaths += glob.glob(os.path.join(tar_original_files, file_name)) @@ -463,8 +497,8 @@ class CMORManager(DataManager): vartype=VariableType.MEAN) else: for filename in os.listdir(filepath): - self._create_link(domain, os.path.join(filepath, filename), freq, var, "", False, - vartype=VariableType.MEAN) + self._create_link(domain, os.path.join(filepath, filename), frequency, var, "", + False, vartype=VariableType.MEAN) Log.debug('Links ready') def _get_startdate_path(self, startdate): @@ -477,3 +511,5 @@ class CMORManager(DataManager): """ return os.path.join(self.config.data_dir, self.experiment.expid, 'cmorfiles', self.experiment.institute, self.experiment.model, self.experiment.experiment_name, 'S' + startdate) + + diff --git a/earthdiagnostics/config.py b/earthdiagnostics/config.py index 8ab9159d29fd8fd0c9220b236be7278bebd8c439..2b9c736f963c9105f7fa2c229db0f016ff06a936 100644 --- a/earthdiagnostics/config.py +++ b/earthdiagnostics/config.py @@ -2,7 +2,7 @@ import os from autosubmit.config.log import Log -from autosubmit.date.chunk_date_lib import parse_date, chunk_start_date, chunk_end_date +from autosubmit.date.chunk_date_lib import parse_date, chunk_start_date, chunk_end_date, date2str from earthdiagnostics.frequency import Frequency, Frequencies from earthdiagnostics.parser import Parser @@ -257,7 +257,7 @@ class ExperimentConfig(object): date = parse_date(startdate) chunks = list() for chunk in range(1, self.num_chunks + 1): - chunk_start = chunk_start_date(date, chunk, self.chunk_size, 'month', self.calendar) + chunk_start = self.get_chunk_start(date, chunk) if chunk_start.year > year: break elif chunk_start.year == year or chunk_end_date(chunk_start, self.chunk_size, 'month', @@ -266,6 +266,20 @@ class ExperimentConfig(object): return chunks + def get_chunk_start(self, startdate, chunk): + if isinstance(startdate, basestring): + startdate = parse_date(startdate) + return chunk_start_date(startdate, chunk, self.chunk_size, 'month', self.calendar) + + def get_chunk_start_str(self, startdate, chunk): + return date2str(self.get_chunk_start(startdate, chunk)) + + def get_chunk_end(self, startdate, chunk): + return chunk_end_date(self.get_chunk_start(startdate, chunk), self.chunk_size, 'month', self.calendar) + + def get_chunk_end_str(self, startdate, chunk): + return date2str(self.get_chunk_end(startdate, chunk)) + def get_full_years(self, startdate): """ Returns the list of full years that are in the given startdate diff --git a/earthdiagnostics/datamanager.py b/earthdiagnostics/datamanager.py index 16c310ab6700dbb592106e4986acb9f6977c4382..3e6f6f3d628adfeabfd8a5992f575f726dac3aba 100644 --- a/earthdiagnostics/datamanager.py +++ b/earthdiagnostics/datamanager.py @@ -31,6 +31,34 @@ class DataManager(object): UnitConversion.load_conversions() self.lock = threading.Lock() + def file_exists(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None, + vartype=VariableType.MEAN): + """ + Checks if a given file exists + + :param domain: CMOR domain + :type domain: Domain + :param var: variable name + :type var: str + :param startdate: file's startdate + :type startdate: str + :param member: file's member + :type member: int + :param chunk: file's chunk + :type chunk: int + :param grid: file's grid (only needed if it is not the original) + :type grid: str + :param box: file's box (only needed to retrieve sections or averages) + :type box: Box + :param frequency: file's frequency (only needed if it is different from the default) + :type frequency: Frequency + :param vartype: Variable type (mean, statistic) + :type vartype: VariableType + :return: path to the copy created on the scratch folder + :rtype: str + """ + raise NotImplementedError() + def get_file(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None, vartype=VariableType.MEAN): """ diff --git a/earthdiagnostics/earthdiags.py b/earthdiagnostics/earthdiags.py index 3cc62eab53bf3980ab1e306969d57a6d7eb048d7..990b9213b9d12db697d297cf5d58f5db4098ef65 100755 --- a/earthdiagnostics/earthdiags.py +++ b/earthdiagnostics/earthdiags.py @@ -20,6 +20,7 @@ from earthdiagnostics.diagnostic import Diagnostic from earthdiagnostics.ocean import * from earthdiagnostics.general import * from earthdiagnostics.statistics import * +from earthdiagnostics.variable import VariableManager class EarthDiags(object): @@ -72,6 +73,8 @@ class EarthDiags(object): help="opens documentation and exits") parser.add_argument('--clean', action='store_true', help="clean the scratch folder and exits") + parser.add_argument('--report', action='store_true', + help="generates a report about the available files") parser.add_argument('-lf', '--logfile', choices=('EVERYTHING', 'DEBUG', 'INFO', 'RESULT', 'USER_WARNING', 'WARNING', 'ERROR', 'CRITICAL', 'NO_LOG'), default='DEBUG', type=str, @@ -110,6 +113,8 @@ class EarthDiags(object): diags = EarthDiags(config_file_path) if args.clean: result = diags.clean() + elif args.report: + result = diags.report() else: result = diags.run() TempFile.clean() @@ -140,13 +145,7 @@ class EarthDiags(object): self._register_diagnostics() - parse_date('20000101') - - if self.config.data_adaptor == 'CMOR': - self.data_manager = CMORManager(self.config) - elif self.config.data_adaptor == 'THREDDS': - self.data_manager = THREDDSManager(self.config) - self.data_manager.prepare() + self._prepare_data_manager() # Run diagnostics Log.info('Running diagnostics') @@ -173,6 +172,13 @@ class EarthDiags(object): self.print_stats() return self.had_errors + def _prepare_data_manager(self): + if self.config.data_adaptor == 'CMOR': + self.data_manager = CMORManager(self.config) + elif self.config.data_adaptor == 'THREDDS': + self.data_manager = THREDDSManager(self.config) + self.data_manager.prepare() + def print_stats(self): Log.info('Time consumed by each diagnostic class') Log.info('--------------------------------------') @@ -247,6 +253,54 @@ class EarthDiags(object): Log.result('Scratch folder removed') return True + def report(self): + Log.info('Looking for existing vars...') + self._prepare_data_manager() + for startdate in self.config.experiment.startdates: + for member in self.config.experiment.members: + results = self._get_variable_report(startdate, member) + report_path = os.path.join(self.config.scratch_dir, '{0}_fc{1}.report'.format(startdate, member)) + self.create_report(report_path, results) + + Log.result('Report finished') + return True + + def _get_variable_report(self, startdate, member): + var_manager = VariableManager() + results = list() + for var in var_manager.get_all_variables(): + if var.priority is None or var.domain is None: + continue + for table in var.tables: + if not self.data_manager.file_exists(var.domain, var.short_name, startdate, member, 1, + frequency=table.frequency): + results.append((var, table)) + return results + + def create_report(self, report_path, results): + current_table = None + current_priority = 0 + results = sorted(results, key=lambda result: result[0].short_name) + results = sorted(results, key=lambda result: result[0].priority) + results = sorted(results, key=lambda result: result[1].name) + + file_handler = open(report_path, 'w') + + for var, table in results: + if current_table != table.name: + file_handler.write('\nTable {0}\n'.format(table.name)) + file_handler.write('===================================\n') + current_table = table.name + current_priority = 0 + + if current_priority != var.priority: + file_handler.write('\nMissing variables with priority {0}:\n'.format(var.priority)) + file_handler.write('--------------------------------------\n') + current_priority = var.priority + + file_handler.write('{0:12}: {1}\n'.format(var.short_name, var.standard_name)) + file_handler.close() + def _run_jobs(self, queue, numthread): def _run_job(current_job, retrials=1): while retrials >= 0: diff --git a/earthdiagnostics/frequency.py b/earthdiagnostics/frequency.py index 3099ce3d9b9b11913cc33d02316a026c0bcb82ed..12e1cbe42c9ce4b570b4339af650ac999094e0ce 100644 --- a/earthdiagnostics/frequency.py +++ b/earthdiagnostics/frequency.py @@ -1,3 +1,4 @@ +# coding=utf-8 from earthdiagnostics.variable_type import VariableType diff --git a/earthdiagnostics/general/scale.py b/earthdiagnostics/general/scale.py index 2f5e743d9ef0eef8c954dc66013a643bfe36003e..ceaac656cedcac6ee8c7b17f6881a471eea72b58 100644 --- a/earthdiagnostics/general/scale.py +++ b/earthdiagnostics/general/scale.py @@ -74,7 +74,7 @@ class Scale(Diagnostic): DiagnosticFloatOption('offset'), DiagnosticDomainOption('domain'), DiagnosticOption('variable'), - DiagnosticOption('grid'), + DiagnosticOption('grid', ''), DiagnosticFloatOption('min_limit', float('nan')), DiagnosticFloatOption('max_limit', float('nan'))) options = cls.process_options(options, options_available) diff --git a/earthdiagnostics/modelingrealm.py b/earthdiagnostics/modelingrealm.py index 1df08d02dc5db189272348582dbf8184b80d01b9..3a970eee675dcc735145737b6cb1c903e4c80b5f 100644 --- a/earthdiagnostics/modelingrealm.py +++ b/earthdiagnostics/modelingrealm.py @@ -1,7 +1,7 @@ +# coding=utf-8 from earthdiagnostics.frequency import Frequencies - class ModelingRealm(object): @staticmethod diff --git a/earthdiagnostics/ocean/areamoc.py b/earthdiagnostics/ocean/areamoc.py index 763da95fdb9cdb3b8b3cfca76c53760c1c8929c5..d2ea66ef1699f48bc4912eef7fdedadec62b3be2 100644 --- a/earthdiagnostics/ocean/areamoc.py +++ b/earthdiagnostics/ocean/areamoc.py @@ -93,7 +93,7 @@ class AreaMoc(Diagnostic): cdo = Utils.cdo temp2 = TempFile.get() - temp = self.data_manager.get_file('ocean', 'vsftmyz', self.startdate, self.member, self.chunk) + temp = self.data_manager.get_file(ModelingRealms.ocean, 'vsftmyz', self.startdate, self.member, self.chunk) handler = Utils.openCdf(temp) if 'i' in handler.dimensions: diff --git a/earthdiagnostics/ocean/convectionsites.py b/earthdiagnostics/ocean/convectionsites.py index 876ec080ac30f7af6259bbe346a34f241a8101ca..ca140c2233fc82e7ee73ed2ca84472361584704b 100644 --- a/earthdiagnostics/ocean/convectionsites.py +++ b/earthdiagnostics/ocean/convectionsites.py @@ -85,7 +85,8 @@ class ConvectionSites(Diagnostic): else: raise Exception("Input grid {0} not recognized".format(self.model_version)) - mlotst_file = self.data_manager.get_file('ocean', 'mlotst', self.startdate, self.member, self.chunk) + mlotst_file = self.data_manager.get_file(ModelingRealms.ocean, 'mlotst', self.startdate, self.member, + self.chunk) output = TempFile.get() self.mlotst_handler = Utils.openCdf(mlotst_file) diff --git a/earthdiagnostics/ocean/gyres.py b/earthdiagnostics/ocean/gyres.py index 29b113980b200f5d24c120232055d14e444906cc..c7eb720f6dc659ba78a983ca844e57d80938b7f1 100644 --- a/earthdiagnostics/ocean/gyres.py +++ b/earthdiagnostics/ocean/gyres.py @@ -93,7 +93,8 @@ class Gyres(Diagnostic): raise Exception("Input grid {0} not recognized".format(self.model_version)) output = TempFile.get() - vsftbarot_file = self.data_manager.get_file('ocean', 'vsftbarot', self.startdate, self.member, self.chunk) + vsftbarot_file = self.data_manager.get_file(ModelingRealms.ocean, 'vsftbarot', self.startdate, + self.member, self.chunk) handler_original = Utils.openCdf(vsftbarot_file) self.var_vsftbarot = handler_original.variables['vsftbarot'] diff --git a/earthdiagnostics/ocean/heatcontent.py b/earthdiagnostics/ocean/heatcontent.py index f7843cb00e70159fc4d89f3d727ea697b9fa63c1..a82a4da64bca7383255f8e859a11ff9929fcafd0 100644 --- a/earthdiagnostics/ocean/heatcontent.py +++ b/earthdiagnostics/ocean/heatcontent.py @@ -76,7 +76,7 @@ class HeatContent(Diagnostic): DiagnosticIntOption('min_depth'), DiagnosticIntOption('max_depth')) options = cls.process_options(options, options_available) - box = Box(True) + box = Box(False) box.min_depth = options['min_depth'] box.max_depth = options['max_depth'] job_list = list() @@ -90,9 +90,11 @@ class HeatContent(Diagnostic): Runs the diagnostic """ nco = Utils.nco - temperature_file = self.data_manager.get_file('ocean', 'thetao', self.startdate, self.member, self.chunk) + temperature_file = self.data_manager.get_file(ModelingRealms.ocean, 'thetao', self.startdate, + self.member, self.chunk) if self.mxloption != 0: - mlotst_file = self.data_manager.get_file('ocean', 'mlotst', self.startdate, self.member, self.chunk) + mlotst_file = self.data_manager.get_file(ModelingRealms.ocean, 'mlotst', self.startdate, + self.member, self.chunk) nco.ncks(input=mlotst_file, output=temperature_file, options='-A -v mlotst') para = list() diff --git a/earthdiagnostics/ocean/interpolatecdo.py b/earthdiagnostics/ocean/interpolatecdo.py index bc90f2912a3ab0a35410de7c2829278a968f8feb..68a04f5817dac95412b05bf64a16bdfaab5ce3d9 100644 --- a/earthdiagnostics/ocean/interpolatecdo.py +++ b/earthdiagnostics/ocean/interpolatecdo.py @@ -84,12 +84,12 @@ class InterpolateCDO(Diagnostic): @classmethod def _translate_ifs_grids_to_cdo_names(cls, target_grid): - if target_grid.startswith('T159L'): - target_grid = 't106' - if target_grid.startswith('T255L'): - target_grid = 't170' - if target_grid.startswith('T511L'): - target_grid = 't340' + if target_grid.upper().startswith('T159L'): + target_grid = 't159grid' + if target_grid.upper().startswith('T255L'): + target_grid = 't255grid' + if target_grid.upper().startswith('T511L'): + target_grid = 't511grid' return target_grid def compute(self): diff --git a/earthdiagnostics/ocean/verticalmean.py b/earthdiagnostics/ocean/verticalmean.py index 9a58a8ecd6b251bdf4d185c2907786e636f728fc..4564bb0aae96cd23d279796624a7b2802bfec0b4 100644 --- a/earthdiagnostics/ocean/verticalmean.py +++ b/earthdiagnostics/ocean/verticalmean.py @@ -86,7 +86,8 @@ class VerticalMean(Diagnostic): Runs the diagnostic """ temp = TempFile.get() - variable_file = self.data_manager.get_file('ocean', self.variable, self.startdate, self.member, self.chunk) + variable_file = self.data_manager.get_file(ModelingRealms.ocean, self.variable, self.startdate, self.member, + self.chunk) handler = Utils.openCdf(variable_file) if self.box.min_depth is None: diff --git a/earthdiagnostics/ocean/verticalmeanmeters.py b/earthdiagnostics/ocean/verticalmeanmeters.py index 24eb1e63bf1f75d14d13d46510b87a70beed2bcf..3f280356c7e5e3f46e3b199b887cdd337357c1d1 100644 --- a/earthdiagnostics/ocean/verticalmeanmeters.py +++ b/earthdiagnostics/ocean/verticalmeanmeters.py @@ -84,7 +84,8 @@ class VerticalMeanMeters(Diagnostic): Runs the diagnostic """ temp = TempFile.get() - variable_file = self.data_manager.get_file('ocean', self.variable, self.startdate, self.member, self.chunk) + variable_file = self.data_manager.get_file(ModelingRealms.ocean, self.variable, self.startdate, self.member, + self.chunk) handler = Utils.openCdf(variable_file) if self.box.min_depth is None: diff --git a/earthdiagnostics/threddsmanager.py b/earthdiagnostics/threddsmanager.py index 73745ed01a09077f536d97f65f2c71c34d736b9f..de75cd022e101d2dd1b2eef996399811872eeb62 100644 --- a/earthdiagnostics/threddsmanager.py +++ b/earthdiagnostics/threddsmanager.py @@ -51,6 +51,40 @@ class THREDDSManager(DataManager): Utils.cdo.selmonth(selected_months, input=thredds_subset, output=temp) return temp + def file_exists(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None, + vartype=VariableType.MEAN): + """ + Copies a given file from the CMOR repository to the scratch folder and returns the path to the scratch's copy + + :param domain: CMOR domain + :type domain: str + :param var: variable name + :type var: str + :param startdate: file's startdate + :type startdate: str + :param member: file's member + :type member: int + :param chunk: file's chunk + :type chunk: int + :param grid: file's grid (only needed if it is not the original) + :type grid: str + :param box: file's box (only needed to retrieve sections or averages) + :type box: Box + :param frequency: file's frequency (only needed if it is different from the default) + :type frequency: Frequency + :param vartype: Variable type (mean, statistic) + :type vartype: VariableType + :return: path to the copy created on the scratch folder + :rtype: str + """ + aggregation_path = self.get_var_url(var, startdate, frequency, box, vartype) + + start_chunk = chunk_start_date(parse_date(startdate), chunk, self.experiment.chunk_size, 'month', 'standard') + end_chunk = chunk_end_date(start_chunk, self.experiment.chunk_size, 'month', 'standard') + + thredds_subset = THREDDSSubset(aggregation_path, var, start_chunk, end_chunk) + return thredds_subset.check() + def get_file(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None, vartype=VariableType.MEAN): """ @@ -221,6 +255,20 @@ class THREDDSManager(DataManager): return thredds_subset.download() def get_var_url(self, var, startdate, frequency, box, vartype): + """ + Get url for dataset + :param var: variable to retrieve + :type var: str + :param startdate: startdate to retrieve + :type startdate: str + :param frequency: frequency to get: + :type frequency: Frequency | None + :param box: box to get + :type box: Box + :param vartype: type of variable + :type vartype: VariableType + :return: + """ if not frequency: frequency = self.config.frequency var = self._get_final_var_name(box, var) @@ -300,6 +348,15 @@ class THREDDSSubset: url = self.get_url() return self._download_url(url) + def check(self): + # noinspection PyBroadException + try: + self.handler = Utils.openCdf(self.get_url()) + self.handler.close() + return True + except Exception: + return False + def _read_metadata(self): self.var_dimensions = self.handler.variables[self.var].dimensions for dimension in self.var_dimensions: diff --git a/earthdiagnostics/utils.py b/earthdiagnostics/utils.py index 81818c483db9cb5f801e114104891713d374ec71..f8248ad19ce3e28413f806d66d100c4ac99b32de 100644 --- a/earthdiagnostics/utils.py +++ b/earthdiagnostics/utils.py @@ -287,7 +287,8 @@ class Utils(object): Log.log.log(log_level, line) output.append(line) if process.returncode != 0: - raise Utils.ExecutionError('Error executing {0}\n Return code: {1}', ' '.join(command), process.returncode) + raise Utils.ExecutionError('Error executing {0}\n Return code: {1}'.format(' '.join(command), + process.returncode)) return output _cpu_count = None diff --git a/earthdiagnostics/variable.py b/earthdiagnostics/variable.py index c9e370e49e8ac717e4954f992dfc27e273036865..30ae1154e28e7ccc54fe402ab1a3a4d1658c8033 100644 --- a/earthdiagnostics/variable.py +++ b/earthdiagnostics/variable.py @@ -50,6 +50,15 @@ class VariableManager(object): Log.warning('Variable {0} is not defined in the CMOR table. Please add it'.format(original_name)) return None + def get_all_variables(self): + """ + Returns all variables + + :return: CMOR variable list + :rtype: set[Variable] + """ + return set(self._dict_variables.values()) + def get_variable_and_alias(self, original_name, silent=False): """ Returns the cmor variable instance given a variable name @@ -229,16 +238,17 @@ class VariableManager(object): if sheet['A1'].value != 'Priority': continue table_frequency, table_date = table_data[sheet.title] - + table = CMORTable(sheet.title, table_frequency, table_date) for row in sheet.rows: if row[0].value == 'Priority' or not row[5].value: continue if row[5].value.lower() in self._dict_variables: - self._dict_variables[row[5].value.lower()].tables.append(sheet.title) + self._dict_variables[row[5].value.lower()].tables.append(table) continue var = Variable() + var.priority = row[0].value var.short_name = row[5].value var.standard_name = row[6].value var.long_name = row[1].value @@ -246,7 +256,7 @@ class VariableManager(object): self._process_modelling_realm(var, row[12].value) var.units = row[2].value - var.tables.append(CMORTable(sheet.title, table_frequency, table_date)) + var.tables.append(table) self._dict_variables[var.short_name.lower()] = var def _process_modelling_realm(self, var, value): @@ -282,7 +292,9 @@ class Variable(object): self.valid_min = None self.valid_max = None self.grid = None + self.priority = None self.default = False + self.domain = None self.known_aliases = [] self.tables = [] @@ -353,7 +365,7 @@ class VariableAlias(object): class CMORTable(object): def __init__(self, name, frequency, date): self.name = name - self.frequency = frequency + self.frequency = Frequency.parse(frequency) self.date = date def __str__(self): diff --git a/earthdiagnostics/variable_type.py b/earthdiagnostics/variable_type.py index ae9c61fcebfca6df221eea9b31c7a31d902d901a..4b3f17daa0b515582f1870cbbc7f05dacfa3379a 100644 --- a/earthdiagnostics/variable_type.py +++ b/earthdiagnostics/variable_type.py @@ -1,3 +1,4 @@ +# coding=utf-8 class VariableType(object): MEAN = 1 STATISTIC = 2 diff --git a/test/unit/test_variable.py b/test/unit/test_variable.py index 3951dfa61fd47f537d3677708ce3c8147e02c545..7f3e0b8472157c518f517af25cbafdb1a4812bf5 100644 --- a/test/unit/test_variable.py +++ b/test/unit/test_variable.py @@ -1,8 +1,8 @@ # coding=utf-8 -from unittest import TestCase - -from earthdiagnostics.variable import Variable -from earthdiagnostics.modelingrealm import ModelingRealms +# from unittest import TestCase +# +# from earthdiagnostics.variable import Variable +# from earthdiagnostics.modelingrealm import ModelingRealms # class TestVariable(TestCase):