From c43ed2adde969616df3e17b17c293954c52a427e Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Mon, 9 Jan 2017 12:02:25 +0100 Subject: [PATCH 01/15] Diags will now only unpack cmorfiles that are not already available --- diags.conf | 2 +- earthdiagnostics/cmormanager.py | 46 +++++++++++++++++++++++---------- earthdiagnostics/config.py | 18 +++++++++++-- 3 files changed, 50 insertions(+), 16 deletions(-) diff --git a/diags.conf b/diags.conf index ee1eef10..c096fcbd 100644 --- a/diags.conf +++ b/diags.conf @@ -86,7 +86,7 @@ OCEAN_TIMESTEP = 6 # if 2, fc00 # CHUNK_SIZE is the size of each data file, given in months # CHUNKS is the number of chunks. You can specify less chunks than present on the experiment -EXPID = a0au +EXPID = a07o STARTDATES = 20000201 20000501 20010201 20010501 20020201 20020501 20030201 20030501 20040201 20040501 20050201 20050501 20060201 20060501 20070201 20070501 20080201 20080501 20090201 20090501 MEMBERS = 0 1 2 3 4 5 6 7 MEMBER_DIGITS = 1 diff --git a/earthdiagnostics/cmormanager.py b/earthdiagnostics/cmormanager.py index 70ad7361..8808ff08 100644 --- a/earthdiagnostics/cmormanager.py +++ b/earthdiagnostics/cmormanager.py @@ -10,6 +10,7 @@ from autosubmit.date.chunk_date_lib import parse_date, chunk_start_date, chunk_e from earthdiagnostics.cmorizer import Cmorizer from earthdiagnostics.datamanager import DataManager, NetCDFFile from earthdiagnostics.frequency import Frequencies, Frequency +from earthdiagnostics.modelingrealm import ModelingRealm from earthdiagnostics.utils import TempFile, Utils from earthdiagnostics.variable import Variable from earthdiagnostics.variable_type import VariableType @@ -340,12 +341,10 @@ class CMORManager(DataManager): # Check if cmorized and convert if not for startdate, member in self.experiment.get_member_list(): - if not self.config.cmor.force and not self.config.cmor.force_untar and self._is_cmorized(startdate, member): - continue if not self._unpack_cmor_files(startdate, member): self._cmorize_member(startdate, member) - def _is_cmorized(self, startdate, member): + def _is_cmorized(self, startdate, member, chunk): startdate_path = self._get_startdate_path(startdate) if not os.path.exists(startdate_path): return False @@ -354,8 +353,8 @@ class CMORManager(DataManager): for domain in os.listdir(freq_path): domain_path = os.path.join(freq_path, domain) for var in os.listdir(domain_path): - member_path = os.path.join(domain_path, var, 'r{0}i1p1'.format(member + 1)) - if os.path.exists(member_path): + var_path = self.get_file_path(startdate, member, ModelingRealm(domain), var, chunk, Frequency(freq)) + if os.path.exists(var_path): return True return False @@ -366,34 +365,53 @@ class CMORManager(DataManager): cmorizer = Cmorizer(self, startdate, member) cmorizer.cmorize_ocean() cmorizer.cmorize_atmos() - Log.result('CMORized startdate {0} member {1}!\n\n', startdate, member_str, datetime.now() - start_time) + Log.result('CMORized startdate {0} member {1}! Ellpased time: {2}\n\n', startdate, member_str, + datetime.now() - start_time) def _unpack_cmor_files(self, startdate, member): if self.config.cmor.force: return False - filepaths = self._get_transferred_cmor_data_filepaths(startdate, member, 'tar.gz') + chunk = 1 + cmorized = False + + if not self.config.cmor.force_untar: + while self._is_cmorized(startdate, member, chunk): + chunk += 1 + cmorized = True + + while self._unpack_chunk(startdate, member, chunk): + chunk += 1 + cmorized = True + + return cmorized + + def _unpack_chunk(self, startdate, member, chunk): + + filepaths = self._get_transferred_cmor_data_filepaths(startdate, member, chunk, 'tar.gz') if len(filepaths) > 0: - Log.info('Unzipping cmorized data for {0} {1}...', startdate, member) + Log.info('Unzipping cmorized data for {0} {1} {2}...', startdate, member, chunk) Utils.unzip(filepaths, True) if not os.path.exists(self.cmor_path): os.mkdir(self.cmor_path) - filepaths = self._get_transferred_cmor_data_filepaths(startdate, member, 'tar') + filepaths = self._get_transferred_cmor_data_filepaths(startdate, member, chunk, 'tar') if len(filepaths) > 0: - Log.info('Unpacking cmorized data for {0} {1}...', startdate, member) + Log.info('Unpacking cmorized data for {0} {1} {2}...', startdate, member, chunk) Utils.untar(filepaths, self.cmor_path) self._correct_paths(startdate) self._create_links(startdate) return True return False - def _get_transferred_cmor_data_filepaths(self, startdate, member, extension): + def _get_transferred_cmor_data_filepaths(self, startdate, member, chunk, extension): tar_path = os.path.join(self.config.data_dir, self.experiment.expid, 'original_files', 'cmorfiles') tar_original_files = os.path.join(self.config.data_dir, 'original_files', self.experiment.expid, 'cmorfiles') - file_name = 'CMOR?_{0}_{1}_{2}_*.{3}'.format(self.experiment.expid, startdate, - self.experiment.get_member_str(member), extension) + file_name = 'CMOR?_{0}_{1}_{2}_{3}-*.{4}'.format(self.experiment.expid, startdate, + self.experiment.get_member_str(member), + self.experiment.get_chunk_start_str(startdate, chunk), + extension) filepaths = glob.glob(os.path.join(tar_path, file_name)) filepaths += glob.glob(os.path.join(tar_path, 'outputs', file_name)) filepaths += glob.glob(os.path.join(tar_original_files, file_name)) @@ -477,3 +495,5 @@ class CMORManager(DataManager): """ return os.path.join(self.config.data_dir, self.experiment.expid, 'cmorfiles', self.experiment.institute, self.experiment.model, self.experiment.experiment_name, 'S' + startdate) + + diff --git a/earthdiagnostics/config.py b/earthdiagnostics/config.py index 8ab9159d..2b9c736f 100644 --- a/earthdiagnostics/config.py +++ b/earthdiagnostics/config.py @@ -2,7 +2,7 @@ import os from autosubmit.config.log import Log -from autosubmit.date.chunk_date_lib import parse_date, chunk_start_date, chunk_end_date +from autosubmit.date.chunk_date_lib import parse_date, chunk_start_date, chunk_end_date, date2str from earthdiagnostics.frequency import Frequency, Frequencies from earthdiagnostics.parser import Parser @@ -257,7 +257,7 @@ class ExperimentConfig(object): date = parse_date(startdate) chunks = list() for chunk in range(1, self.num_chunks + 1): - chunk_start = chunk_start_date(date, chunk, self.chunk_size, 'month', self.calendar) + chunk_start = self.get_chunk_start(date, chunk) if chunk_start.year > year: break elif chunk_start.year == year or chunk_end_date(chunk_start, self.chunk_size, 'month', @@ -266,6 +266,20 @@ class ExperimentConfig(object): return chunks + def get_chunk_start(self, startdate, chunk): + if isinstance(startdate, basestring): + startdate = parse_date(startdate) + return chunk_start_date(startdate, chunk, self.chunk_size, 'month', self.calendar) + + def get_chunk_start_str(self, startdate, chunk): + return date2str(self.get_chunk_start(startdate, chunk)) + + def get_chunk_end(self, startdate, chunk): + return chunk_end_date(self.get_chunk_start(startdate, chunk), self.chunk_size, 'month', self.calendar) + + def get_chunk_end_str(self, startdate, chunk): + return date2str(self.get_chunk_end(startdate, chunk)) + def get_full_years(self, startdate): """ Returns the list of full years that are in the given startdate -- GitLab From 36d16486fb10a7f76f5820cf1aca475c07e6d3ec Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Mon, 9 Jan 2017 12:42:05 +0100 Subject: [PATCH 02/15] Finished chunk by chunk cmorization on MMO and other similar files --- earthdiagnostics/cmorizer.py | 70 ++++++++++++++++++++++----------- earthdiagnostics/cmormanager.py | 8 +++- 2 files changed, 55 insertions(+), 23 deletions(-) diff --git a/earthdiagnostics/cmorizer.py b/earthdiagnostics/cmorizer.py index f988b366..6ccb3b87 100644 --- a/earthdiagnostics/cmorizer.py +++ b/earthdiagnostics/cmorizer.py @@ -20,7 +20,7 @@ class Cmorizer(object): Class to manage CMORization :param data_manager: experiment's data manager - :type data_manager: DataManager + :type data_manager: CMORManager :param startdate: startdate to cmorize :type startdate: str :param member: member to cmorize @@ -51,7 +51,7 @@ class Cmorizer(object): self.original_files_path = os.path.join(self.config.data_dir, self.experiment.expid, 'original_files', self.startdate, self.member_str, 'outputs') self.atmos_timestep = None - self.cmor_scratch = os.path.join(self.config.scratch_dir, 'CMOR') + self.cmor_scratch = os.path.join(self.config.scratch_dir, 'CMOR', self.startdate, self.member_str) def cmorize_ocean(self): """ @@ -71,6 +71,10 @@ class Cmorizer(object): tar_files.sort() count = 1 for tarfile in tar_files: + if self.cmorization_required(self.get_chunk(os.path.basename(tarfile))): + Log.info('No need to unpack file {0}/{1}'.format(count, len(tar_files))) + continue + Log.info('Unpacking oceanic file {0}/{1}'.format(count, len(tar_files))) self._unpack_tar_file(tarfile) self._cmorize_nc_files() @@ -101,21 +105,24 @@ class Cmorizer(object): def _merge_mma_files(self, tarfile): temp = TempFile.get() - for filename in glob.glob(os.path.join(self.cmor_scratch, 'MMA_*_SH_*.nc')): - Utils.cdo.sp2gpl(options='-O', input=filename, output=temp) - shutil.move(temp, filename) sh_files = glob.glob(os.path.join(self.cmor_scratch, 'MMA_*_SH_*.nc')) - Utils.cdo.mergetime(input=sh_files, output=os.path.join(self.cmor_scratch, 'sh.nc')) gg_files = glob.glob(os.path.join(self.cmor_scratch, 'MMA_*_GG_*.nc')) - Utils.cdo.mergetime(input=gg_files, output=os.path.join(self.cmor_scratch, 'gg.nc')) + + merged_sh = TempFile.get() + merged_gg = TempFile.get() + + for filename in sh_files: + Utils.cdo.sp2gpl(options='-O', input=filename, output=temp) + shutil.move(temp, filename) + Utils.cdo.mergetime(input=sh_files, output=merged_sh) + Utils.cdo.mergetime(input=gg_files, output=merged_gg) for filename in sh_files + gg_files: os.remove(filename) - Utils.nco.ncks(input=os.path.join(self.cmor_scratch, 'sh.nc'), - output=os.path.join(self.cmor_scratch, 'gg.nc'), options='-A') - os.remove(os.path.join(self.cmor_scratch, 'sh.nc')) + Utils.nco.ncks(input=merged_sh, output=merged_gg, options='-A') + os.remove(merged_sh) tar_startdate = tarfile[0:-4].split('_')[5].split('-') new_name = 'MMA_1m_{0[0]}_{0[1]}.nc'.format(tar_startdate) - shutil.move(os.path.join(self.cmor_scratch, 'gg.nc'), os.path.join(self.cmor_scratch, new_name)) + shutil.move(merged_gg, os.path.join(self.cmor_scratch, new_name)) def cmorize_atmos(self): """ @@ -145,24 +152,26 @@ class Cmorizer(object): count += 1 def _cmorize_grib_files(self): - count = 1 + chunk = 1 chunk_start = parse_date(self.startdate) while os.path.exists(self.get_original_grib_path(chunk_start, 'GG')) or \ os.path.exists(self.get_original_grib_path(chunk_start, 'SH')): - chunk_end = chunk_end_date(chunk_start, self.experiment.chunk_size, 'month', 'standard') - chunk_end = previous_day(chunk_end, 'standard') - Log.info('CMORizing chunk {0}-{1}', date2str(chunk_start), date2str(chunk_end)) - for grid in ('SH', 'GG'): - Log.info('Processing {0} variables', grid) + if self.cmorization_required(chunk): + chunk_end = chunk_end_date(chunk_start, self.experiment.chunk_size, 'month', 'standard') + chunk_end = previous_day(chunk_end, 'standard') + Log.info('CMORizing chunk {0}-{1}', date2str(chunk_start), date2str(chunk_end)) + for grid in ('SH', 'GG'): + Log.info('Processing {0} variables', grid) - if not os.path.exists(self.get_original_grib_path(chunk_start, grid)): - continue - self.cmorize_grib_file(chunk_end, chunk_start, count, grid) + if not os.path.exists(self.get_original_grib_path(chunk_start, grid)): + continue + self.cmorize_grib_file(chunk_end, chunk_start, grid) chunk_start = chunk_end_date(chunk_start, self.experiment.chunk_size, 'month', 'standard') + chunk += 1 - def cmorize_grib_file(self, chunk_end, chunk_start, count, grid): + def cmorize_grib_file(self, chunk_end, chunk_start, grid): for month in range(0, self.experiment.chunk_size): current_date = add_months(chunk_start, month, 'standard') original_gribfile = self.get_original_grib_path(current_date, grid) @@ -208,7 +217,7 @@ class Cmorizer(object): os.remove(splited_file) Log.result('Month {0}, {1} variables finished', date2str(current_date), grid) - count += 1 + self._merge_and_cmorize_atmos(chunk_start, chunk_end, grid, Frequencies.monthly) self._merge_and_cmorize_atmos(chunk_start, chunk_end, grid, Frequencies.daily) self._merge_and_cmorize_atmos(chunk_start, chunk_end, grid, @@ -346,6 +355,20 @@ class Cmorizer(object): else: return None + def get_chunk(self, file_path): + chunk_start = parse_date(self.get_date_str(file_path).split('-')[0]) + current_date = parse_date(self.startdate) + chunk = 1 + while current_date < chunk_start: + current_date = chunk_end_date(current_date, self.experiment.chunk_size, 'month', 'standard') + chunk += 1 + + if current_date != chunk_start: + raise Exception('File {0} start date is not a valid chunk start date'.format(file_path)) + return chunk + + + @staticmethod def _add_coordinate_variables(handler, temp): handler_cmor = Utils.openCdf(temp) @@ -539,6 +562,9 @@ class Cmorizer(object): gribfiles = glob.glob(grb_path) return len(gribfiles) > 0 + def cmorization_required(self, chunk): + return not self.config.cmor.force and self.data_manager.is_cmorized(self.startdate, self.member, chunk) + class CMORException(Exception): pass diff --git a/earthdiagnostics/cmormanager.py b/earthdiagnostics/cmormanager.py index 8808ff08..d5a37633 100644 --- a/earthdiagnostics/cmormanager.py +++ b/earthdiagnostics/cmormanager.py @@ -22,6 +22,7 @@ class CMORManager(DataManager): """ def __init__(self, config): super(CMORManager, self).__init__(config) + self._dic_cmorized = dict() data_folders = self.config.data_dir.split(':') experiment_folder = self.experiment.model.lower() if experiment_folder.startswith('ec-earth'): @@ -344,6 +345,11 @@ class CMORManager(DataManager): if not self._unpack_cmor_files(startdate, member): self._cmorize_member(startdate, member) + def is_cmorized(self, startdate, member, chunk): + if (startdate, member, chunk) not in self._dic_cmorized: + self._dic_cmorized[(startdate, member, chunk)] = self._is_cmorized(startdate, member, chunk) + return self._dic_cmorized[(startdate, member, chunk)] + def _is_cmorized(self, startdate, member, chunk): startdate_path = self._get_startdate_path(startdate) if not os.path.exists(startdate_path): @@ -375,7 +381,7 @@ class CMORManager(DataManager): cmorized = False if not self.config.cmor.force_untar: - while self._is_cmorized(startdate, member, chunk): + while self.is_cmorized(startdate, member, chunk): chunk += 1 cmorized = True -- GitLab From c33fbf847f9f284b40832537b05bae6d06358649 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Mon, 9 Jan 2017 12:52:42 +0100 Subject: [PATCH 03/15] Fixed small issues with the previous commit --- earthdiagnostics/cmorizer.py | 1 + earthdiagnostics/cmormanager.py | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/earthdiagnostics/cmorizer.py b/earthdiagnostics/cmorizer.py index 6ccb3b87..ba16c31b 100644 --- a/earthdiagnostics/cmorizer.py +++ b/earthdiagnostics/cmorizer.py @@ -73,6 +73,7 @@ class Cmorizer(object): for tarfile in tar_files: if self.cmorization_required(self.get_chunk(os.path.basename(tarfile))): Log.info('No need to unpack file {0}/{1}'.format(count, len(tar_files))) + count += 1 continue Log.info('Unpacking oceanic file {0}/{1}'.format(count, len(tar_files))) diff --git a/earthdiagnostics/cmormanager.py b/earthdiagnostics/cmormanager.py index d5a37633..d624c22c 100644 --- a/earthdiagnostics/cmormanager.py +++ b/earthdiagnostics/cmormanager.py @@ -383,7 +383,6 @@ class CMORManager(DataManager): if not self.config.cmor.force_untar: while self.is_cmorized(startdate, member, chunk): chunk += 1 - cmorized = True while self._unpack_chunk(startdate, member, chunk): chunk += 1 -- GitLab From 1701c4dba3cda33648270be10b5a10e032cd5eb2 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Mon, 9 Jan 2017 12:59:22 +0100 Subject: [PATCH 04/15] CMORization of MMA files will now skip the already cmorized chunks --- earthdiagnostics/cmorizer.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/earthdiagnostics/cmorizer.py b/earthdiagnostics/cmorizer.py index ba16c31b..6cfbf749 100644 --- a/earthdiagnostics/cmorizer.py +++ b/earthdiagnostics/cmorizer.py @@ -61,6 +61,7 @@ class Cmorizer(object): if not self.cmor.ocean: Log.info('Skipping ocean cmorization due to configuration') return + Log.info('\nCMORizing ocean\n') self._cmorize_ocean_files('MMO') self._cmorize_ocean_files('PPO') self._cmorize_ocean_files('diags') @@ -134,6 +135,7 @@ class Cmorizer(object): Log.info('Skipping atmosphere cmorization due to configuration') return + Log.info('\nCMORizing atmosphere\n') if self.cmor.use_grib and self.gribfiles_available(): self._cmorize_grib_files() else: @@ -144,6 +146,10 @@ class Cmorizer(object): tar_files.sort() count = 1 for tarfile in tar_files: + if self.cmorization_required(self.get_chunk(os.path.basename(tarfile))): + Log.info('No need to unpack file {0}/{1}'.format(count, len(tar_files))) + count += 1 + continue Log.info('Unpacking atmospheric file {0}/{1}'.format(count, len(tar_files))) self._unpack_tar_file(tarfile) self._merge_mma_files(tarfile) -- GitLab From 86552ed58bf3a0b6fd82975b9974b7ca86673b02 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Mon, 9 Jan 2017 15:44:51 +0100 Subject: [PATCH 05/15] CMORization of grib files only done when required. Will skip only affected chunks on file errors --- earthdiagnostics/cmorizer.py | 55 +++++++++++++++++++++------------ earthdiagnostics/cmormanager.py | 28 +++++++++-------- earthdiagnostics/utils.py | 3 +- 3 files changed, 52 insertions(+), 34 deletions(-) diff --git a/earthdiagnostics/cmorizer.py b/earthdiagnostics/cmorizer.py index 6cfbf749..f1d4cd97 100644 --- a/earthdiagnostics/cmorizer.py +++ b/earthdiagnostics/cmorizer.py @@ -72,15 +72,18 @@ class Cmorizer(object): tar_files.sort() count = 1 for tarfile in tar_files: - if self.cmorization_required(self.get_chunk(os.path.basename(tarfile))): + if not self.cmorization_required(self.get_chunk(os.path.basename(tarfile)), ModelingRealms.ocean): Log.info('No need to unpack file {0}/{1}'.format(count, len(tar_files))) count += 1 continue Log.info('Unpacking oceanic file {0}/{1}'.format(count, len(tar_files))) - self._unpack_tar_file(tarfile) - self._cmorize_nc_files() - Log.result('Oceanic file {0}/{1} finished'.format(count, len(tar_files))) + try: + self._unpack_tar_file(tarfile) + self._cmorize_nc_files() + Log.result('Oceanic file {0}/{1} finished'.format(count, len(tar_files))) + except Exception as ex: + Log.error('Could not CMORize oceanic file {0}: {1}', count, ex) count += 1 def _cmorize_nc_files(self): @@ -146,16 +149,20 @@ class Cmorizer(object): tar_files.sort() count = 1 for tarfile in tar_files: - if self.cmorization_required(self.get_chunk(os.path.basename(tarfile))): + if self.cmorization_required(self.get_chunk(os.path.basename(tarfile)), ModelingRealms.atmos): Log.info('No need to unpack file {0}/{1}'.format(count, len(tar_files))) count += 1 continue Log.info('Unpacking atmospheric file {0}/{1}'.format(count, len(tar_files))) - self._unpack_tar_file(tarfile) - self._merge_mma_files(tarfile) - self._correct_fluxes() - self._cmorize_nc_files() - Log.result('Atmospheric file {0}/{1} finished'.format(count, len(tar_files))) + try: + self._unpack_tar_file(tarfile) + self._merge_mma_files(tarfile) + self._correct_fluxes() + self._cmorize_nc_files() + Log.result('Atmospheric file {0}/{1} finished'.format(count, len(tar_files))) + except Exception as ex: + Log.error('Could not cmorize atmospheric file {0}: {1}', count, ex) + count += 1 def _cmorize_grib_files(self): @@ -165,16 +172,20 @@ class Cmorizer(object): while os.path.exists(self.get_original_grib_path(chunk_start, 'GG')) or \ os.path.exists(self.get_original_grib_path(chunk_start, 'SH')): - if self.cmorization_required(chunk): + if self.cmorization_required(chunk, ModelingRealms.atmos): chunk_end = chunk_end_date(chunk_start, self.experiment.chunk_size, 'month', 'standard') chunk_end = previous_day(chunk_end, 'standard') Log.info('CMORizing chunk {0}-{1}', date2str(chunk_start), date2str(chunk_end)) - for grid in ('SH', 'GG'): - Log.info('Processing {0} variables', grid) - - if not os.path.exists(self.get_original_grib_path(chunk_start, grid)): - continue - self.cmorize_grib_file(chunk_end, chunk_start, grid) + try: + for grid in ('SH', 'GG'): + Log.info('Processing {0} variables', grid) + + if not os.path.exists(self.get_original_grib_path(chunk_start, grid)): + continue + self.cmorize_grib_file(chunk_end, chunk_start, grid) + except Exception as ex: + Log.error('Can not cmorize GRIB file for chunk {0}-{1}: {2}', + date2str(chunk_start), date2str(chunk_end), ex) chunk_start = chunk_end_date(chunk_start, self.experiment.chunk_size, 'month', 'standard') chunk += 1 @@ -355,7 +366,11 @@ class Cmorizer(object): file_parts = os.path.basename(file_path).split('_') if file_parts[0] in (self.experiment.expid, 'MMA', 'MMO') or file_parts[0].startswith('ORCA'): # Model output - return '{0}-{1}'.format(file_parts[2][0:6], file_parts[3][0:6]) + if file_parts[-1].endswith('.tar'): + file_parts = file_parts[-1][0:-4].split('-') + return '{0}-{1}'.format(file_parts[0][0:6], file_parts[1][0:6]) + else: + return '{0}-{1}'.format(file_parts[2][0:6], file_parts[3][0:6]) elif file_parts[1] == self.experiment.expid: # Files generated by the old version of the diagnostics return '{0}-{1}'.format(file_parts[4][0:6], file_parts[5][0:6]) @@ -569,8 +584,8 @@ class Cmorizer(object): gribfiles = glob.glob(grb_path) return len(gribfiles) > 0 - def cmorization_required(self, chunk): - return not self.config.cmor.force and self.data_manager.is_cmorized(self.startdate, self.member, chunk) + def cmorization_required(self, chunk, domain): + return self.config.cmor.force or not self.data_manager.is_cmorized(self.startdate, self.member, chunk, domain) class CMORException(Exception): diff --git a/earthdiagnostics/cmormanager.py b/earthdiagnostics/cmormanager.py index d624c22c..e418f67e 100644 --- a/earthdiagnostics/cmormanager.py +++ b/earthdiagnostics/cmormanager.py @@ -10,7 +10,7 @@ from autosubmit.date.chunk_date_lib import parse_date, chunk_start_date, chunk_e from earthdiagnostics.cmorizer import Cmorizer from earthdiagnostics.datamanager import DataManager, NetCDFFile from earthdiagnostics.frequency import Frequencies, Frequency -from earthdiagnostics.modelingrealm import ModelingRealm +from earthdiagnostics.modelingrealm import ModelingRealm, ModelingRealms from earthdiagnostics.utils import TempFile, Utils from earthdiagnostics.variable import Variable from earthdiagnostics.variable_type import VariableType @@ -345,22 +345,23 @@ class CMORManager(DataManager): if not self._unpack_cmor_files(startdate, member): self._cmorize_member(startdate, member) - def is_cmorized(self, startdate, member, chunk): - if (startdate, member, chunk) not in self._dic_cmorized: - self._dic_cmorized[(startdate, member, chunk)] = self._is_cmorized(startdate, member, chunk) - return self._dic_cmorized[(startdate, member, chunk)] + def is_cmorized(self, startdate, member, chunk, domain): + identifier = (startdate, member, chunk, domain.name) + if identifier not in self._dic_cmorized: + self._dic_cmorized[identifier] = self._is_cmorized(startdate, member, chunk, domain) + return self._dic_cmorized[identifier] - def _is_cmorized(self, startdate, member, chunk): + def _is_cmorized(self, startdate, member, chunk, domain): startdate_path = self._get_startdate_path(startdate) - if not os.path.exists(startdate_path): + if not os.path.isdir(startdate_path): return False for freq in os.listdir(startdate_path): - freq_path = os.path.join(startdate_path, freq) - for domain in os.listdir(freq_path): - domain_path = os.path.join(freq_path, domain) + domain_path = os.path.join(startdate_path, freq, + domain.name) + if os.path.isdir(domain_path): for var in os.listdir(domain_path): - var_path = self.get_file_path(startdate, member, ModelingRealm(domain), var, chunk, Frequency(freq)) - if os.path.exists(var_path): + var_path = self.get_file_path(startdate, member, domain, var, chunk, Frequency(freq)) + if os.path.isfile(var_path): return True return False @@ -381,7 +382,8 @@ class CMORManager(DataManager): cmorized = False if not self.config.cmor.force_untar: - while self.is_cmorized(startdate, member, chunk): + while self.is_cmorized(startdate, member, chunk, ModelingRealms.ocean) or\ + self.is_cmorized(startdate, member, chunk, ModelingRealms.atmos): chunk += 1 while self._unpack_chunk(startdate, member, chunk): diff --git a/earthdiagnostics/utils.py b/earthdiagnostics/utils.py index 81818c48..f8248ad1 100644 --- a/earthdiagnostics/utils.py +++ b/earthdiagnostics/utils.py @@ -287,7 +287,8 @@ class Utils(object): Log.log.log(log_level, line) output.append(line) if process.returncode != 0: - raise Utils.ExecutionError('Error executing {0}\n Return code: {1}', ' '.join(command), process.returncode) + raise Utils.ExecutionError('Error executing {0}\n Return code: {1}'.format(' '.join(command), + process.returncode)) return output _cpu_count = None -- GitLab From a45f400e3caf18efeba0b909467cb9fbb34c6360 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Mon, 9 Jan 2017 16:11:41 +0100 Subject: [PATCH 06/15] Fixed bugs --- earthdiagnostics/cmorizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/earthdiagnostics/cmorizer.py b/earthdiagnostics/cmorizer.py index f1d4cd97..934d5845 100644 --- a/earthdiagnostics/cmorizer.py +++ b/earthdiagnostics/cmorizer.py @@ -149,7 +149,7 @@ class Cmorizer(object): tar_files.sort() count = 1 for tarfile in tar_files: - if self.cmorization_required(self.get_chunk(os.path.basename(tarfile)), ModelingRealms.atmos): + if not self.cmorization_required(self.get_chunk(os.path.basename(tarfile)), ModelingRealms.atmos): Log.info('No need to unpack file {0}/{1}'.format(count, len(tar_files))) count += 1 continue -- GitLab From 398b31476a681abbdc3d1b4f9bc4996173e24df5 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Mon, 9 Jan 2017 16:51:33 +0100 Subject: [PATCH 07/15] Fixed bugs due to change in Modelling Realm management --- earthdiagnostics/ocean/areamoc.py | 2 +- earthdiagnostics/ocean/convectionsites.py | 2 +- earthdiagnostics/ocean/gyres.py | 2 +- earthdiagnostics/ocean/heatcontent.py | 4 ++-- earthdiagnostics/ocean/verticalmean.py | 3 ++- earthdiagnostics/ocean/verticalmeanmeters.py | 2 +- 6 files changed, 8 insertions(+), 7 deletions(-) diff --git a/earthdiagnostics/ocean/areamoc.py b/earthdiagnostics/ocean/areamoc.py index 763da95f..d2ea66ef 100644 --- a/earthdiagnostics/ocean/areamoc.py +++ b/earthdiagnostics/ocean/areamoc.py @@ -93,7 +93,7 @@ class AreaMoc(Diagnostic): cdo = Utils.cdo temp2 = TempFile.get() - temp = self.data_manager.get_file('ocean', 'vsftmyz', self.startdate, self.member, self.chunk) + temp = self.data_manager.get_file(ModelingRealms.ocean, 'vsftmyz', self.startdate, self.member, self.chunk) handler = Utils.openCdf(temp) if 'i' in handler.dimensions: diff --git a/earthdiagnostics/ocean/convectionsites.py b/earthdiagnostics/ocean/convectionsites.py index 876ec080..706db0cd 100644 --- a/earthdiagnostics/ocean/convectionsites.py +++ b/earthdiagnostics/ocean/convectionsites.py @@ -85,7 +85,7 @@ class ConvectionSites(Diagnostic): else: raise Exception("Input grid {0} not recognized".format(self.model_version)) - mlotst_file = self.data_manager.get_file('ocean', 'mlotst', self.startdate, self.member, self.chunk) + mlotst_file = self.data_manager.get_file(ModelingRealms.ocean, 'mlotst', self.startdate, self.member, self.chunk) output = TempFile.get() self.mlotst_handler = Utils.openCdf(mlotst_file) diff --git a/earthdiagnostics/ocean/gyres.py b/earthdiagnostics/ocean/gyres.py index 29b11398..4e1e384f 100644 --- a/earthdiagnostics/ocean/gyres.py +++ b/earthdiagnostics/ocean/gyres.py @@ -93,7 +93,7 @@ class Gyres(Diagnostic): raise Exception("Input grid {0} not recognized".format(self.model_version)) output = TempFile.get() - vsftbarot_file = self.data_manager.get_file('ocean', 'vsftbarot', self.startdate, self.member, self.chunk) + vsftbarot_file = self.data_manager.get_file(ModelingRealms.ocean, 'vsftbarot', self.startdate, self.member, self.chunk) handler_original = Utils.openCdf(vsftbarot_file) self.var_vsftbarot = handler_original.variables['vsftbarot'] diff --git a/earthdiagnostics/ocean/heatcontent.py b/earthdiagnostics/ocean/heatcontent.py index f7843cb0..bafb4252 100644 --- a/earthdiagnostics/ocean/heatcontent.py +++ b/earthdiagnostics/ocean/heatcontent.py @@ -90,9 +90,9 @@ class HeatContent(Diagnostic): Runs the diagnostic """ nco = Utils.nco - temperature_file = self.data_manager.get_file('ocean', 'thetao', self.startdate, self.member, self.chunk) + temperature_file = self.data_manager.get_file(ModelingRealms.ocean, 'thetao', self.startdate, self.member, self.chunk) if self.mxloption != 0: - mlotst_file = self.data_manager.get_file('ocean', 'mlotst', self.startdate, self.member, self.chunk) + mlotst_file = self.data_manager.get_file(ModelingRealms.ocean, 'mlotst', self.startdate, self.member, self.chunk) nco.ncks(input=mlotst_file, output=temperature_file, options='-A -v mlotst') para = list() diff --git a/earthdiagnostics/ocean/verticalmean.py b/earthdiagnostics/ocean/verticalmean.py index 9a58a8ec..4564bb0a 100644 --- a/earthdiagnostics/ocean/verticalmean.py +++ b/earthdiagnostics/ocean/verticalmean.py @@ -86,7 +86,8 @@ class VerticalMean(Diagnostic): Runs the diagnostic """ temp = TempFile.get() - variable_file = self.data_manager.get_file('ocean', self.variable, self.startdate, self.member, self.chunk) + variable_file = self.data_manager.get_file(ModelingRealms.ocean, self.variable, self.startdate, self.member, + self.chunk) handler = Utils.openCdf(variable_file) if self.box.min_depth is None: diff --git a/earthdiagnostics/ocean/verticalmeanmeters.py b/earthdiagnostics/ocean/verticalmeanmeters.py index 24eb1e63..172017e7 100644 --- a/earthdiagnostics/ocean/verticalmeanmeters.py +++ b/earthdiagnostics/ocean/verticalmeanmeters.py @@ -84,7 +84,7 @@ class VerticalMeanMeters(Diagnostic): Runs the diagnostic """ temp = TempFile.get() - variable_file = self.data_manager.get_file('ocean', self.variable, self.startdate, self.member, self.chunk) + variable_file = self.data_manager.get_file(ModelingRealms.ocean, self.variable, self.startdate, self.member, self.chunk) handler = Utils.openCdf(variable_file) if self.box.min_depth is None: -- GitLab From d6cf2e2385ec88aa31ccee9065a5c237bec6505c Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Mon, 9 Jan 2017 17:21:09 +0100 Subject: [PATCH 08/15] Corrected gaussian grids names in interpcdo --- earthdiagnostics/ocean/interpolatecdo.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/earthdiagnostics/ocean/interpolatecdo.py b/earthdiagnostics/ocean/interpolatecdo.py index bc90f291..de275ced 100644 --- a/earthdiagnostics/ocean/interpolatecdo.py +++ b/earthdiagnostics/ocean/interpolatecdo.py @@ -84,12 +84,12 @@ class InterpolateCDO(Diagnostic): @classmethod def _translate_ifs_grids_to_cdo_names(cls, target_grid): - if target_grid.startswith('T159L'): - target_grid = 't106' - if target_grid.startswith('T255L'): - target_grid = 't170' - if target_grid.startswith('T511L'): - target_grid = 't340' + if target_grid.upper().startswith('T159L'): + target_grid = 'n106' + if target_grid.upper().startswith('T255L'): + target_grid = 'n170' + if target_grid.upper().startswith('T511L'): + target_grid = 'n340' return target_grid def compute(self): -- GitLab From 0747eba517adbfc249922a887eb53c964c9d6675 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Mon, 9 Jan 2017 17:30:12 +0100 Subject: [PATCH 09/15] Fixed cdo interpolation --- earthdiagnostics/ocean/interpolatecdo.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/earthdiagnostics/ocean/interpolatecdo.py b/earthdiagnostics/ocean/interpolatecdo.py index de275ced..68a04f58 100644 --- a/earthdiagnostics/ocean/interpolatecdo.py +++ b/earthdiagnostics/ocean/interpolatecdo.py @@ -85,11 +85,11 @@ class InterpolateCDO(Diagnostic): @classmethod def _translate_ifs_grids_to_cdo_names(cls, target_grid): if target_grid.upper().startswith('T159L'): - target_grid = 'n106' + target_grid = 't159grid' if target_grid.upper().startswith('T255L'): - target_grid = 'n170' + target_grid = 't255grid' if target_grid.upper().startswith('T511L'): - target_grid = 'n340' + target_grid = 't511grid' return target_grid def compute(self): -- GitLab From 6310f801311407adde1d66d35dbbc8fa5d00b727 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Mon, 9 Jan 2017 17:50:01 +0100 Subject: [PATCH 10/15] Fixed heatcontent output variable names --- earthdiagnostics/ocean/heatcontent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/earthdiagnostics/ocean/heatcontent.py b/earthdiagnostics/ocean/heatcontent.py index bafb4252..b5062f8d 100644 --- a/earthdiagnostics/ocean/heatcontent.py +++ b/earthdiagnostics/ocean/heatcontent.py @@ -76,7 +76,7 @@ class HeatContent(Diagnostic): DiagnosticIntOption('min_depth'), DiagnosticIntOption('max_depth')) options = cls.process_options(options, options_available) - box = Box(True) + box = Box(False) box.min_depth = options['min_depth'] box.max_depth = options['max_depth'] job_list = list() -- GitLab From eadd67b8577afc709fd693d7a89d771afc8e434c Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Tue, 10 Jan 2017 11:01:02 +0100 Subject: [PATCH 11/15] Added fluxes correction for hfls y hfss --- earthdiagnostics/cmorizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/earthdiagnostics/cmorizer.py b/earthdiagnostics/cmorizer.py index 934d5845..6a255bf1 100644 --- a/earthdiagnostics/cmorizer.py +++ b/earthdiagnostics/cmorizer.py @@ -91,7 +91,7 @@ class Cmorizer(object): self._cmorize_nc_file(filename) def _correct_fluxes(self): - fluxes_vars = ("prsn", "rss", "rls", "rsscs", "rsds", "rlds") + fluxes_vars = ("prsn", "rss", "rls", "rsscs", "rsds", "rlds", "hfss", 'hfls') for filename in glob.glob(os.path.join(self.cmor_scratch, '*.nc')): handler = Utils.openCdf(filename) for varname in handler.variables.keys(): -- GitLab From 82cf0530f04b5e3f96b097240b79713da1633150 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Tue, 10 Jan 2017 12:16:29 +0100 Subject: [PATCH 12/15] Made parameter grid in scale optional again --- earthdiagnostics/general/scale.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/earthdiagnostics/general/scale.py b/earthdiagnostics/general/scale.py index 2f5e743d..ceaac656 100644 --- a/earthdiagnostics/general/scale.py +++ b/earthdiagnostics/general/scale.py @@ -74,7 +74,7 @@ class Scale(Diagnostic): DiagnosticFloatOption('offset'), DiagnosticDomainOption('domain'), DiagnosticOption('variable'), - DiagnosticOption('grid'), + DiagnosticOption('grid', ''), DiagnosticFloatOption('min_limit', float('nan')), DiagnosticFloatOption('max_limit', float('nan'))) options = cls.process_options(options, options_available) -- GitLab From 4b497cd8437363ab9bd6882615c9fcdcb4f7ed8b Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Wed, 11 Jan 2017 12:27:06 +0100 Subject: [PATCH 13/15] Added check for missing vars --- earthdiagnostics/cmor_tables/primavera.csv | 15 ------- earthdiagnostics/cmormanager.py | 11 +++++- earthdiagnostics/datamanager.py | 28 +++++++++++++ earthdiagnostics/earthdiags.py | 46 ++++++++++++++++++---- earthdiagnostics/threddsmanager.py | 42 ++++++++++++++++++++ earthdiagnostics/variable.py | 20 ++++++++-- 6 files changed, 135 insertions(+), 27 deletions(-) delete mode 100644 earthdiagnostics/cmor_tables/primavera.csv diff --git a/earthdiagnostics/cmor_tables/primavera.csv b/earthdiagnostics/cmor_tables/primavera.csv deleted file mode 100644 index 53180efd..00000000 --- a/earthdiagnostics/cmor_tables/primavera.csv +++ /dev/null @@ -1,15 +0,0 @@ -Variable,Shortname,Name,Long name,Domain,Basin,Units,Valid min,Valid max,Grid,Table -ibgheatco:sihc,hcicega,global mean ice heat content,Global mean ice heat content,seaIce,,,,,, -sbgheatco:sisnhc,hcsnga,global mean snow heat content,Global mean snow heat content,seaIce,,,,,, -iice_itd:siconc_cat:siitdconc:siconcat,siccat,ice_area_in_categories,Ice area in categories,seaIce,,,,,, -iicethic:sithic:sithick,sit,sea_ice_thickness,Sea Ice Thickness,seaIce,,m,,,, -iice_hid:sithic_cat:sithicat:siitdthick,sitcat,ice_thicknesss_in_categories,Ice thickness in categories,seaIce,,,,,, -iicevelo:sivelo:sispeed,sivelo,ice_velocity,Ice velocity,seaIce,,,,,, -iicevelu:sivelu:siu,sivelu,ice_velocity_u,Ice velocity u,seaIce,,,,,, -iicevelv:sivelv:siv,sivelv,ice_velocity_v,Ice velocity v,seaIce,,,,,, -sidivvel,siddivvel,divergence_of_sea_ice_velocity,Divergence of sea ice velocity,seaIce,,1e-8s^-1,,,, -sivolu:sivol,sivolu,sea_ice_volume_per_unit_gridcell_area,Sea ice volume per gridcell area unit,seaIce,,,,,, -iice_hsd:snthicats:siitdsnthick,sndcat,snow_thickness_in_categories,Snow thickness in in categories,seaIce,,,,,, -isnowthi:sisnthick,snthic,surface_snow_thickness,Surface snow thickness,seaIce,,,,,, -iicesali:iice_std:sisali,ssi,sea_ice_salinity,Sea ice salinity,seaIce,,psu,,,, -iicesurt:soicetem:sistem:sitemptop,tsice,surface_temperature,Surface temperature of sea ice,seaIce,,K,,,, diff --git a/earthdiagnostics/cmormanager.py b/earthdiagnostics/cmormanager.py index e418f67e..919c4afc 100644 --- a/earthdiagnostics/cmormanager.py +++ b/earthdiagnostics/cmormanager.py @@ -43,6 +43,15 @@ class CMORManager(DataManager): raise Exception('Can not find model data') self.cmor_path = os.path.join(self.config.data_dir, self.experiment.expid, 'cmorfiles') + def file_exists(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None, + vartype=VariableType.MEAN): + filepath = self.get_file_path(startdate, member, domain, var, chunk, frequency, box, grid, None, None) + + try: + return os.path.isfile(filepath) + except Exception: + return False + def get_file(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None, vartype=VariableType.MEAN): """ @@ -488,7 +497,7 @@ class CMORManager(DataManager): vartype=VariableType.MEAN) else: for filename in os.listdir(filepath): - self._create_link(domain, os.path.join(filepath, filename), freq, var, "", False, + self._create_link(domain, os.path.join(filepath, filename), frequency, var, "", False, vartype=VariableType.MEAN) Log.debug('Links ready') diff --git a/earthdiagnostics/datamanager.py b/earthdiagnostics/datamanager.py index 16c310ab..3e6f6f3d 100644 --- a/earthdiagnostics/datamanager.py +++ b/earthdiagnostics/datamanager.py @@ -31,6 +31,34 @@ class DataManager(object): UnitConversion.load_conversions() self.lock = threading.Lock() + def file_exists(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None, + vartype=VariableType.MEAN): + """ + Checks if a given file exists + + :param domain: CMOR domain + :type domain: Domain + :param var: variable name + :type var: str + :param startdate: file's startdate + :type startdate: str + :param member: file's member + :type member: int + :param chunk: file's chunk + :type chunk: int + :param grid: file's grid (only needed if it is not the original) + :type grid: str + :param box: file's box (only needed to retrieve sections or averages) + :type box: Box + :param frequency: file's frequency (only needed if it is different from the default) + :type frequency: Frequency + :param vartype: Variable type (mean, statistic) + :type vartype: VariableType + :return: path to the copy created on the scratch folder + :rtype: str + """ + raise NotImplementedError() + def get_file(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None, vartype=VariableType.MEAN): """ diff --git a/earthdiagnostics/earthdiags.py b/earthdiagnostics/earthdiags.py index 3cc62eab..857d08d6 100755 --- a/earthdiagnostics/earthdiags.py +++ b/earthdiagnostics/earthdiags.py @@ -20,6 +20,7 @@ from earthdiagnostics.diagnostic import Diagnostic from earthdiagnostics.ocean import * from earthdiagnostics.general import * from earthdiagnostics.statistics import * +from earthdiagnostics.variable import VariableManager class EarthDiags(object): @@ -72,6 +73,8 @@ class EarthDiags(object): help="opens documentation and exits") parser.add_argument('--clean', action='store_true', help="clean the scratch folder and exits") + parser.add_argument('--report', action='store_true', + help="generates a report about the available files") parser.add_argument('-lf', '--logfile', choices=('EVERYTHING', 'DEBUG', 'INFO', 'RESULT', 'USER_WARNING', 'WARNING', 'ERROR', 'CRITICAL', 'NO_LOG'), default='DEBUG', type=str, @@ -110,6 +113,8 @@ class EarthDiags(object): diags = EarthDiags(config_file_path) if args.clean: result = diags.clean() + elif args.report: + result = diags.report() else: result = diags.run() TempFile.clean() @@ -140,13 +145,7 @@ class EarthDiags(object): self._register_diagnostics() - parse_date('20000101') - - if self.config.data_adaptor == 'CMOR': - self.data_manager = CMORManager(self.config) - elif self.config.data_adaptor == 'THREDDS': - self.data_manager = THREDDSManager(self.config) - self.data_manager.prepare() + self._prepare_data_manager() # Run diagnostics Log.info('Running diagnostics') @@ -173,6 +172,13 @@ class EarthDiags(object): self.print_stats() return self.had_errors + def _prepare_data_manager(self): + if self.config.data_adaptor == 'CMOR': + self.data_manager = CMORManager(self.config) + elif self.config.data_adaptor == 'THREDDS': + self.data_manager = THREDDSManager(self.config) + self.data_manager.prepare() + def print_stats(self): Log.info('Time consumed by each diagnostic class') Log.info('--------------------------------------') @@ -247,6 +253,30 @@ class EarthDiags(object): Log.result('Scratch folder removed') return True + def report(self): + Log.info('Looking for existing vars...') + results = dict() + self._prepare_data_manager() + for startdate in self.config.experiment.startdates: + for member in self.config.experiment.members: + results[(startdate, member)] = self._get_variable_report(startdate, member) + + Log.result('Report finished') + return True + + def _get_variable_report(self, startdate, member): + var_manager = VariableManager() + results = list() + for var in var_manager.get_all_variables(): + if var.priority is None or var.domain is None: + continue + for table in var.tables: + if not self.data_manager.file_exists(var.domain, var.short_name, startdate, member, 1, + frequency=table.frequency): + Log.warning('Missing var {0} from table {1} (Priority {2})', var, table, var.priority) + results.append((var, table)) + return results + def _run_jobs(self, queue, numthread): def _run_job(current_job, retrials=1): while retrials >= 0: @@ -336,6 +366,8 @@ class EarthDiags(object): Log.info('File {0} ready', destiny) + + def main(): if not EarthDiags.parse_args(): exit(1) diff --git a/earthdiagnostics/threddsmanager.py b/earthdiagnostics/threddsmanager.py index 73745ed0..993a4196 100644 --- a/earthdiagnostics/threddsmanager.py +++ b/earthdiagnostics/threddsmanager.py @@ -51,6 +51,40 @@ class THREDDSManager(DataManager): Utils.cdo.selmonth(selected_months, input=thredds_subset, output=temp) return temp + def file_exists(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None, + vartype=VariableType.MEAN): + """ + Copies a given file from the CMOR repository to the scratch folder and returns the path to the scratch's copy + + :param domain: CMOR domain + :type domain: str + :param var: variable name + :type var: str + :param startdate: file's startdate + :type startdate: str + :param member: file's member + :type member: int + :param chunk: file's chunk + :type chunk: int + :param grid: file's grid (only needed if it is not the original) + :type grid: str + :param box: file's box (only needed to retrieve sections or averages) + :type box: Box + :param frequency: file's frequency (only needed if it is different from the default) + :type frequency: Frequency + :param vartype: Variable type (mean, statistic) + :type vartype: VariableType + :return: path to the copy created on the scratch folder + :rtype: str + """ + aggregation_path = self.get_var_url(var, startdate, frequency, box, vartype) + + start_chunk = chunk_start_date(parse_date(startdate), chunk, self.experiment.chunk_size, 'month', 'standard') + end_chunk = chunk_end_date(start_chunk, self.experiment.chunk_size, 'month', 'standard') + + thredds_subset = THREDDSSubset(aggregation_path, var, start_chunk, end_chunk) + return thredds_subset.check() + def get_file(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None, vartype=VariableType.MEAN): """ @@ -300,6 +334,14 @@ class THREDDSSubset: url = self.get_url() return self._download_url(url) + def check(self): + try: + self.handler = Utils.openCdf(self.get_url()) + self.handler.close() + return True + except Exception: + return False + def _read_metadata(self): self.var_dimensions = self.handler.variables[self.var].dimensions for dimension in self.var_dimensions: diff --git a/earthdiagnostics/variable.py b/earthdiagnostics/variable.py index c9e370e4..30ae1154 100644 --- a/earthdiagnostics/variable.py +++ b/earthdiagnostics/variable.py @@ -50,6 +50,15 @@ class VariableManager(object): Log.warning('Variable {0} is not defined in the CMOR table. Please add it'.format(original_name)) return None + def get_all_variables(self): + """ + Returns all variables + + :return: CMOR variable list + :rtype: set[Variable] + """ + return set(self._dict_variables.values()) + def get_variable_and_alias(self, original_name, silent=False): """ Returns the cmor variable instance given a variable name @@ -229,16 +238,17 @@ class VariableManager(object): if sheet['A1'].value != 'Priority': continue table_frequency, table_date = table_data[sheet.title] - + table = CMORTable(sheet.title, table_frequency, table_date) for row in sheet.rows: if row[0].value == 'Priority' or not row[5].value: continue if row[5].value.lower() in self._dict_variables: - self._dict_variables[row[5].value.lower()].tables.append(sheet.title) + self._dict_variables[row[5].value.lower()].tables.append(table) continue var = Variable() + var.priority = row[0].value var.short_name = row[5].value var.standard_name = row[6].value var.long_name = row[1].value @@ -246,7 +256,7 @@ class VariableManager(object): self._process_modelling_realm(var, row[12].value) var.units = row[2].value - var.tables.append(CMORTable(sheet.title, table_frequency, table_date)) + var.tables.append(table) self._dict_variables[var.short_name.lower()] = var def _process_modelling_realm(self, var, value): @@ -282,7 +292,9 @@ class Variable(object): self.valid_min = None self.valid_max = None self.grid = None + self.priority = None self.default = False + self.domain = None self.known_aliases = [] self.tables = [] @@ -353,7 +365,7 @@ class VariableAlias(object): class CMORTable(object): def __init__(self, name, frequency, date): self.name = name - self.frequency = frequency + self.frequency = Frequency.parse(frequency) self.date = date def __str__(self): -- GitLab From e94cee33e38453e8c8011373e1a1093f917bc02c Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Wed, 11 Jan 2017 15:10:56 +0100 Subject: [PATCH 14/15] Report functionality completed --- earthdiagnostics/earthdiags.py | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/earthdiagnostics/earthdiags.py b/earthdiagnostics/earthdiags.py index 857d08d6..8a1c04cd 100755 --- a/earthdiagnostics/earthdiags.py +++ b/earthdiagnostics/earthdiags.py @@ -255,11 +255,12 @@ class EarthDiags(object): def report(self): Log.info('Looking for existing vars...') - results = dict() self._prepare_data_manager() for startdate in self.config.experiment.startdates: for member in self.config.experiment.members: - results[(startdate, member)] = self._get_variable_report(startdate, member) + results = self._get_variable_report(startdate, member) + report_path = os.path.join(self.config.scratch_dir, '{0}_fc{1}.report'.format(startdate, member)) + self.create_report(report_path, results) Log.result('Report finished') return True @@ -273,10 +274,33 @@ class EarthDiags(object): for table in var.tables: if not self.data_manager.file_exists(var.domain, var.short_name, startdate, member, 1, frequency=table.frequency): - Log.warning('Missing var {0} from table {1} (Priority {2})', var, table, var.priority) results.append((var, table)) return results + def create_report(self, report_path, results): + current_table = None + current_priority = 0 + results = sorted(results, key=lambda result: result[0].short_name) + results = sorted(results, key=lambda result: result[0].priority) + results = sorted(results, key=lambda result: result[1].name) + + file_handler = open(report_path, 'w') + + for var, table in results: + if current_table != table.name: + file_handler.write('\nTable {0}\n'.format(table.name)) + file_handler.write('===================================\n') + current_table = table.name + current_priority = 0 + + if current_priority != var.priority: + file_handler.write('\nMissing variables with priority {0}:\n'.format(var.priority)) + file_handler.write('--------------------------------------\n') + current_priority = var.priority + + file_handler.write('{0:12}: {1}\n'.format(var.short_name, var.standard_name)) + file_handler.close() + def _run_jobs(self, queue, numthread): def _run_job(current_job, retrials=1): while retrials >= 0: -- GitLab From 207d3a7e757d8d94bc3525257faa2d9e76495219 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Wed, 11 Jan 2017 15:34:15 +0100 Subject: [PATCH 15/15] Cleaned code --- earthdiagnostics/cmorizer.py | 2 -- earthdiagnostics/cmormanager.py | 8 ++++---- earthdiagnostics/earthdiags.py | 2 -- earthdiagnostics/frequency.py | 1 + earthdiagnostics/modelingrealm.py | 2 +- earthdiagnostics/ocean/convectionsites.py | 3 ++- earthdiagnostics/ocean/gyres.py | 3 ++- earthdiagnostics/ocean/heatcontent.py | 6 ++++-- earthdiagnostics/ocean/verticalmeanmeters.py | 3 ++- earthdiagnostics/threddsmanager.py | 15 +++++++++++++++ earthdiagnostics/variable_type.py | 1 + test/unit/test_variable.py | 8 ++++---- 12 files changed, 36 insertions(+), 18 deletions(-) diff --git a/earthdiagnostics/cmorizer.py b/earthdiagnostics/cmorizer.py index 6a255bf1..9ed85f87 100644 --- a/earthdiagnostics/cmorizer.py +++ b/earthdiagnostics/cmorizer.py @@ -389,8 +389,6 @@ class Cmorizer(object): raise Exception('File {0} start date is not a valid chunk start date'.format(file_path)) return chunk - - @staticmethod def _add_coordinate_variables(handler, temp): handler_cmor = Utils.openCdf(temp) diff --git a/earthdiagnostics/cmormanager.py b/earthdiagnostics/cmormanager.py index 919c4afc..86259cc4 100644 --- a/earthdiagnostics/cmormanager.py +++ b/earthdiagnostics/cmormanager.py @@ -10,9 +10,8 @@ from autosubmit.date.chunk_date_lib import parse_date, chunk_start_date, chunk_e from earthdiagnostics.cmorizer import Cmorizer from earthdiagnostics.datamanager import DataManager, NetCDFFile from earthdiagnostics.frequency import Frequencies, Frequency -from earthdiagnostics.modelingrealm import ModelingRealm, ModelingRealms +from earthdiagnostics.modelingrealm import ModelingRealms from earthdiagnostics.utils import TempFile, Utils -from earthdiagnostics.variable import Variable from earthdiagnostics.variable_type import VariableType @@ -47,6 +46,7 @@ class CMORManager(DataManager): vartype=VariableType.MEAN): filepath = self.get_file_path(startdate, member, domain, var, chunk, frequency, box, grid, None, None) + # noinspection PyBroadException try: return os.path.isfile(filepath) except Exception: @@ -497,8 +497,8 @@ class CMORManager(DataManager): vartype=VariableType.MEAN) else: for filename in os.listdir(filepath): - self._create_link(domain, os.path.join(filepath, filename), frequency, var, "", False, - vartype=VariableType.MEAN) + self._create_link(domain, os.path.join(filepath, filename), frequency, var, "", + False, vartype=VariableType.MEAN) Log.debug('Links ready') def _get_startdate_path(self, startdate): diff --git a/earthdiagnostics/earthdiags.py b/earthdiagnostics/earthdiags.py index 8a1c04cd..990b9213 100755 --- a/earthdiagnostics/earthdiags.py +++ b/earthdiagnostics/earthdiags.py @@ -390,8 +390,6 @@ class EarthDiags(object): Log.info('File {0} ready', destiny) - - def main(): if not EarthDiags.parse_args(): exit(1) diff --git a/earthdiagnostics/frequency.py b/earthdiagnostics/frequency.py index 3099ce3d..12e1cbe4 100644 --- a/earthdiagnostics/frequency.py +++ b/earthdiagnostics/frequency.py @@ -1,3 +1,4 @@ +# coding=utf-8 from earthdiagnostics.variable_type import VariableType diff --git a/earthdiagnostics/modelingrealm.py b/earthdiagnostics/modelingrealm.py index 1df08d02..3a970eee 100644 --- a/earthdiagnostics/modelingrealm.py +++ b/earthdiagnostics/modelingrealm.py @@ -1,7 +1,7 @@ +# coding=utf-8 from earthdiagnostics.frequency import Frequencies - class ModelingRealm(object): @staticmethod diff --git a/earthdiagnostics/ocean/convectionsites.py b/earthdiagnostics/ocean/convectionsites.py index 706db0cd..ca140c22 100644 --- a/earthdiagnostics/ocean/convectionsites.py +++ b/earthdiagnostics/ocean/convectionsites.py @@ -85,7 +85,8 @@ class ConvectionSites(Diagnostic): else: raise Exception("Input grid {0} not recognized".format(self.model_version)) - mlotst_file = self.data_manager.get_file(ModelingRealms.ocean, 'mlotst', self.startdate, self.member, self.chunk) + mlotst_file = self.data_manager.get_file(ModelingRealms.ocean, 'mlotst', self.startdate, self.member, + self.chunk) output = TempFile.get() self.mlotst_handler = Utils.openCdf(mlotst_file) diff --git a/earthdiagnostics/ocean/gyres.py b/earthdiagnostics/ocean/gyres.py index 4e1e384f..c7eb720f 100644 --- a/earthdiagnostics/ocean/gyres.py +++ b/earthdiagnostics/ocean/gyres.py @@ -93,7 +93,8 @@ class Gyres(Diagnostic): raise Exception("Input grid {0} not recognized".format(self.model_version)) output = TempFile.get() - vsftbarot_file = self.data_manager.get_file(ModelingRealms.ocean, 'vsftbarot', self.startdate, self.member, self.chunk) + vsftbarot_file = self.data_manager.get_file(ModelingRealms.ocean, 'vsftbarot', self.startdate, + self.member, self.chunk) handler_original = Utils.openCdf(vsftbarot_file) self.var_vsftbarot = handler_original.variables['vsftbarot'] diff --git a/earthdiagnostics/ocean/heatcontent.py b/earthdiagnostics/ocean/heatcontent.py index b5062f8d..a82a4da6 100644 --- a/earthdiagnostics/ocean/heatcontent.py +++ b/earthdiagnostics/ocean/heatcontent.py @@ -90,9 +90,11 @@ class HeatContent(Diagnostic): Runs the diagnostic """ nco = Utils.nco - temperature_file = self.data_manager.get_file(ModelingRealms.ocean, 'thetao', self.startdate, self.member, self.chunk) + temperature_file = self.data_manager.get_file(ModelingRealms.ocean, 'thetao', self.startdate, + self.member, self.chunk) if self.mxloption != 0: - mlotst_file = self.data_manager.get_file(ModelingRealms.ocean, 'mlotst', self.startdate, self.member, self.chunk) + mlotst_file = self.data_manager.get_file(ModelingRealms.ocean, 'mlotst', self.startdate, + self.member, self.chunk) nco.ncks(input=mlotst_file, output=temperature_file, options='-A -v mlotst') para = list() diff --git a/earthdiagnostics/ocean/verticalmeanmeters.py b/earthdiagnostics/ocean/verticalmeanmeters.py index 172017e7..3f280356 100644 --- a/earthdiagnostics/ocean/verticalmeanmeters.py +++ b/earthdiagnostics/ocean/verticalmeanmeters.py @@ -84,7 +84,8 @@ class VerticalMeanMeters(Diagnostic): Runs the diagnostic """ temp = TempFile.get() - variable_file = self.data_manager.get_file(ModelingRealms.ocean, self.variable, self.startdate, self.member, self.chunk) + variable_file = self.data_manager.get_file(ModelingRealms.ocean, self.variable, self.startdate, self.member, + self.chunk) handler = Utils.openCdf(variable_file) if self.box.min_depth is None: diff --git a/earthdiagnostics/threddsmanager.py b/earthdiagnostics/threddsmanager.py index 993a4196..de75cd02 100644 --- a/earthdiagnostics/threddsmanager.py +++ b/earthdiagnostics/threddsmanager.py @@ -255,6 +255,20 @@ class THREDDSManager(DataManager): return thredds_subset.download() def get_var_url(self, var, startdate, frequency, box, vartype): + """ + Get url for dataset + :param var: variable to retrieve + :type var: str + :param startdate: startdate to retrieve + :type startdate: str + :param frequency: frequency to get: + :type frequency: Frequency | None + :param box: box to get + :type box: Box + :param vartype: type of variable + :type vartype: VariableType + :return: + """ if not frequency: frequency = self.config.frequency var = self._get_final_var_name(box, var) @@ -335,6 +349,7 @@ class THREDDSSubset: return self._download_url(url) def check(self): + # noinspection PyBroadException try: self.handler = Utils.openCdf(self.get_url()) self.handler.close() diff --git a/earthdiagnostics/variable_type.py b/earthdiagnostics/variable_type.py index ae9c61fc..4b3f17da 100644 --- a/earthdiagnostics/variable_type.py +++ b/earthdiagnostics/variable_type.py @@ -1,3 +1,4 @@ +# coding=utf-8 class VariableType(object): MEAN = 1 STATISTIC = 2 diff --git a/test/unit/test_variable.py b/test/unit/test_variable.py index 3951dfa6..7f3e0b84 100644 --- a/test/unit/test_variable.py +++ b/test/unit/test_variable.py @@ -1,8 +1,8 @@ # coding=utf-8 -from unittest import TestCase - -from earthdiagnostics.variable import Variable -from earthdiagnostics.modelingrealm import ModelingRealms +# from unittest import TestCase +# +# from earthdiagnostics.variable import Variable +# from earthdiagnostics.modelingrealm import ModelingRealms # class TestVariable(TestCase): -- GitLab