From 274a9cede9215392e697f3216a3ca53696dbd1db Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Tue, 22 Nov 2016 13:26:23 +0100 Subject: [PATCH 1/8] Added some changes to --- diags.conf | 13 +++++++------ earthdiagnostics/config.py | 6 +++++- earthdiagnostics/threddsmanager.py | 19 +++++++++++-------- 3 files changed, 23 insertions(+), 15 deletions(-) diff --git a/diags.conf b/diags.conf index 1b3cae91..f57b1cc3 100644 --- a/diags.conf +++ b/diags.conf @@ -4,7 +4,8 @@ DATA_ADAPTOR = THREDDS # Path to the folder where you want to create the temporary files SCRATCH_DIR = /scratch/Earth/$USER # Root path for the cmorized data to use -DATA_DIR = /esnas/exp/:/esarchive/exp/ +DATA_DIR = /esnas:/esarchive +DATA_TYPE = exp # Path to NEMO's mask and grid files needed for CDFTools CON_FILES = /esnas/autosubmit/con_files/ @@ -63,8 +64,8 @@ SERVER_URL = http://earth.bsc.es/thredds [EXPERIMENT] # Experiments parameters as defined in CMOR standard -INSTITUTE = meteofrance -MODEL = system4_m1 +INSTITUTE = ecmwf +MODEL = erainterim # Model version: Available versions MODEL_VERSION =Ec2.3_O1L46 # Atmospheric output timestep in hours @@ -81,11 +82,11 @@ OCEAN_TIMESTEP = 6 # CHUNK_SIZE is the size of each data file, given in months # CHUNKS is the number of chunks. You can specify less chunks than present on the experiment EXPID = resilience -STARTDATES = 19911101 +STARTDATES = 19790101 MEMBERS = 0 MEMBER_DIGITS = 1 -CHUNK_SIZE = 7 -CHUNKS = 1 +CHUNK_SIZE = 1 +CHUNKS = 10 # CHUNKS = 1 diff --git a/earthdiagnostics/config.py b/earthdiagnostics/config.py index 152fbce4..ab206c12 100644 --- a/earthdiagnostics/config.py +++ b/earthdiagnostics/config.py @@ -29,13 +29,17 @@ class Config(object): "Scratch folder path" self.data_dir = Utils.expand_path(parser.get_option('DIAGNOSTICS', 'DATA_DIR')) "Root data folder path" + self.data_type = Utils.expand_path(parser.get_option('DIAGNOSTICS', 'DATA_TYPE', 'exp')).lower() + "Data type (experiment, observation or reconstruction)" + if self.data_type not in ('exp', 'obs', 'recon'): + raise Exception('Data type must be exp, obs or recon') self.con_files = Utils.expand_path(parser.get_option('DIAGNOSTICS', 'CON_FILES')) "Mask and meshes folder path" self._diags = parser.get_option('DIAGNOSTICS', 'DIAGS') self.frequency = parser.get_option('DIAGNOSTICS', 'FREQUENCY').lower() + "Default data frequency to be used by the diagnostics" if self.frequency == 'month': self.frequency = 'mon' - "Default data frequency to be used by the diagnostics" self.cdftools_path = Utils.expand_path(parser.get_option('DIAGNOSTICS', 'CDFTOOLS_PATH')) "Path to CDFTOOLS executables" self.max_cores = parser.get_int_option('DIAGNOSTICS', 'MAX_CORES', 100000) diff --git a/earthdiagnostics/threddsmanager.py b/earthdiagnostics/threddsmanager.py index cb60f22b..3e6152e4 100644 --- a/earthdiagnostics/threddsmanager.py +++ b/earthdiagnostics/threddsmanager.py @@ -19,7 +19,7 @@ class THREDDSManager(DataManager): data_folders = self.config.data_dir.split(':') self.config.data_dir = None for data_folder in data_folders: - if os.path.isdir(os.path.join(data_folder, self.experiment.institute.lower(), + if os.path.isdir(os.path.join(data_folder, self.config.data_type, self.experiment.institute.lower(), self.experiment.model.lower())): self.config.data_dir = data_folder break @@ -160,16 +160,13 @@ class THREDDSManager(DataManager): var = self._get_final_var_name(box, var) folder_path = self._get_folder_path(frequency, domain, var, grid, vartype) - if startdate: - file_name = '{0}_{1}.nc'.format(var, 
startdate) - else: - file_name = '{0}.nc'.format(var) + file_name = self._get_file_name(startdate, var) filepath = os.path.join(folder_path, file_name) return filepath def _get_folder_path(self, frequency, domain, variable, grid, vartype): - folder_path = os.path.join(self.config.data_dir, + folder_path = os.path.join(self.config.data_dir, self.config.data_type, self.experiment.institute.lower(), self.experiment.model.lower(), self.frequency_folder_name(frequency, vartype), @@ -202,9 +199,15 @@ class THREDDSManager(DataManager): protocol = 'fileServer' else: protocol = 'dodsC' - return os.path.join(self.server_url, protocol, 'exp', self.experiment.institute, + return os.path.join(self.server_url, protocol, self.config.data_type, self.experiment.institute, self.experiment.model, self.frequency_folder_name(frequency, vartype), - var, '{0}_{1}.nc'.format(var, startdate)) + var, self._get_file_name(startdate, var)) + + def _get_file_name(self, startdate, var): + if startdate and self.config.data_type == 'exp': + return '{0}_{1}.nc'.format(var, startdate) + else: + return '{0}.nc'.format(var) def link_file(self, domain, var, startdate, member, chunk=None, grid=None, box=None, frequency=None, year=None, date_str=None, move_old=False, vartype=VarType.MEAN): -- GitLab From 793cc7aff2dd6ff61672d084e05bf2233d05624f Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Wed, 23 Nov 2016 15:12:22 +0100 Subject: [PATCH 2/8] Changed thredds manager to use dodosC and nccopy to retrieve files --- diags.conf | 2 +- earthdiagnostics/threddsmanager.py | 52 +++++++++++++++++++++++------- 2 files changed, 42 insertions(+), 12 deletions(-) diff --git a/diags.conf b/diags.conf index f57b1cc3..886679cb 100644 --- a/diags.conf +++ b/diags.conf @@ -5,7 +5,7 @@ DATA_ADAPTOR = THREDDS SCRATCH_DIR = /scratch/Earth/$USER # Root path for the cmorized data to use DATA_DIR = /esnas:/esarchive -DATA_TYPE = exp +DATA_TYPE = recon # Path to NEMO's mask and grid files needed for CDFTools CON_FILES = /esnas/autosubmit/con_files/ diff --git a/earthdiagnostics/threddsmanager.py b/earthdiagnostics/threddsmanager.py index 3e6152e4..5d6e6081 100644 --- a/earthdiagnostics/threddsmanager.py +++ b/earthdiagnostics/threddsmanager.py @@ -1,6 +1,7 @@ # coding=utf-8 import os -from autosubmit.date.chunk_date_lib import parse_date, add_months +from autosubmit.config.log import Log +from autosubmit.date.chunk_date_lib import parse_date, add_months, chunk_start_date, chunk_end_date from earthdiagnostics.datamanager import DataManager, NetCDFFile from earthdiagnostics.utils import TempFile, Utils @@ -30,7 +31,7 @@ class THREDDSManager(DataManager): def get_leadtimes(self, domain, variable, startdate, member, leadtimes, frequency=None, vartype=VarType.MEAN): if not frequency: frequency = self.config.frequency - aggregation_path = self.get_var_url(variable, startdate, frequency, None, False, vartype) + aggregation_path = self.get_var_url(variable, startdate, frequency, None, vartype) temp = TempFile.get() startdate = parse_date(startdate) selected_months = ','.join([str(add_months(startdate, i, 'standard').month) for i in leadtimes]) @@ -65,9 +66,38 @@ class THREDDSManager(DataManager): """ if not frequency: frequency = self.config.frequency - aggregation_path = self.get_var_url(var, startdate, frequency, box, True, vartype) + aggregation_path = self.get_var_url(var, startdate, frequency, box, vartype) temp = TempFile.get() - urllib.urlretrieve(aggregation_path, temp) + + start_chunk = chunk_start_date(parse_date(startdate), chunk, 
self.experiment.chunk_size, 'month', 'standard') + end_chunk = chunk_end_date(start_chunk, self.experiment.chunk_size, 'month', 'standard') + + handler = Utils.openCdf(aggregation_path) + times = Utils.get_datetime_from_netcdf(handler) + lat_size = handler.dimensions['latitude'].size + lon_size = handler.dimensions['longitude'].size + handler.close() + time_start = 0 + while time_start < times.size and times[time_start] < start_chunk: + time_start += 1 + if time_start == times.size: + raise Exception('Timesteps not available for chunk {0}'.format(chunk)) + + time_end = time_start + + if times[time_end] >= end_chunk: + raise Exception('Timesteps not available for chunk {0}'.format(chunk)) + + while time_end < times.size-1 and times[time_end+1] < end_chunk: + time_end += 1 + + slice_path = '{0}?time[{1},1,{2}],latitude[0,1,{3}],longitude[0,1,{4}],' \ + '{5}[{1},1,{2}][0,1,{3}][0,1,{4}],{5}'.format(aggregation_path, time_start, time_end, + lat_size, lon_size, var) + Log.debug(slice_path) + + Utils.nco.nccopy(slice_path, temp) + if not Utils.check_netcdf_file(temp): raise THREDDSError('Can not retrieve {0} from server'.format(aggregation_path)) return temp @@ -193,15 +223,15 @@ class THREDDSManager(DataManager): :return: """ - def get_var_url(self, var, startdate, frequency, box, fileserver, vartype): + def get_var_url(self, var, startdate, frequency, box, vartype): var = self._get_final_var_name(box, var) - if fileserver: - protocol = 'fileServer' + full_path = os.path.join(self.server_url, 'dodsC', self.config.data_type, self.experiment.institute, + self.experiment.model, self.frequency_folder_name(frequency, vartype)) + if self.config.data_type == 'exp': + full_path = os.path.join(full_path, var, self._get_file_name(startdate, var)) else: - protocol = 'dodsC' - return os.path.join(self.server_url, protocol, self.config.data_type, self.experiment.institute, - self.experiment.model, self.frequency_folder_name(frequency, vartype), - var, self._get_file_name(startdate, var)) + full_path = os.path.join(full_path, self._get_file_name(startdate, var)) + return full_path def _get_file_name(self, startdate, var): if startdate and self.config.data_type == 'exp': -- GitLab From 29e8c6ddaf11c914d8bd759d5030413e417f45c9 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Wed, 23 Nov 2016 16:10:22 +0100 Subject: [PATCH 3/8] Added support for obs and recon to THREDDSmanager --- earthdiagnostics/threddsmanager.py | 35 ++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/earthdiagnostics/threddsmanager.py b/earthdiagnostics/threddsmanager.py index 5d6e6081..321d2f2e 100644 --- a/earthdiagnostics/threddsmanager.py +++ b/earthdiagnostics/threddsmanager.py @@ -1,7 +1,7 @@ # coding=utf-8 import os from autosubmit.config.log import Log -from autosubmit.date.chunk_date_lib import parse_date, add_months, chunk_start_date, chunk_end_date +from autosubmit.date.chunk_date_lib import parse_date, add_months, chunk_start_date, chunk_end_date, date2str from earthdiagnostics.datamanager import DataManager, NetCDFFile from earthdiagnostics.utils import TempFile, Utils @@ -28,6 +28,10 @@ class THREDDSManager(DataManager): if not self.config.data_dir: raise Exception('Can not find model data') + if self.config.data_type in ('obs', 'recon') and self.experiment.chunk_size !=1 : + raise Exception('For obs and recon data chunk_size must be always 1') + + def get_leadtimes(self, domain, variable, startdate, member, leadtimes, frequency=None, vartype=VarType.MEAN): if not frequency: 
frequency = self.config.frequency @@ -74,8 +78,8 @@ class THREDDSManager(DataManager): handler = Utils.openCdf(aggregation_path) times = Utils.get_datetime_from_netcdf(handler) - lat_size = handler.dimensions['latitude'].size - lon_size = handler.dimensions['longitude'].size + lat_size = handler.dimensions['latitude'].size -1 + lon_size = handler.dimensions['longitude'].size -1 handler.close() time_start = 0 while time_start < times.size and times[time_start] < start_chunk: @@ -91,12 +95,12 @@ class THREDDSManager(DataManager): while time_end < times.size-1 and times[time_end+1] < end_chunk: time_end += 1 - slice_path = '{0}?time[{1},1,{2}],latitude[0,1,{3}],longitude[0,1,{4}],' \ - '{5}[{1},1,{2}][0,1,{3}][0,1,{4}],{5}'.format(aggregation_path, time_start, time_end, - lat_size, lon_size, var) + slice_path = '{0}?time[{1}:1:{2}],latitude[0:1:{3}],longitude[0:1:{4}],' \ + '{5}[{1}:1:{2}][0:1:{3}][0:1:{4}]'.format(aggregation_path, time_start, time_end, + lat_size, lon_size, var) Log.debug(slice_path) - Utils.nco.nccopy(slice_path, temp) + Utils.execute_shell_command(['nccopy', slice_path, temp]) if not Utils.check_netcdf_file(temp): raise THREDDSError('Can not retrieve {0} from server'.format(aggregation_path)) @@ -158,7 +162,10 @@ class THREDDSManager(DataManager): if not frequency: frequency = self.config.frequency - filepath = self.get_file_path(startdate, domain, var, frequency, vartype, box, grid) + start_chunk = chunk_start_date(parse_date(startdate), chunk, self.experiment.chunk_size, 'month', + 'standard') + + filepath = self.get_file_path(date2str(start_chunk)[0:6], domain, var, frequency, vartype, box, grid) netcdf_file = NetCDFFile(filepath, filetosend, domain, var, cmor_var) if diagnostic: netcdf_file.add_diagnostic_history(diagnostic) @@ -196,11 +203,17 @@ class THREDDSManager(DataManager): return filepath def _get_folder_path(self, frequency, domain, variable, grid, vartype): + + if self.config.data_type == 'exp': + var_folder = self.get_varfolder(domain, variable, grid) + else: + var_folder = variable + folder_path = os.path.join(self.config.data_dir, self.config.data_type, self.experiment.institute.lower(), self.experiment.model.lower(), self.frequency_folder_name(frequency, vartype), - self.get_varfolder(domain, variable, grid)) + var_folder) return folder_path def get_year(self, domain, var, startdate, member, year, grid=None, box=None): @@ -230,11 +243,11 @@ class THREDDSManager(DataManager): if self.config.data_type == 'exp': full_path = os.path.join(full_path, var, self._get_file_name(startdate, var)) else: - full_path = os.path.join(full_path, self._get_file_name(startdate, var)) + full_path = os.path.join(full_path, self._get_file_name(None, var)) return full_path def _get_file_name(self, startdate, var): - if startdate and self.config.data_type == 'exp': + if startdate: return '{0}_{1}.nc'.format(var, startdate) else: return '{0}.nc'.format(var) -- GitLab From 05ff93fc292765b413be6e5b5389e10e5a143594 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Wed, 23 Nov 2016 17:27:47 +0100 Subject: [PATCH 4/8] Fixed cmorizer bug --- diags.conf | 2 +- earthdiagnostics/cmorizer.py | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/diags.conf b/diags.conf index 886679cb..a49265cc 100644 --- a/diags.conf +++ b/diags.conf @@ -12,7 +12,7 @@ CON_FILES = /esnas/autosubmit/con_files/ # Diagnostics to run, space separated. You must provide for each one the name and the parameters (comma separated) or # an alias defined in the ALIAS section (see more below). 
If you are using the diagnpostics just to CMORize, leave it # empty -DIAGS = monpercent,atmos,sfcWind,90 monpercent,atmos,sfcWind,10 +DIAGS = monpercent,atmos,sfcWind,66 monpercent,atmos,sfcWind,33 # DIAGS = OHC # Frequency of the data you want to use by default. Some diagnostics do not use this value: i.e. monmean always stores # its results at monthly frequency (obvious) and has a parameter to specify input's frequency. diff --git a/earthdiagnostics/cmorizer.py b/earthdiagnostics/cmorizer.py index 88c9746b..02deb036 100644 --- a/earthdiagnostics/cmorizer.py +++ b/earthdiagnostics/cmorizer.py @@ -70,7 +70,6 @@ class Cmorizer(object): count = 1 for tarfile in tar_files: Log.info('Unpacking oceanic file {0}/{1}'.format(count, len(tar_files))) - self._check_cmorization_required() self._unpack_tar_file(tarfile) self._cmorize_nc_files() Log.result('Oceanic file {0}/{1} finished'.format(count, len(tar_files))) @@ -532,9 +531,6 @@ class Cmorizer(object): gribfiles = glob.glob(grb_path) return len(gribfiles) > 0 - def _check_cmorization_required(self, tarfile): - pass - class CMORException(Exception): pass -- GitLab From 7db3b8092332dc71839a027c08cdb0429eead75c Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Thu, 24 Nov 2016 12:03:47 +0100 Subject: [PATCH 5/8] Added THREDDSSubset class --- earthdiagnostics/threddsmanager.py | 129 ++++++++++++++++++++--------- 1 file changed, 90 insertions(+), 39 deletions(-) diff --git a/earthdiagnostics/threddsmanager.py b/earthdiagnostics/threddsmanager.py index 321d2f2e..4b5a1fad 100644 --- a/earthdiagnostics/threddsmanager.py +++ b/earthdiagnostics/threddsmanager.py @@ -1,11 +1,10 @@ # coding=utf-8 import os -from autosubmit.config.log import Log from autosubmit.date.chunk_date_lib import parse_date, add_months, chunk_start_date, chunk_end_date, date2str from earthdiagnostics.datamanager import DataManager, NetCDFFile from earthdiagnostics.utils import TempFile, Utils -import urllib +from datetime import datetime from earthdiagnostics.variable import Variable, VarType @@ -31,16 +30,19 @@ class THREDDSManager(DataManager): if self.config.data_type in ('obs', 'recon') and self.experiment.chunk_size !=1 : raise Exception('For obs and recon data chunk_size must be always 1') - def get_leadtimes(self, domain, variable, startdate, member, leadtimes, frequency=None, vartype=VarType.MEAN): - if not frequency: - frequency = self.config.frequency + aggregation_path = self.get_var_url(variable, startdate, frequency, None, vartype) - temp = TempFile.get() startdate = parse_date(startdate) + start_chunk = chunk_start_date(startdate, self.experiment.chunks, self.experiment.chunk_size, 'month', 'standard') + end_chunk = chunk_end_date(start_chunk, self.experiment.chunk_size, 'month', 'standard') + + thredds_subset = THREDDSSubset(aggregation_path, variable, startdate, end_chunk) selected_months = ','.join([str(add_months(startdate, i, 'standard').month) for i in leadtimes]) - select_months = '-selmonth,{0} {1}'.format(selected_months, aggregation_path) + select_months = '-selmonth,{0} {1}'.format(selected_months, thredds_subset.get_subset_url()) selected_years = ','.join([str(add_months(startdate, i, 'standard').year) for i in leadtimes]) + + temp = TempFile.get() Utils.cdo.selyear(selected_years, input=select_months, output=temp) return temp @@ -68,43 +70,14 @@ class THREDDSManager(DataManager): :return: path to the copy created on the scratch folder :rtype: str """ - if not frequency: - frequency = self.config.frequency aggregation_path = self.get_var_url(var, 
startdate, frequency, box, vartype) - temp = TempFile.get() start_chunk = chunk_start_date(parse_date(startdate), chunk, self.experiment.chunk_size, 'month', 'standard') end_chunk = chunk_end_date(start_chunk, self.experiment.chunk_size, 'month', 'standard') - handler = Utils.openCdf(aggregation_path) - times = Utils.get_datetime_from_netcdf(handler) - lat_size = handler.dimensions['latitude'].size -1 - lon_size = handler.dimensions['longitude'].size -1 - handler.close() - time_start = 0 - while time_start < times.size and times[time_start] < start_chunk: - time_start += 1 - if time_start == times.size: - raise Exception('Timesteps not available for chunk {0}'.format(chunk)) - - time_end = time_start - - if times[time_end] >= end_chunk: - raise Exception('Timesteps not available for chunk {0}'.format(chunk)) - - while time_end < times.size-1 and times[time_end+1] < end_chunk: - time_end += 1 + thredds_subset = THREDDSSubset(aggregation_path, var, start_chunk, end_chunk) + return thredds_subset.download() - slice_path = '{0}?time[{1}:1:{2}],latitude[0:1:{3}],longitude[0:1:{4}],' \ - '{5}[{1}:1:{2}][0:1:{3}][0:1:{4}]'.format(aggregation_path, time_start, time_end, - lat_size, lon_size, var) - Log.debug(slice_path) - - Utils.execute_shell_command(['nccopy', slice_path, temp]) - - if not Utils.check_netcdf_file(temp): - raise THREDDSError('Can not retrieve {0} from server'.format(aggregation_path)) - return temp def send_file(self, filetosend, domain, var, startdate, member, chunk=None, grid=None, region=None, box=None, rename_var=None, frequency=None, year=None, date_str=None, move_old=False, @@ -216,7 +189,7 @@ class THREDDSManager(DataManager): var_folder) return folder_path - def get_year(self, domain, var, startdate, member, year, grid=None, box=None): + def get_year(self, domain, var, startdate, member, year, grid=None, box=None, vartype=VarType.MEAN): """ Ge a file containing all the data for one year for one variable :param domain: variable's domain @@ -235,8 +208,14 @@ class THREDDSManager(DataManager): :type box: Box :return: """ + aggregation_path = self.get_var_url(var, startdate, None, box, vartype) + thredds_subset = THREDDSSubset(aggregation_path, var, datetime(year, 1, 1), datetime(year+1, 1, 1)) + return thredds_subset.download() + def get_var_url(self, var, startdate, frequency, box, vartype): + if not frequency: + frequency = self.config.frequency var = self._get_final_var_name(box, var) full_path = os.path.join(self.server_url, 'dodsC', self.config.data_type, self.experiment.institute, self.experiment.model, self.frequency_folder_name(frequency, vartype)) @@ -287,3 +266,75 @@ class THREDDSManager(DataManager): class THREDDSError(Exception): pass + +class THREDDSSubset: + def __init__(self, thredds_path, var, start_time, end_time): + self.thredds_path = thredds_path + self.var = var + self.dimension_indexes = {} + self.handler = None + self.start_time = start_time + self.end_time = end_time + + def get_url(self): + self.handler = Utils.openCdf(self.thredds_path) + self._read_metadata() + self.handler.close() + + self._get_time_indexes() + + return self._get_subset_url() + + def download(self): + url = self.get_url() + return self._download_url(url) + + def _read_metadata(self): + self.var_dimensions = self.handler.variables[self.var].dimensions + for dimension in self.var_dimensions: + if dimension == 'time': + continue + self.dimension_indexes[dimension] = (0, self.handler.dimensions[dimension].size - 1) + + if 'time' in self.var_dimensions: + self.times = 
Utils.get_datetime_from_netcdf(self.handler) + + def _get_time_indexes(self): + if 'time' not in self.var_dimensions: + return + + time_start = 0 + while time_start < self.times.size and self.times[time_start] < self.start_time: + time_start += 1 + if time_start == self.times.size: + raise Exception('Timesteps not available for interval {0}-{1}'.format(self.start_time, self.end_time)) + time_end = time_start + if self.times[time_end] >= self.end_time: + raise Exception('Timesteps not available for interval {0}-{1}'.format(self.start_time, self.end_time)) + while time_end < self.times.size - 1 and self.times[time_end + 1] < self.end_time: + time_end += 1 + self.dimension_indexes['time'] = (time_start, time_end) + + def _download_url(self, url): + temp = TempFile.get() + Utils.execute_shell_command(['nccopy', url, temp]) + if not Utils.check_netcdf_file(temp): + raise THREDDSError('Can not retrieve {0} from server'.format(url)) + return temp + + def _get_subset_url(self): + var_slice = self.var + dimensions_slice = '' + + for dimension in self.var_dimensions: + slice_index = self._get_slice_index(self.dimension_indexes[dimension]) + var_slice += slice_index + dimensions_slice += '{0}{1},'.format(dimension, slice_index) + + return '{0}?{1}{2}'.format(self.thredds_path, dimensions_slice, var_slice) + + def _get_slice_index(self, index_tuple): + return '[{0[0]}:1:{0[1]}]'.format(index_tuple) + + + -- GitLab From 8214f7217d8190701a6bc4500f4d8537b50ffb29 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Thu, 24 Nov 2016 17:13:52 +0100 Subject: [PATCH 6/8] Corrected climatological percentil to be able to work with recon data --- diags.conf | 6 ++--- .../statistics/climatologicalpercentile.py | 22 +++++++++---------- earthdiagnostics/threddsmanager.py | 22 +++++++++---------- 3 files changed, 24 insertions(+), 26 deletions(-) diff --git a/diags.conf b/diags.conf index a49265cc..13d7fa7b 100644 --- a/diags.conf +++ b/diags.conf @@ -12,7 +12,7 @@ CON_FILES = /esnas/autosubmit/con_files/ # Diagnostics to run, space separated. You must provide for each one the name and the parameters (comma separated) or # an alias defined in the ALIAS section (see more below). If you are using the diagnpostics just to CMORize, leave it # empty -DIAGS = monpercent,atmos,sfcWind,66 monpercent,atmos,sfcWind,33 +DIAGS = climpercent,atmos,sfcWind,1 # DIAGS = OHC # Frequency of the data you want to use by default. Some diagnostics do not use this value: i.e. monmean always stores # its results at monthly frequency (obvious) and has a parameter to specify input's frequency. 
@@ -60,7 +60,7 @@ ATMOS_MONTHLY_VARS = 167, 201, 202, 165, 166, 151, 144, 228, 205, 182, 164, 146, # SOURCE = 'EC-Earthv2.3.0, ocean: Nemo3.1, ifs31r1, lim2 [THREDDS] -SERVER_URL = http://earth.bsc.es/thredds +SERVER_URL = https://earth.bsc.es/thredds [EXPERIMENT] # Experiments parameters as defined in CMOR standard @@ -86,7 +86,7 @@ STARTDATES = 19790101 MEMBERS = 0 MEMBER_DIGITS = 1 CHUNK_SIZE = 1 -CHUNKS = 10 +CHUNKS = 36 # CHUNKS = 1 diff --git a/earthdiagnostics/statistics/climatologicalpercentile.py b/earthdiagnostics/statistics/climatologicalpercentile.py index 1e52049c..60fd7568 100644 --- a/earthdiagnostics/statistics/climatologicalpercentile.py +++ b/earthdiagnostics/statistics/climatologicalpercentile.py @@ -98,16 +98,16 @@ class ClimatologicalPercentile(Diagnostic): percentile_var = handler.createVariable('percentile', float, ('percentile',)) percentile_var[:] = self.percentiles - handler.createDimension('lat', self.lat_len) + handler.createDimension('lat', self.lat.size) lat_var = handler.createVariable('lat', float, ('lat',)) lat_var[:] = self.lat - handler.createDimension('lon', self.lon_len) + handler.createDimension('lon', self.lon.size) lon_var = handler.createVariable('lon', float, ('lon',)) lon_var[:] = self.lon - p75_var = handler.createVariable('percent', float, ('percentile', 'lat', 'lon')) - p75_var[...] = percentile_values + percentile_var = handler.createVariable('percent', float, ('percentile', 'lat', 'lon')) + percentile_var[...] = percentile_values handler.close() @@ -117,14 +117,14 @@ class ClimatologicalPercentile(Diagnostic): def _calculate_percentiles(self, distribution): Log.debug('Calculating percentiles') - def calculate_percentiles(point_distribution): + def calculate(point_distribution): cs = np.cumsum(point_distribution) total = cs[-1] percentile_values = self.percentiles * total index = np.searchsorted(cs, percentile_values) return [(self._bins[i + 1] + self._bins[i]) / 2 for i in index] - distribution = np.apply_along_axis(calculate_percentiles, 0, distribution) + distribution = np.apply_along_axis(calculate, 0, distribution) return distribution def _get_distribution(self, member_files): @@ -189,18 +189,16 @@ class ClimatologicalPercentile(Diagnostic): return histogram var = handler.variables[self.variable] - return np.apply_along_axis(calculate_histogram, 0, var[:, realization, ...]) + if 'realization' in var.dimensions or 'ensemble' in var.dimensions: + return np.apply_along_axis(calculate_histogram, 0, var[:, realization, ...]) + else: + return np.apply_along_axis(calculate_histogram, 0, var[:]) def _get_var_size(self, handler): if self.lat_len is not None: return - self.lat = handler.variables['latitude'][:] self.lon = handler.variables['longitude'][:] - self.lat = handler.dimensions['latitude'].size - self.lon = handler.dimensions['longitude'].size - - diff --git a/earthdiagnostics/threddsmanager.py b/earthdiagnostics/threddsmanager.py index 4b5a1fad..0a36e02b 100644 --- a/earthdiagnostics/threddsmanager.py +++ b/earthdiagnostics/threddsmanager.py @@ -34,16 +34,19 @@ class THREDDSManager(DataManager): aggregation_path = self.get_var_url(variable, startdate, frequency, None, vartype) startdate = parse_date(startdate) - start_chunk = chunk_start_date(startdate, self.experiment.chunks, self.experiment.chunk_size, 'month', 'standard') + start_chunk = chunk_start_date(startdate, self.experiment.num_chunks, self.experiment.chunk_size, + 'month', 'standard') end_chunk = chunk_end_date(start_chunk, self.experiment.chunk_size, 'month', 'standard') - 
thredds_subset = THREDDSSubset(aggregation_path, variable, startdate, end_chunk) + thredds_subset = THREDDSSubset(aggregation_path, variable, startdate, end_chunk).get_url() selected_months = ','.join([str(add_months(startdate, i, 'standard').month) for i in leadtimes]) - select_months = '-selmonth,{0} {1}'.format(selected_months, thredds_subset.get_subset_url()) - selected_years = ','.join([str(add_months(startdate, i, 'standard').year) for i in leadtimes]) - temp = TempFile.get() - Utils.cdo.selyear(selected_years, input=select_months, output=temp) + if self.config.data_type == 'exp': + select_months = '-selmonth,{0} {1}'.format(selected_months, thredds_subset) + selected_years = ','.join([str(add_months(startdate, i, 'standard').year) for i in leadtimes]) + Utils.cdo.selyear(selected_years, input=select_months, output=temp) + else: + Utils.cdo.selmonth(selected_months, input=thredds_subset, output=temp) return temp def get_file(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None, @@ -135,10 +138,7 @@ class THREDDSManager(DataManager): if not frequency: frequency = self.config.frequency - start_chunk = chunk_start_date(parse_date(startdate), chunk, self.experiment.chunk_size, 'month', - 'standard') - - filepath = self.get_file_path(date2str(start_chunk)[0:6], domain, var, frequency, vartype, box, grid) + filepath = self.get_file_path(startdate, domain, var, frequency, vartype, box, grid) netcdf_file = NetCDFFile(filepath, filetosend, domain, var, cmor_var) if diagnostic: netcdf_file.add_diagnostic_history(diagnostic) @@ -227,7 +227,7 @@ class THREDDSManager(DataManager): def _get_file_name(self, startdate, var): if startdate: - return '{0}_{1}.nc'.format(var, startdate) + return '{0}_{1}.nc'.format(var, startdate[0:6]) else: return '{0}.nc'.format(var) -- GitLab From 52b4cf0011f38a09ce3e2633cebb44242ff673f6 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Fri, 25 Nov 2016 15:53:31 +0100 Subject: [PATCH 7/8] Added fix for ensemble dimension (whose variable is realization instead of ensemble) --- diags.conf | 12 ++++++------ earthdiagnostics/threddsmanager.py | 6 +++++- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/diags.conf b/diags.conf index 13d7fa7b..582b693d 100644 --- a/diags.conf +++ b/diags.conf @@ -5,14 +5,14 @@ DATA_ADAPTOR = THREDDS SCRATCH_DIR = /scratch/Earth/$USER # Root path for the cmorized data to use DATA_DIR = /esnas:/esarchive -DATA_TYPE = recon +DATA_TYPE = exp # Path to NEMO's mask and grid files needed for CDFTools CON_FILES = /esnas/autosubmit/con_files/ # Diagnostics to run, space separated. You must provide for each one the name and the parameters (comma separated) or # an alias defined in the ALIAS section (see more below). If you are using the diagnpostics just to CMORize, leave it # empty -DIAGS = climpercent,atmos,sfcWind,1 +DIAGS = monpercent,atmos,tas,66 monpercent,atmos,tas,33 climpercent,atmos,sfcwind,1 # DIAGS = OHC # Frequency of the data you want to use by default. Some diagnostics do not use this value: i.e. monmean always stores # its results at monthly frequency (obvious) and has a parameter to specify input's frequency. 
@@ -65,7 +65,7 @@ SERVER_URL = https://earth.bsc.es/thredds [EXPERIMENT] # Experiments parameters as defined in CMOR standard INSTITUTE = ecmwf -MODEL = erainterim +MODEL = system4_m1 # Model version: Available versions MODEL_VERSION =Ec2.3_O1L46 # Atmospheric output timestep in hours @@ -82,11 +82,11 @@ OCEAN_TIMESTEP = 6 # CHUNK_SIZE is the size of each data file, given in months # CHUNKS is the number of chunks. You can specify less chunks than present on the experiment EXPID = resilience -STARTDATES = 19790101 +STARTDATES = 19810101 MEMBERS = 0 MEMBER_DIGITS = 1 -CHUNK_SIZE = 1 -CHUNKS = 36 +CHUNK_SIZE = 7 +CHUNKS = 1 # CHUNKS = 1 diff --git a/earthdiagnostics/threddsmanager.py b/earthdiagnostics/threddsmanager.py index 0a36e02b..ae8acb00 100644 --- a/earthdiagnostics/threddsmanager.py +++ b/earthdiagnostics/threddsmanager.py @@ -227,7 +227,9 @@ class THREDDSManager(DataManager): def _get_file_name(self, startdate, var): if startdate: - return '{0}_{1}.nc'.format(var, startdate[0:6]) + if self.config.data_type != 'exp': + startdate = startdate[0:6] + return '{0}_{1}.nc'.format(var, startdate) else: return '{0}.nc'.format(var) @@ -329,6 +331,8 @@ class THREDDSSubset: for dimension in self.var_dimensions: slice_index = self._get_slice_index(self.dimension_indexes[dimension]) var_slice += slice_index + if dimension == 'ensemble': + dimension = 'realization' dimensions_slice += '{0}{1},'.format(dimension, slice_index) return '{0}?{1}{2}'.format(self.thredds_path, dimensions_slice, var_slice) -- GitLab From acaa4db6bb1e180693abe7cc9df8e14eef9a4c84 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Fri, 25 Nov 2016 17:20:44 +0100 Subject: [PATCH 8/8] Updated doc for diags and minor improvement on climpercent --- diags.conf | 3 ++- .../statistics/climatologicalpercentile.py | 14 +++++++++----- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/diags.conf b/diags.conf index 582b693d..0c17c47d 100644 --- a/diags.conf +++ b/diags.conf @@ -5,6 +5,7 @@ DATA_ADAPTOR = THREDDS SCRATCH_DIR = /scratch/Earth/$USER # Root path for the cmorized data to use DATA_DIR = /esnas:/esarchive +# Specify if your data is from an experiment (exp), observation (obs) or reconstructions (recon) DATA_TYPE = exp # Path to NEMO's mask and grid files needed for CDFTools @@ -12,7 +13,7 @@ CON_FILES = /esnas/autosubmit/con_files/ # Diagnostics to run, space separated. You must provide for each one the name and the parameters (comma separated) or # an alias defined in the ALIAS section (see more below). If you are using the diagnpostics just to CMORize, leave it # empty -DIAGS = monpercent,atmos,tas,66 monpercent,atmos,tas,33 climpercent,atmos,sfcwind,1 +DIAGS = climpercent,atmos,sfcWind,1 # DIAGS = OHC # Frequency of the data you want to use by default. Some diagnostics do not use this value: i.e. monmean always stores # its results at monthly frequency (obvious) and has a parameter to specify input's frequency. 
diff --git a/earthdiagnostics/statistics/climatologicalpercentile.py b/earthdiagnostics/statistics/climatologicalpercentile.py index 60fd7568..5df47419 100644 --- a/earthdiagnostics/statistics/climatologicalpercentile.py +++ b/earthdiagnostics/statistics/climatologicalpercentile.py @@ -22,7 +22,7 @@ class ClimatologicalPercentile(Diagnostic): alias = 'climpercent' "Diagnostic alias for the configuration file" - def __init__(self, data_manager, domain, variable, leadtimes, experiment_config): + def __init__(self, data_manager, domain, variable, leadtimes, num_bins, experiment_config): Diagnostic.__init__(self, data_manager) self.variable = variable self.domain = domain @@ -32,7 +32,7 @@ class ClimatologicalPercentile(Diagnostic): self.realizations = None self.lat_len = None self.lon_len = None - self.num_bins = 2000 + self.num_bins = num_bins self._bins = None self.percentiles = np.array([0.1, 0.25, 0.33, 0.5, 0.66, 0.75, 0.9]) self.cmor_var = Variable.get_variable(variable, silent=True) @@ -67,14 +67,18 @@ class ClimatologicalPercentile(Diagnostic): if num_options < 3: raise Exception('You must specify the variable (and its domain) and the leadtimes you want to compute ' 'the percentiles on') - if num_options > 3: - raise Exception('You must specify three parameters for the climatological percentiles') + if num_options > 4: + raise Exception('You must specify between three and 4 parameters for the climatological percentiles') domain = Domain(options[1]) variable = options[2] leadtimes = [int(i) for i in options[3].split('-')] + if num_options > 3: + num_bins = int(options[4]) + else: + num_bins = 2000 job_list = list() - job_list.append(ClimatologicalPercentile(diags.data_manager, domain, variable, leadtimes, + job_list.append(ClimatologicalPercentile(diags.data_manager, domain, variable, leadtimes, num_bins, diags.config.experiment)) return job_list -- GitLab
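
Note on the OPeNDAP subsetting these patches introduce: THREDDSSubset assembles a constraint-expression URL of the form <dataset>?dim[start:1:end],...,var[start:1:end]... and hands it to nccopy so that only the requested chunk is pulled from the THREDDS server. The sketch below is illustrative only and is not part of the patch series; the dataset URL, the "mon_mean" frequency folder, the variable name and the index bounds are hypothetical placeholders, and the helper mirrors (but does not reproduce) the logic of THREDDSSubset._get_subset_url.

# Illustrative sketch only -- not part of the patches. It shows the shape of the
# constraint-expression URL built by THREDDSSubset._get_subset_url; all concrete
# values (dataset URL, folder names, index bounds) are hypothetical examples.

def build_subset_url(dataset_url, var, dim_indexes, dim_order):
    """Return '<dataset>?dim1[a:1:b],dim2[c:1:d],...,var[a:1:b][c:1:d]...'."""
    var_slice = var
    dims_slice = ''
    for dim in dim_order:
        start, end = dim_indexes[dim]
        index = '[{0}:1:{1}]'.format(start, end)
        var_slice += index                              # indexes appended to the variable
        dims_slice += '{0}{1},'.format(dim, index)      # and to each coordinate variable
    return '{0}?{1}{2}'.format(dataset_url, dims_slice, var_slice)


if __name__ == '__main__':
    url = build_subset_url(
        'https://earth.bsc.es/thredds/dodsC/recon/ecmwf/erainterim/mon_mean/sfcWind/sfcWind.nc',
        'sfcWind',
        {'time': (0, 11), 'latitude': (0, 255), 'longitude': (0, 511)},
        ('time', 'latitude', 'longitude'))
    print(url)

A URL of this form is then passed to nccopy (nccopy <url> <temp file>) to materialise the subset locally, which is what THREDDSSubset.download() does in the patches.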