From c3cd27d8dce51eb82f2a0990390177ffad0abb7a Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Thu, 19 Sep 2019 15:32:13 +0200 Subject: [PATCH 1/2] Fix DCPP support --- earthdiagnostics/frequency.py | 63 ++++++++++++++++----------- earthdiagnostics/variable.py | 81 +++++++++++++++++++++++------------ 2 files changed, 93 insertions(+), 51 deletions(-) diff --git a/earthdiagnostics/frequency.py b/earthdiagnostics/frequency.py index 5cd15f48..25915546 100644 --- a/earthdiagnostics/frequency.py +++ b/earthdiagnostics/frequency.py @@ -5,23 +5,31 @@ class Frequency(object): """Time frequency""" - _recognized = {'f': 'fx', 'fx': 'fx', 'fixed': 'fx', - 'c': 'clim', 'clim': 'clim', 'climatology': 'clim', 'monclim': 'clim', '1hrclimmon': 'clim', - 'dec': 'dec', 'decadal': 'dec', - 'y': 'year', 'yr': 'year', 'year': 'year', 'yearly': 'year', - 'm': 'mon', '1m': 'mon', 'mon': 'mon', 'monthly': 'mon', 'mm': 'mon', - 'w': 'week', '1w': 'week', 'week': 'week', 'weekly': 'week', - 'd': 'day', '1d': 'day', 'daily': 'day', 'day': 'day', - '15': '15hr', '15h': '15hr', '15hr': '15hr', '15_hourly': '15hr', '15hourly': '15hr', - '15 hourly': '15hr', - '6': '6hr', '6h': '6hr', '6hr': '6hr', '6_hourly': '6hr', '6hourly': '6hr', '6 hourly': '6hr', - '3': '3hr', '3h': '3hr', '3hr': '3hr', '3_hourly': '3hr', '3hourly': '3hr', '3 hourly': '3hr', - '1': '1hr', 'hr': '1hr', '1h': '1hr', 'hourly': '1hr', '1hr': '1hr', '1 hourly': '1hr', - '450mn': '450mn', - 'subhr': 'subhr'} + _recognized = { + 'f': 'fx', 'fx': 'fx', 'fixed': 'fx', + 'c': 'clim', 'clim': 'clim', 'climatology': 'clim', 'monclim': 'clim', '1hrclimmon': 'clim', + 'monc': 'clim', + 'dec': 'dec', 'decadal': 'dec', + 'y': 'year', 'yr': 'year', 'year': 'year', 'yearly': 'year', + 'm': 'mon', '1m': 'mon', 'mon': 'mon', 'monthly': 'mon', 'mm': 'mon', + 'w': 'week', '1w': 'week', 'week': 'week', 'weekly': 'week', + 'd': 'day', '1d': 'day', 'daily': 'day', 'day': 'day', + '15': '15hr', '15h': '15hr', '15hr': '15hr', '15_hourly': '15hr', '15hourly': '15hr', + '15 hourly': '15hr', + '6': '6hr', '6h': '6hr', '6hr': '6hr', '6_hourly': '6hr', '6hourly': '6hr', '6 hourly': '6hr', + '3': '3hr', '3h': '3hr', '3hr': '3hr', '3_hourly': '3hr', '3hourly': '3hr', '3 hourly': '3hr', + '1': '1hr', 'hr': '1hr', '1h': '1hr', 'hourly': '1hr', '1hr': '1hr', '1 hourly': '1hr', + '450mn': '450mn', + 'subhr': 'subhr' + } def __init__(self, freq): freq = freq.lower() + if freq.endswith('cm'): + freq = freq[:-2] + self.point = freq.endswith('pt') + if self.point: + freq = freq[:-2] try: self.frequency = Frequency._recognized[freq] except KeyError: @@ -49,18 +57,24 @@ class Frequency(object): str """ from earthdiagnostics.variable import VariableType - if self == Frequencies.daily: - freq_str = 'daily_{0}'.format(VariableType.to_str(vartype)) + if self == Frequencies.climatology: + return 'clim' + elif self == Frequencies.fixed: + return 'fx' + elif self == Frequencies.decadal: + freq_str = 'decadal' + elif self == Frequencies.yearly: + freq_str = 'yearly' + elif self == Frequencies.monthly: + freq_str = 'monthly' elif self == Frequencies.weekly: - freq_str = 'weekly_{0}'.format(VariableType.to_str(vartype)) - elif self == Frequencies.climatology: - freq_str = 'clim' - elif self in (Frequencies.three_hourly, Frequencies.six_hourly, Frequencies.hourly): + freq_str = 'weekly' + elif self == Frequencies.daily: + freq_str = 'daily' + elif self.frequency.endswith('hr'): freq_str = self.frequency[:-2] + 'hourly' - if vartype != VariableType.MEAN: - freq_str = '{0}_{1}'.format(freq_str, VariableType.to_str(vartype)) - else: - freq_str = 'monthly_{0}'.format(VariableType.to_str(vartype)) + if vartype != VariableType.MEAN and not self.point: + freq_str = '{0}_{1}'.format(freq_str, VariableType.to_str(vartype)) return freq_str @staticmethod @@ -88,6 +102,7 @@ class Frequencies(object): fixed = Frequency('fx') climatology = Frequency('clim') + decadal = Frequency('dec') yearly = Frequency('year') monthly = Frequency('mon') weekly = Frequency('week') diff --git a/earthdiagnostics/variable.py b/earthdiagnostics/variable.py index d6fea362..bb1ccb42 100644 --- a/earthdiagnostics/variable.py +++ b/earthdiagnostics/variable.py @@ -4,6 +4,7 @@ import csv import glob import json import os +from collections import Counter import openpyxl from bscearth.utils.log import Log @@ -24,8 +25,10 @@ class VariableManager(object): """Class for translating variable alias into standard names and provide the correct description for them""" def __init__(self): - self._cmor_tables_folder = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'cmor_tables') - self._aliases_folder = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'variable_alias') + self._cmor_tables_folder = os.path.join( + os.path.dirname(os.path.realpath(__file__)), 'cmor_tables') + self._aliases_folder = os.path.join(os.path.dirname( + os.path.realpath(__file__)), 'variable_alias') self.clean() def clean(self): @@ -50,7 +53,8 @@ class VariableManager(object): return self._dict_aliases[original_name.lower()][1] except KeyError: if not silent: - Log.warning('Variable {0} is not defined in the CMOR table. Please add it'.format(original_name)) + Log.warning('Variable {0} is not defined in the CMOR table. Please add it'.format( + original_name)) return None def get_all_variables(self): @@ -78,7 +82,8 @@ class VariableManager(object): return self._dict_aliases[original_name.lower()] except KeyError: if not silent: - Log.warning('Variable {0} is not defined in the CMOR table. Please add it'.format(original_name)) + Log.warning('Variable {0} is not defined in the CMOR table. Please add it'.format( + original_name)) return None, None def load_variables(self, table_name): @@ -117,11 +122,13 @@ class VariableManager(object): raise Exception('Data convention {0} unknown'.format(self.table_name)) def _get_csv_path(self, table_name): - csv_table_path = os.path.join(self._cmor_tables_folder, '{0}.csv'.format(table_name)) + csv_table_path = os.path.join( + self._cmor_tables_folder, '{0}.csv'.format(table_name)) return csv_table_path def _get_json_folder(self): - json_folder = os.path.join(self._cmor_tables_folder, '{0}/Tables'.format(self.table_name)) + json_folder = os.path.join( + self._cmor_tables_folder, '{0}/Tables'.format(self.table_name)) return json_folder def _load_file(self, csv_table_path, default=False): @@ -151,13 +158,13 @@ class VariableManager(object): self._dict_variables[var.short_name.lower()] = var def _load_json(self, json_folder): - executor = ThreadPoolExecutor() + # executor = ThreadPoolExecutor(max_workers=1) for file_name in os.listdir(json_folder): if file_name in ('CMIP6_grids.json', 'CMIP6_formula_terms.json'): continue - - executor.submit(self._load_json_file, os.path.join(json_folder, file_name)) - executor.shutdown(True) + self._load_json_file(os.path.join(json_folder, file_name)) + # executor.submit(self._load_json_file, os.path.join(json_folder, file_name)) + # executor.shutdown(True) def _load_json_file(self, json_path): with open(json_path) as json_file: @@ -169,12 +176,20 @@ class VariableManager(object): if 'variable_entry' in data: Log.debug('Parsing file {0}'.format(json_path)) table_id = data['Header']['table_id'][6:] - table = CMORTable(table_id, - Frequency(data['variable_entry'].values()[0]['frequency']), - data['Header']['table_date'], - ModelingRealms.parse(data['Header']['realm'])) + + var_freqs = (var['frequency'] + for var in data['variable_entry'].values()) + table_freq, _ = Counter(var_freqs).most_common(1)[0] + table = CMORTable( + table_id, + Frequency(table_freq), + data['Header']['table_date'], + ModelingRealms.parse(data['Header']['realm'].split(' ')[0]) + ) self.tables[table_id] = table self._load_json_variables(data['variable_entry'], table) + else: + Log.debug('Skipping file {0}'.format(json_path)) def _load_json_variables(self, json_data, table): for short_name in json_data.keys(): @@ -182,7 +197,7 @@ class VariableManager(object): pass short_name = str.strip(str(short_name)) if short_name.lower() in self._dict_variables: - self._dict_variables[short_name.lower()].tables.append(table) + self._dict_variables[short_name.lower()].add_table(table) continue variable = Variable() try: @@ -218,13 +233,15 @@ class VariableManager(object): cmor_vars.append(self._dict_variables[alias]) if len(cmor_vars) == 0: - Log.warning('Aliases {0} could not be mapped to any variable'.format(aliases)) + Log.warning( + 'Aliases {0} could not be mapped to any variable'.format(aliases)) continue elif len(cmor_vars) > 1: non_default = [var for var in cmor_vars if not var.default] if len(non_default) == 1: for default in [var for var in cmor_vars if var not in non_default]: - del self._dict_variables[default.short_name.lower()] + del self._dict_variables[default.short_name.lower( + )] cmor_vars = non_default else: @@ -254,7 +271,8 @@ class VariableManager(object): cmor_var.known_aliases.append(alias_object) def _get_aliases_csv_path(self, filename): - csv_table_path = os.path.join(self._aliases_folder, '{0}.csv'.format(filename)) + csv_table_path = os.path.join( + self._aliases_folder, '{0}.csv'.format(filename)) return csv_table_path def create_aliases_dict(self): @@ -271,11 +289,13 @@ class VariableManager(object): self._dict_aliases[alias.alias] = (alias, cmor_var) def _get_xlsx_path(self): - xlsx_table_path = os.path.join(self._cmor_tables_folder, '{0}.xlsx'.format(self.table_name)) + xlsx_table_path = os.path.join( + self._cmor_tables_folder, '{0}.xlsx'.format(self.table_name)) if os.path.isfile(xlsx_table_path): return xlsx_table_path - xlsx_table_path = os.path.join(self._cmor_tables_folder, self.table_name, 'etc', '*.xlsx') + xlsx_table_path = os.path.join( + self._cmor_tables_folder, self.table_name, 'etc', '*.xlsx') xlsx_table_path = glob.glob(xlsx_table_path) if len(xlsx_table_path) == 1: return xlsx_table_path[0] @@ -288,7 +308,8 @@ class VariableManager(object): data_sheet = excel.worksheets[0] for row in data_sheet.rows: if row[1].value in excel.sheetnames: - table_data[row[1].value] = (Frequency(row[2].value), 'Date missing') + table_data[row[1].value] = ( + Frequency(row[2].value), 'Date missing') for sheet_name in excel.sheetnames: sheet = excel[sheet_name] if sheet.title == 'Primday': @@ -314,7 +335,8 @@ class VariableManager(object): def _read_realm_from_json(self, table_name): for prefix in ('CMIP6', 'PRIMAVERA'): - json_path = os.path.join(self._get_json_folder(), '{0}_{1}.json'.format(prefix, table_name)) + json_path = os.path.join(self._get_json_folder( + ), '{0}_{1}.json'.format(prefix, table_name)) if os.path.isfile(json_path): with open(json_path) as json_file: json_data = json_file.read() @@ -412,7 +434,8 @@ class Variable(object): if 'out_name' in json_var: self.short_name = json_var['out_name'].strip() else: - raise VariableJsonException('Variable {0} has no out name defined'.format(variable)) + raise VariableJsonException( + 'Variable {0} has no out name defined'.format(variable)) self.standard_name = json_var['standard_name'].strip() self.long_name = json_var['long_name'].strip() @@ -443,14 +466,16 @@ class Variable(object): """ if len(domains) > 1: - Log.warning('Multiple modeling realms assigned to variable {0}: {1}. ', self, domains) + Log.warning( + 'Multiple modeling realms assigned to variable {0}: {1}. ', self, domains) parsed = [] for domain in domains: parsed.append(ModelingRealms.parse(domain)) selected = self._select_most_specific(parsed) if selected: - Log.warning('We will use {0} as it is the most specific', selected) + Log.warning( + 'We will use {0} as it is the most specific', selected) return selected Log.warning('We will use {0} as it is the first on the list and there is no one that is more specific', @@ -458,7 +483,8 @@ class Variable(object): return parsed[0] elif len(domains) == 0: - Log.warning('Variable {0} has no modeling realm defined'.format(self.short_name)) + Log.warning( + 'Variable {0} has no modeling realm defined'.format(self.short_name)) return None else: return ModelingRealms.parse(domains[0]) @@ -516,7 +542,8 @@ class Variable(object): if self.domain: table_name = self.domain.get_table_name(frequency, data_convention) return CMORTable(table_name, frequency, 'December 2013', self.domain) - raise ValueError('Can not get table for {0} and frequency {1}'.format(self, frequency)) + raise ValueError( + 'Can not get table for {0} and frequency {1}'.format(self, frequency)) @staticmethod def _select_most_specific(parsed): -- GitLab From 89329234a5fa744990c702bbfba5ce1f626c9b9b Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Fri, 20 Sep 2019 15:51:50 +0200 Subject: [PATCH 2/2] Fix test --- earthdiagnostics/frequency.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/earthdiagnostics/frequency.py b/earthdiagnostics/frequency.py index 25915546..45391649 100644 --- a/earthdiagnostics/frequency.py +++ b/earthdiagnostics/frequency.py @@ -73,7 +73,8 @@ class Frequency(object): freq_str = 'daily' elif self.frequency.endswith('hr'): freq_str = self.frequency[:-2] + 'hourly' - if vartype != VariableType.MEAN and not self.point: + self.point = True + if not self.point or vartype != VariableType.MEAN: freq_str = '{0}_{1}'.format(freq_str, VariableType.to_str(vartype)) return freq_str -- GitLab