Commit 198c0bca authored by Javier Vegas-Regidor's avatar Javier Vegas-Regidor
Browse files

Merge branch 'master' into devel_better_data_management

# Conflicts:
#	VERSION
#	diags.conf
#	earthdiagnostics/cdftools.py
#	earthdiagnostics/cmormanager.py
#	earthdiagnostics/config.py
#	earthdiagnostics/datamanager.py
#	earthdiagnostics/diagnostic.py
#	earthdiagnostics/earthdiags.py
#	earthdiagnostics/general/module.py
#	earthdiagnostics/general/select_levels.py
#	earthdiagnostics/general/simplify_dimensions.py
#	earthdiagnostics/ocean/heatcontentlayer.py
#	earthdiagnostics/ocean/interpolatecdo.py
#	earthdiagnostics/ocean/mask_land.py
#	earthdiagnostics/utils.py
#	test/unit/__init__.py
#	test/unit/test_heatcontent.py
parents 74c42c69 49f40187
......@@ -64,7 +64,7 @@ copyright = u'2016, BSC-CNS Earth Sciences Department'
# The short X.Y version.
version = '3.0b'
# The full version, including alpha/beta/rc tags.
release = '3.0.0b54'
release = '3.0.0b55'
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
......
......@@ -2,6 +2,7 @@
from earthdiagnostics.utils import Utils
import os
from bscearth.utils.log import Log
import six
class CDFTools(object):
......@@ -20,7 +21,6 @@ class CDFTools(object):
"""
Runs one of the CDFTools
:param input_option:
:param command: executable to run
:type command: str | iterable
:param input: input file
......@@ -31,6 +31,8 @@ class CDFTools(object):
:type options: str | list[str] | Tuple[str]
:param log_level: log level at which the output of the cdftool command will be added
:type log_level: int
:param input_option: option to add before input file
:type input_option: str
"""
line = [os.path.join(self.path, command)]
......@@ -40,8 +42,7 @@ class CDFTools(object):
line.append(input_option)
self._check_input(command, input, line)
if options:
# noinspection PyTypeChecker
if isinstance(options, basestring):
if isinstance(options, six.string_types):
options = options.split()
for option in options:
line.append(str(option))
......@@ -66,8 +67,7 @@ class CDFTools(object):
@staticmethod
def _check_input(command, input, line):
if input:
# noinspection PyTypeChecker
if isinstance(input, basestring):
if isinstance(input, six.string_types):
line.append(input)
if not os.path.exists(input):
raise ValueError('Error executing {0}\n Input file {1} file does not exist', command, input)
......
Variable,Shortname,Name,Long name,Domain,Basin,Units,Valid min,Valid max,Grid,Tables
Subproject commit 10e46868e356ef3a217c38fe1e0b7d46f8d3158e
Subproject commit f25073770569ea73540d09a058637128db024c55
......@@ -272,7 +272,8 @@ class Cmorizer(object):
self._merge_and_cmorize_atmos(chunk_start, chunk_end, grid,
'{0}hr'.format(self.atmos_timestep))
def _unpack_grib(self, full_file, gribfile, grid):
@staticmethod
def _unpack_grib(full_file, gribfile, grid):
Log.info('Unpacking... ')
# remap on regular Gauss grid
if grid == 'SH':
......
......@@ -47,14 +47,26 @@ class CMORManager(DataManager):
raise Exception('Can not find model data')
self.cmor_path = os.path.join(self.config.data_dir, self.experiment.expid, 'cmorfiles')
def file_exists(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None,
                vartype=VariableType.MEAN, possible_versions=None):
    """
    Check whether the CMOR file for the requested variable and chunk is on disk

    :param domain: domain of the variable, forwarded to get_file_path
    :param var: variable name, resolved through self.variable_list
    :param startdate: startdate of the file
    :param member: member of the file
    :param chunk: chunk of the file
    :param grid: grid of the file, forwarded to get_file_path
    :param box: not used by this method
    :param frequency: frequency of the file, forwarded to get_file_path
    :param vartype: not used by this method
    :param possible_versions: when given, the file is looked for under each of these versions instead of the
        configured one (the configured version string is substituted in the path)
    :type possible_versions: iterable of str | NoneType
    :return: True if at least one candidate file exists, False otherwise
    :rtype: bool
    """
    cmor_var = self.variable_list.get_variable(var)
    filepath = self.get_file_path(startdate, member, domain, var, cmor_var, chunk, frequency, grid, None, None)

    # None stands for "check the path exactly as computed"
    versions = (None,)
    configured_version = None
    if possible_versions is not None:
        configured_version = self.config.cmor.version
        # With an empty configured version there is nothing to substitute: str.replace('', x) would insert
        # x between every character of the path, so fall back to checking the unmodified path.
        if configured_version:
            versions = possible_versions

    for version in versions:
        # noinspection PyBroadException
        try:
            if version is None:
                candidate = filepath
            else:
                candidate = filepath.replace(configured_version, version)
            if os.path.isfile(candidate):
                return True
        except Exception:
            # Best effort, as before: a path that can not be checked counts as a missing file
            continue
    return False
def request_chunk(self, domain, var, startdate, member, chunk, grid=None, box=None, frequency=None, vartype=None):
......@@ -62,7 +74,7 @@ class CMORManager(DataManager):
Copies a given file from the CMOR repository to the scratch folder and returns the path to the scratch's copy
:param **kwargs:
:param domain: CMOR domain
:param domain: CMOR domain
:type domain: Domain
:param var: variable name
:type var: str
......@@ -223,11 +235,13 @@ class CMORManager(DataManager):
:type date_str: str|NoneType
:return: path to the file
:rtype: str|NoneType
:param cmor_var: variable instance describing the selected variable
:type cmor_var: Variable
"""
if not frequency:
frequency = self.config.frequency
folder_path = self._get_full_cmor_folder_path(startdate, member, domain, var, frequency, grid)
folder_path = self._get_full_cmor_folder_path(startdate, member, domain, var, frequency, grid, cmor_var)
file_name = self._get_cmor_file_name(startdate, member, domain, var, cmor_var, frequency,
chunk, year, date_str, grid)
......@@ -256,29 +270,41 @@ class CMORManager(DataManager):
else:
time_bound = '.nc'
if self.config.data_convention == 'specs':
if self.config.data_convention in ('specs', 'preface'):
file_name = '{0}_{1}_{2}_{3}_S{4}_{5}{6}'.format(var, cmor_table.name, self.experiment.model,
self.experiment.experiment_name, startdate,
self._get_member_str(member), time_bound)
elif self.config.data_convention in ('primavera', 'cmip6'):
if grid:
grid = '_{0}'.format(grid)
else:
grid = ''
file_name = '{0}_{1}_{2}_{3}_S{4}_{5}{6}{7}'.format(var, cmor_table.name, self.experiment.model,
self.experiment.experiment_name, startdate,
self._get_member_str(member), grid, time_bound)
if not grid:
if domain in [ModelingRealms.ocnBgchem, ModelingRealms.seaIce, ModelingRealms.ocean]:
grid = self.config.cmor.default_ocean_grid
else:
grid = self.config.cmor.default_atmos_grid
file_name = '{0}_{1}_{2}_{3}_{4}_{5}{6}'.format(var, cmor_table.name, self.experiment.experiment_name,
self.experiment.model, self._get_member_str(member),
grid, time_bound)
else:
raise Exception('Data convention {0} not supported'.format(self.config.data_convention))
return file_name
def _get_full_cmor_folder_path(self, startdate, member, domain, var, frequency, grid):
folder_path = os.path.join(self._get_startdate_path(startdate), str(frequency), domain.name, var)
if grid:
folder_path = os.path.join(folder_path, grid)
folder_path = os.path.join(folder_path, self._get_member_str(member))
def _get_full_cmor_folder_path(self, startdate, member, domain, var, frequency, grid, cmor_var):
if self.config.data_convention in ('specs', 'preface'):
folder_path = os.path.join(self._get_startdate_path(startdate), str(frequency), domain.name, var)
if grid:
folder_path = os.path.join(folder_path, grid)
folder_path = os.path.join(folder_path, self._get_member_str(member))
if self.config.cmor.version:
folder_path = os.path.join(folder_path, self.config.cmor.version)
else:
if not grid:
if domain in [ModelingRealms.ocnBgchem, ModelingRealms.seaIce, ModelingRealms.ocean]:
grid = self.config.cmor.default_ocean_grid
else:
grid = self.config.cmor.default_atmos_grid
folder_path = os.path.join(self._get_startdate_path(startdate), self._get_member_str(member),
cmor_var.get_table(frequency, self.config.data_convention).name, var,
grid, self.config.cmor.version)
return folder_path
def _get_chunk_time_bounds(self, startdate, chunk):
......@@ -286,8 +312,12 @@ class CMORManager(DataManager):
chunk_start = chunk_start_date(start, chunk, self.experiment.chunk_size, 'month', self.experiment.calendar)
chunk_end = chunk_end_date(chunk_start, self.experiment.chunk_size, 'month', self.experiment.calendar)
chunk_end = previous_day(chunk_end, self.experiment.calendar)
time_bound = "{0:04}{1:02}-{2:04}{3:02}".format(chunk_start.year, chunk_start.month, chunk_end.year,
chunk_end.month)
if self.config.data_convention == 'preface':
separator = '_'
else:
separator = '-'
time_bound = "{0:04}{1:02}{4}{2:04}{3:02}".format(chunk_start.year, chunk_start.month, chunk_end.year,
chunk_end.month, separator)
return time_bound
def link_file(self, domain, var, cmor_var, startdate, member, chunk=None, grid=None,
......@@ -319,6 +349,8 @@ class CMORManager(DataManager):
:type vartype: VariableType
:return: path to the copy created on the scratch folder
:rtype: str
:param cmor_var: variable instance describing the selected variable
:type cmor_var: Variable
"""
if not frequency:
......@@ -345,26 +377,44 @@ class CMORManager(DataManager):
if not self._unpack_cmor_files(startdate, member):
self._cmorize_member(startdate, member)
def is_cmorized(self, startdate, member, chunk):
    """
    Tell whether a chunk is already cmorized, caching the answer

    The check itself is delegated to _is_cmorized and its result is stored in self._dic_cmorized under
    the (startdate, member, chunk) key, so it runs at most once per key.

    :return: True if the chunk is cmorized, False otherwise
    :rtype: bool
    """
    key = (startdate, member, chunk)
    cache = self._dic_cmorized
    if key not in cache:
        cache[key] = self._is_cmorized(startdate, member, chunk)
    return True if cache[key] else False
def _is_cmorized(self, startdate, member, chunk):
    """
    Look on disk for at least one CMOR file belonging to the given startdate, member and chunk

    :param startdate: startdate to check
    :param member: member to check
    :param chunk: chunk to check
    :return: True as soon as one matching file is found, False otherwise
    :rtype: bool
    """
    startdate_path = self._get_startdate_path(startdate)
    if not os.path.isdir(startdate_path):
        return False

    # 'preface' stores its files with the same <frequency>/<domain>/<variable> layout as 'specs'
    # (see _get_full_cmor_folder_path), so both conventions are scanned the same way.
    if self.config.data_convention in ('specs', 'preface'):
        # seaIce has to be scanned as well: the tuple used to list ocnBgchem twice and skip it
        domains = (ModelingRealms.ocean, ModelingRealms.seaIce, ModelingRealms.ocnBgchem,
                   ModelingRealms.atmos)
        for freq in os.listdir(startdate_path):
            for domain in domains:
                domain_path = os.path.join(startdate_path, freq, domain.name)
                if not os.path.isdir(domain_path):
                    continue
                for var in os.listdir(domain_path):
                    cmor_var = self.variable_list.get_variable(var, True)
                    var_path = self.get_file_path(startdate, member, domain, var, cmor_var, chunk,
                                                  Frequency(freq))
                    if os.path.isfile(var_path):
                        return True
        return False

    member_path = os.path.join(startdate_path, self._get_member_str(member))
    if not os.path.isdir(member_path):
        return False

    # Only the monthly atmosphere, ocean and sea ice tables are probed for the other conventions
    for table, domain, freq in (('Amon', ModelingRealms.atmos, Frequencies.monthly),
                                ('Omon', ModelingRealms.ocean, Frequencies.monthly),
                                ('SImon', ModelingRealms.seaIce, Frequencies.monthly)):
        table_dir = os.path.join(member_path, table)
        if not os.path.isdir(table_dir):
            continue
        for var in os.listdir(table_dir):
            cmor_var = self.variable_list.get_variable(var, True)
            var_path = self.get_file_path(startdate, member, domain, var, cmor_var, chunk, frequency=freq)
            if os.path.isfile(var_path):
                return True
    return False
......@@ -386,9 +436,7 @@ class CMORManager(DataManager):
cmorized = False
if not self.config.cmor.force_untar:
while self.is_cmorized(startdate, member, chunk,
(ModelingRealms.ocean, ModelingRealms.seaIce,
ModelingRealms.atmos, ModelingRealms.ocnBgchem)):
while self.is_cmorized(startdate, member, chunk):
chunk += 1
while self._unpack_chunk(startdate, member, chunk):
......@@ -507,18 +555,25 @@ class CMORManager(DataManager):
:return: path to the startdate's CMOR folder
:rtype: str
"""
return os.path.join(self.config.data_dir, self.experiment.expid, 'cmorfiles', self.experiment.institute,
self.experiment.model, self.experiment.experiment_name, 'S' + startdate)
if self.config.data_convention == 'specs':
return os.path.join(self.config.data_dir, self.experiment.expid, 'cmorfiles', self.experiment.institute,
self.experiment.model, self.experiment.experiment_name, 'S' + startdate)
elif self.config.data_convention == 'preface':
return os.path.join(self.config.data_dir, self.experiment.expid, 'cmorfiles', self.experiment.institute,
self.experiment.experiment_name, 'S' + startdate)
else:
return os.path.join(self.config.data_dir, self.experiment.expid, 'cmorfiles', self.config.cmor.activity,
self.experiment.institute, self.experiment.model, self.experiment.experiment_name)
def _get_member_str(self, member):
if self.config.data_convention == 'specs':
template = 'r{0}i1p1'
if self.config.data_convention in ('specs', 'preface'):
template = 'r{0}i{1}p1'
elif self.config.data_convention in ('primavera', 'cmip6'):
template = 'r{0}i1p1f1'
template = 'r{0}i{1}p1f1'
else:
raise Exception('Data convention {0} not supported'.format(self.config.data_convention))
return template.format(member + 1 - self.experiment.member_count_start)
return template.format(member + 1 - self.experiment.member_count_start, self.config.cmor.initialization_number)
class MergeYear(Diagnostic):
......
# coding=utf-8
import os
import six
from bscearth.utils.log import Log
from bscearth.utils.date import parse_date, chunk_start_date, chunk_end_date, date2str
from bscearth.utils.config_parser import ConfigParser
......@@ -53,9 +54,9 @@ class Config(object):
self.mask_regions_3d = parser.get_path_option('DIAGNOSTICS', 'MASK_REGIONS_3D', '')
"Custom mask regions 3D file to use"
self.data_convention = parser.get_choice_option('DIAGNOSTICS', 'DATA_CONVENTION',
('specs', 'primavera', 'cmip6'), 'specs', ignore_case=True)
('specs', 'primavera', 'cmip6', 'preface'), 'specs',
ignore_case=True)
VariableManager().load_variables(self.data_convention)
self._diags = parser.get_option('DIAGNOSTICS', 'DIAGS')
self.frequency = Frequency(parser.get_option('DIAGNOSTICS', 'FREQUENCY'))
......@@ -97,6 +98,7 @@ class Config(object):
self.cmor = CMORConfig(parser)
self.thredds = THREDDSConfig(parser)
self.report = ReportConfig(parser)
def get_commands(self):
"""
......@@ -121,9 +123,14 @@ class CMORConfig(object):
self.associated_model = parser.get_option('CMOR', 'ASSOCIATED_MODEL', 'to be filled')
self.initialization_description = parser.get_option('CMOR', 'INITIALIZATION_DESCRIPTION', 'to be filled')
self.initialization_method = parser.get_option('CMOR', 'INITIALIZATION_METHOD', '1')
self.initialization_number = parser.get_int_option('CMOR', 'INITIALIZATION_NUMBER', 1)
self.physics_description = parser.get_option('CMOR', 'PHYSICS_DESCRIPTION', 'to be filled')
self.physics_version = parser.get_option('CMOR', 'PHYSICS_VERSION', '1')
self.source = parser.get_option('CMOR', 'SOURCE', 'to be filled')
self.version = parser.get_option('CMOR', 'VERSION', '')
self.default_ocean_grid = parser.get_option('CMOR', 'DEFAULT_OCEAN_GRID', 'gn')
self.default_atmos_grid = parser.get_option('CMOR', 'DEFAULT_ATMOS_GRID', 'gr')
self.activity = parser.get_option('CMOR', 'ACTIVITY', 'CMIP')
vars_string = parser.get_option('CMOR', 'VARIABLE_LIST', '')
var_manager = VariableManager()
......@@ -317,7 +324,7 @@ class ExperimentConfig(object):
def get_chunk_start(self, startdate, chunk):
    """
    Get the start date of a chunk

    :param startdate: startdate of the experiment; a string is first converted with parse_date
    :param chunk: chunk number, forwarded to chunk_start_date
    :return: value returned by chunk_start_date for monthly chunks of self.chunk_size and self.calendar
    """
    # noinspection PyTypeChecker
    start = parse_date(startdate) if isinstance(startdate, six.string_types) else startdate
    return chunk_start_date(start, chunk, self.chunk_size, 'month', self.calendar)
......@@ -365,3 +372,8 @@ class ExperimentConfig(object):
"""
return '{0}{1}'.format(self.member_prefix, str(member).zfill(self.member_digits))
class ReportConfig(object):
    """
    Options read from the REPORT section of the configuration

    :param parser: configuration parser providing get_int_option and get_path_option
    """

    def __init__(self, parser):
        section = 'REPORT'
        # MAXIMUM_PRIORITY option, 10 if it is not configured
        self.maximum_priority = parser.get_int_option(section, 'MAXIMUM_PRIORITY', 10)
        # PATH option, empty if it is not configured
        self.path = parser.get_path_option(section, 'PATH', '')
......@@ -119,7 +119,13 @@ class Basins(object):
}
self.Global = Basin('Global')
self.Atlantic = Basin('Atlantic_Ocean')
self.Pacific = Basin('Pacific_Ocean')
self.IndoPacific = Basin('Indo_Pacific_Ocean')
self.Indian = Basin('Indian_Ocean')
self._known_aliases = {}
self._add_alias('glob', self.Global)
self._add_alias(self.Global.name, self.Global)
def get_available_basins(self, handler):
"""
......@@ -143,8 +149,6 @@ class Basins(object):
except KeyError:
pass
self._add_alias('glob', self.Global)
def _add_alias(self, basin, basin_object):
self._known_aliases[basin.lower()] = basin_object
......
......@@ -66,7 +66,7 @@ class Diagnostic(Publisher):
"""
Register a new diagnostic using the given alias. It must be called using the derived class.
:param cls: diagnostic class to register
:type cls: T >= Diagnostic
:type cls: Type[Diagnostic]
"""
if not issubclass(cls, Diagnostic):
raise ValueError('Class {0} must be derived from Diagnostic'.format(cls))
......@@ -282,6 +282,16 @@ class DiagnosticIntOption(DiagnosticOption):
class DiagnosticListIntOption(DiagnosticOption):
"""
:param name:
:type name: str
:param default_value:
:type default_value: int|NoneType
:param min_limit:
:type min_limit: int|NoneType
:param max_limit:
:type max_limit: int|NoneType
"""
def __init__(self, name, default_value=None, min_limit=None, max_limit=None):
super(DiagnosticListIntOption, self).__init__(name, default_value)
......
......@@ -164,7 +164,8 @@ class EarthDiags(object):
self._read_basins_from_file('mask_regions.nc')
self._read_basins_from_file('mask_regions.3d.nc')
def _read_basins_from_file(self, filename):
@staticmethod
def _read_basins_from_file(filename):
if not os.path.isfile(filename):
return
handler = Utils.openCdf(filename)
......@@ -206,13 +207,18 @@ class EarthDiags(object):
def report(self):
    """
    Write one report of missing variables per startdate and member

    Reports go to the configured report path, or to the scratch folder when no report path is set.
    The folder is created before any report is written.
    """
    Log.info('Looking for existing vars...')
    self._prepare_data_manager()

    base_folder = self.config.report.path or self.config.scratch_dir
    Utils.create_folder_tree(base_folder)

    experiment = self.config.experiment
    for startdate in experiment.startdates:
        for member in self.config.experiment.members:
            results = self._get_variable_report(startdate, member)
            file_name = '{0}_{1}.report'.format(startdate, self.config.experiment.get_member_str(member))
            self.create_report(os.path.join(base_folder, file_name), results)

    Log.result('Report finished')
......@@ -222,40 +228,41 @@ class EarthDiags(object):
var_manager = VariableManager()
results = list()
for var in var_manager.get_all_variables():
if var.priority is None or var.domain is None:
if var.domain is None:
continue
for table in var.tables:
for table, priority in var.tables:
if priority is None or priority > self.config.report.maximum_priority:
continue
if not self.data_manager.file_exists(var.domain, var.short_name, startdate, member, 1,
frequency=table.frequency):
results.append((var, table))
return results
def create_report(self, report_path, results):
realms = set([result[0].domain for result in results])
realms = sorted(realms)
for realm in realms:
file_handler = open('{0}.{1}'.format(report_path, realm), 'w')
realm_results = [result for result in results if result[0].domain == realm]
tables = set([result[1].name for result in realm_results])
tables = sorted(tables)
for table in tables:
table_results = [result for result in realm_results if result[1].name == table]
results.append((var, table, priority))
Log.debug('Variable {0.short_name} not found in {1.name}', var, table)
else:
Log.result('Variable {0.short_name} found in {1.name}', var, table)
file_handler.write('\nTable {0}\n'.format(table))
file_handler.write('===================================\n')
priorities = set([int(result[0].priority) for result in table_results])
priorities = sorted(priorities)
for priority in priorities:
priority_results = [result for result in table_results if int(result[0].priority) == priority]
priority_results = sorted(priority_results, key=lambda res: res[0].short_name)
file_handler.write('\nMissing variables with priority {0}:\n'.format(priority))
file_handler.write('--------------------------------------\n')
for var, table_name in priority_results:
file_handler.write('{0:12}: {1}\n'.format(var.short_name, var.standard_name))
return results
@staticmethod
def create_report(report_path, results):
    """
    Write the missing-variable reports, one file per table

    Each file is named '<report_path>.<table name>' and lists the variables grouped by ascending priority
    and, inside each priority, sorted by short name.

    :param report_path: common prefix for the report files
    :type report_path: str
    :param results: (variable, table, priority) entries; the variable must provide short_name and
        standard_name, the table must provide name
    :type results: iterable of tuple
    """
    # Group everything in a single pass: table name -> priority -> variables (input order is kept)
    by_table = {}
    for result in results:
        var, table, priority = result[0], result[1], result[2]
        by_table.setdefault(table.name, {}).setdefault(priority, []).append(var)

    for table_name, by_priority in by_table.items():
        # The context manager closes the file even if one of the writes fails
        with open('{0}.{1}'.format(report_path, table_name), 'w') as file_handler:
            file_handler.write('\nTable {0}\n'.format(table_name))
            file_handler.write('===================================\n')

            for priority in sorted(by_priority):
                file_handler.write('\nMissing variables with priority {0}:\n'.format(priority))
                file_handler.write('--------------------------------------\n')

                for var in sorted(by_priority[priority], key=lambda v: v.short_name):
                    file_handler.write('{0:12}: {1}\n'.format(var.short_name, var.standard_name))
def _prepare_mesh_files(self):
......@@ -366,8 +373,6 @@ class EarthDiags(object):
Log.info('File {0} ready', destiny)
def main():
if not EarthDiags.parse_args():
exit(1)
......
......@@ -30,7 +30,7 @@ class Module(Diagnostic):
alias = 'module'
"Diagnostic alias for the configuration file"
def __init__(self, data_manager, startdate, member, chunk, domain, componentu, componentv, module_name, grid):
def __init__(self, data_manager, startdate, member, chunk, domain, componentu, componentv, module_var, grid):
Diagnostic.__init__(self, data_manager)
self.startdate = startdate
self.member = member
......@@ -38,7 +38,7 @@ class Module(Diagnostic):
self.domain = domain
self.componentu = componentu
self.componentv = componentv
self.module = module_name
self.module = module_var
self.grid = grid
self.original_values = None
......
......@@ -2,7 +2,7 @@
from earthdiagnostics.diagnostic import Diagnostic, DiagnosticOption, DiagnosticDomainOption, \
DiagnosticVariableListOption, DiagnosticIntOption
from earthdiagnostics.modelingrealm import ModelingRealm
from earthdiagnostics.utils import Utils, TempFile
from earthdiagnostics.utils import Utils
from earthdiagnostics.box import Box
......@@ -48,7 +48,7 @@ class SelectLevels(Diagnostic):
return 'Select levels Startdate: {0} Member: {1} Chunk: {2} ' \
'Variable: {3}:{4} Levels: {6}-{7} ' \
'Grid: {5}'.format(self.startdate, self.member, self.chunk, self.domain, self.variable,
self.grid, self.box.min_depth, self.box.max_depth)
self.grid, self.box.min_depth, self.box.max_depth)
def __eq__(self, other):
return self.startdate == other.startdate and self.member == other.member and self.chunk == other.chunk and \
......@@ -99,7 +99,8 @@ class SelectLevels(Diagnostic):
options=('-O -d lev,{0.min_depth},{0.max_depth}'.format(self.box),))
self.result.set_local_file(temp)
def _create_var(self, var_name, var_values, source, destiny):
@staticmethod
def _create_var(var_name, var_values, source, destiny):
old_var = source.variables[var_name]
new_var = destiny.createVariable(var_name, old_var.dtype, dimensions=(var_name, ))
new_var[:] = var_values
......